diff --git a/.clang-format b/.clang-format index 04f2bbaf85b2c..a4de8e7be8e07 100644 --- a/.clang-format +++ b/.clang-format @@ -6,11 +6,11 @@ # The basic usage is, # clang-format -i -style=file PATH/TO/SOURCE/CODE # -# The -style=file implicit use ".clang-format" file located in one of -# parent directory. +# The -style=file implicit use ".clang-format" file located in one of +# parent directory. # The -i means inplace change. # -# The document of clang-format is +# The document of clang-format is # http://clang.llvm.org/docs/ClangFormat.html # http://clang.llvm.org/docs/ClangFormatStyleOptions.html --- @@ -20,7 +20,7 @@ IndentWidth: 2 TabWidth: 2 ContinuationIndentWidth: 4 AccessModifierOffset: -1 # The private/protected/public has no indent in class -Standard: Cpp11 +Standard: Cpp11 AllowAllParametersOfDeclarationOnNextLine: true BinPackParameters: false BinPackArguments: false diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1fcb3dc4f521d..7b62f131b9587 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -53,7 +53,6 @@ python/paddle/base/compiler.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84 python/paddle/base/dygraph/layers.py @JiabinYang @phlrain python/paddle/base/framework.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84 python/paddle/base/__init__.py @phlrain @Aurelius84 @qili93 -python/paddle/base/parallel_executor.py @Xreki @zhhsplendid @Aurelius84 python/paddle/base/tests/unittests/white_list/check_op_sequence_batch_1_input_white_list.py @Aurelius84 @phlrain python/paddle/base/tests/unittests/white_list/check_op_sequence_instance_0_input_white_list.py @Aurelius84 @phlrain python/paddle/base/tests/unittests/white_list/check_shape_white_list.py @hong19860320 @Aurelius84 @phlrain diff --git a/.gitignore b/.gitignore index 12abbf0f03caa..667ca443fe77e 100644 --- a/.gitignore +++ b/.gitignore @@ -106,3 +106,7 @@ paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen_tmp/* paddle/fluid/pybind/static_op_function.* 
paddle/fluid/pybind/ops_api.cc python/paddle/tensor/tensor.pyi +paddle/phi/kernels/fusion/cutlass/conv2d/build +paddle/phi/kernels/fusion/cutlass/conv2d/cutlass +paddle/phi/kernels/fusion/cutlass/gemm_epilogue/build +paddle/phi/kernels/fusion/cutlass/gemm_epilogue/cutlass diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3ea3a927cf7bc..9e56241f69176 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,6 @@ repos: - id: sort-simple-yaml files: (ops|backward|op_[a-z_]+)\.yaml$ - id: trailing-whitespace - files: (.*\.(py|bzl|md|rst|c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps|cmake|yaml|yml|hook)|BUILD|.*\.BUILD|WORKSPACE|CMakeLists\.txt)$ - repo: https://github.com/Lucas-C/pre-commit-hooks.git rev: v1.5.1 hooks: @@ -55,7 +54,6 @@ repos: rev: 23.3.0 hooks: - id: black - files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.3.5 hooks: diff --git a/CMakeLists.txt b/CMakeLists.txt index 0aa41a26d700e..f0b2fa79d362a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,9 @@ if(WITH_GPU AND WITH_ROCM) endif() if(WITH_GPU AND NOT APPLE) + if(WITH_PIP_CUDA_LIBRARIES AND CMAKE_SYSTEM_NAME STREQUAL "Windows") + add_definitions(-DPADDLE_WITH_PIP_CUDA_LIBRARIES) + endif() #(Note risemeup1): The cudart dynamic library libcudart.so is used by set CUDA_USE_STATIC_CUDA_RUNTIME and CMAKE_CUDA_FLAGS if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") @@ -107,8 +110,8 @@ if(WITH_GPU AND NOT APPLE) CACHE BOOL "" FORCE) set(CMAKE_CUDA_FLAGS "--cudart shared") if(WITH_PIP_CUDA_LIBRARIES) - #(Note risemeup1): Flag 'WITH_PIP_CUDA_LIBRARIES' will be used in dynamic_loader.cc to search for CUDA-related .so files through the Python libraries provided by NVIDIA. 
- add_definitions(-DWITH_PIP_CUDA_LIBRARIES) + #(Note risemeup1): Flag 'PADDLE_WITH_PIP_CUDA_LIBRARIES' will be used in dynamic_loader.cc to search for CUDA-related .so files through the Python libraries provided by NVIDIA. + add_definitions(-DPADDLE_WITH_PIP_CUDA_LIBRARIES) endif() endif() enable_language(CUDA) diff --git a/cmake/PaddleConfig.cmake.in b/cmake/PaddleConfig.cmake.in index d32c23f6f6edd..e55038bb77c63 100644 --- a/cmake/PaddleConfig.cmake.in +++ b/cmake/PaddleConfig.cmake.in @@ -12,7 +12,7 @@ get_filename_component(PADDLE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}/../.." ABSOLUTE) # include directories -set(PADDLE_INCLUDE_DIRS +set(PADDLE_INCLUDE_DIRS ${PADDLE_INSTALL_PREFIX}/include ${PADDLE_INSTALL_PREFIX}/include/third_party ) diff --git a/cmake/cinn/external/absl.cmake b/cmake/cinn/external/absl.cmake index 8d9e0e45b45ba..46859d7caa871 100644 --- a/cmake/cinn/external/absl.cmake +++ b/cmake/cinn/external/absl.cmake @@ -63,6 +63,10 @@ set(ABSL_LIB_NAMES raw_hash_set) set(ABSL_LIBS "") +if(WITH_ROCM) + list(APPEND ABSL_LIB_NAMES strings_internal raw_logging_internal) +endif() + add_library(absl STATIC IMPORTED GLOBAL) set_property(TARGET absl PROPERTY IMPORTED_LOCATION ${ABSL_INSTALL_DIR}/lib/libabsl_base.a) diff --git a/cmake/hip.cmake b/cmake/hip.cmake index 6efed5b468576..9c43b15b28a63 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -136,11 +136,7 @@ list(APPEND HIP_CXX_FLAGS -Wno-unused-local-typedef) list(APPEND HIP_CXX_FLAGS -Wno-missing-braces) list(APPEND HIP_CXX_FLAGS -Wno-sometimes-uninitialized) -if(WITH_CINN) - list(APPEND HIP_CXX_FLAGS -std=c++14) -else() - list(APPEND HIP_CXX_FLAGS -std=c++17) -endif() +list(APPEND HIP_CXX_FLAGS -std=c++17) list(APPEND HIP_CXX_FLAGS --gpu-max-threads-per-block=1024) if(CMAKE_BUILD_TYPE MATCHES Debug) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index c617f6e56c994..2d4528fa3316f 100755 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -285,6 +285,14 @@ 
else() inference_lib_dist SRCS ${paddle_phi_lib} DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) + if(WITH_GPU OR WITH_ROCM) + set(paddle_phi_kernel_gpu_lib + ${PADDLE_BINARY_DIR}/paddle/phi/libphi_kernel_gpu.*) + copy( + inference_lib_dist + SRCS ${paddle_phi_kernel_gpu_lib} + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) + endif() endif() endif() diff --git a/cmake/make_resource.py b/cmake/make_resource.py index ad8ee179d60c2..e80900da58777 100644 --- a/cmake/make_resource.py +++ b/cmake/make_resource.py @@ -24,7 +24,7 @@ "const unsigned char " + var + "[] = {" - + ",".join(["0x%02x" % ord(c) for c in open(res).read()]) + + ",".join([f"0x{ord(c):02x}" for c in open(res).read()]) + ",0};\n" + "const unsigned " + var diff --git a/paddle/.set_python_path.sh b/paddle/.set_python_path.sh index 8fd58925ee482..67e6304bf3d2e 100755 --- a/paddle/.set_python_path.sh +++ b/paddle/.set_python_path.sh @@ -14,11 +14,11 @@ # limitations under the License. # -# A simple test driver for cmake. +# A simple test driver for cmake. # set PYTHONPATH before run command. 
# Usage: # ./.set_python_pash.sh -p YOUR_PYTHON_PATH {exec...} -# +# # It same as PYTHONPATH=${YOUR_PYTHON_PATH}:$PYTHONPATH {exec...} # PYPATH="" diff --git a/paddle/cinn/ast_gen_ius/ast_gen.cc b/paddle/cinn/ast_gen_ius/ast_gen.cc index 42986fff0dbb1..54805f2c78f50 100644 --- a/paddle/cinn/ast_gen_ius/ast_gen.cc +++ b/paddle/cinn/ast_gen_ius/ast_gen.cc @@ -131,6 +131,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) { } else { iter_values.push_back(axis_vars[i]); } + ir::TryElevateInt32ToInt64({ir::Expr(axis_vars[i]), shape[i]}); } VLOG(4) << "iter_value.size() and block_vars.size() is " << iter_values.size() << " " << block_vars.size(); @@ -167,6 +168,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) { } else { reduce_iter_values.push_back(axis_vars[i]); } + ir::TryElevateInt32ToInt64({ir::Expr(axis_vars[i]), shape[i]}); } VLOG(4) << "ast gen: reduce body is after replace 0" << reduce_body; for (int i = 0; i < reduce_axis.size(); ++i) { @@ -227,6 +229,9 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) { ir::ScheduleBlock::Make( reduce_block_vars, {}, {}, tensor->name, reduce_body)); for (int i = static_cast(reduce_axis.size()) - 1; i >= 0; --i) { + ir::TryElevateInt32ToInt64({reduce_axis[i], + reduce_axis[i]->lower_bound, + reduce_axis[i]->upper_bound}); reduce_body = ir::For::Make(reduce_axis[i], reduce_axis[i]->lower_bound, reduce_axis[i]->upper_bound, diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir.cc b/paddle/cinn/auto_schedule/analysis/analyze_ir.cc index 6f00ee34813d1..c51ba89806956 100644 --- a/paddle/cinn/auto_schedule/analysis/analyze_ir.cc +++ b/paddle/cinn/auto_schedule/analysis/analyze_ir.cc @@ -32,7 +32,7 @@ #include "paddle/cinn/lang/lower.h" #include "paddle/cinn/optim/optimize.h" #include "paddle/cinn/optim/transform_gpu_forloop.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -193,10 +193,14 @@ 
ir::LoweredFunc UpdateFuncWithNewBody(const cinn::common::Target& target, std::unordered_set GetReduceLoopVarNames(const ir::Expr block) { const ir::ScheduleBlockRealize* block_realize = block.As(); - CHECK_NOTNULL(block_realize); + PADDLE_ENFORCE_NOT_NULL( + block_realize, + phi::errors::InvalidArgument("The block is not a ScheduleBlockRealize")); const ir::ScheduleBlock* block_node = block_realize->schedule_block.As(); - CHECK_NOTNULL(block_node); + PADDLE_ENFORCE_NOT_NULL( + block_node, + phi::errors::InvalidArgument("The block is not a ScheduleBlock")); std::vector iter_values = block_realize->iter_values; std::vector iter_vars = block_node->iter_vars; @@ -218,10 +222,14 @@ std::unordered_set GetReduceLoopVarNames(const ir::Expr block) { std::string GetBlockName(const ir::Expr block) { const ir::ScheduleBlockRealize* block_realize = block.As(); - CHECK_NOTNULL(block_realize); + PADDLE_ENFORCE_NOT_NULL( + block_realize, + phi::errors::InvalidArgument("The block is not a ScheduleBlockRealize")); const ir::ScheduleBlock* block_node = block_realize->schedule_block.As(); - CHECK_NOTNULL(block_node); + PADDLE_ENFORCE_NOT_NULL( + block_node, + phi::errors::InvalidArgument("The block is not a ScheduleBlock")); return block_node->name; } diff --git a/paddle/cinn/auto_schedule/auto_tuner.cc b/paddle/cinn/auto_schedule/auto_tuner.cc index d45dcc743e525..9524e1ed3048f 100644 --- a/paddle/cinn/auto_schedule/auto_tuner.cc +++ b/paddle/cinn/auto_schedule/auto_tuner.cc @@ -34,7 +34,7 @@ #include "paddle/cinn/hlir/framework/op.h" #include "paddle/cinn/hlir/framework/visualize_helper.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -144,9 +144,10 @@ void PrintResult(const TuningResult& result) { } TuningResult AutoTuner::Tune(const TuningOptions& options) { - CHECK_GT(options.num_tuning_rounds, 0) << "Invalid config"; - VLOG(3) << "Begin tuning with round num=" << options.num_tuning_rounds - << ", tasks 
size=" << tasks_.size(); + PADDLE_ENFORCE_GT(options.num_tuning_rounds, + 0, + phi::errors::InvalidArgument( + "The num_tuning_rounds should be greater than 0.")); TuningResult result; result.subgraphs.resize(tasks_.size()); diff --git a/paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc b/paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc index a9074c76fa8cf..54396ecaa6e2e 100644 --- a/paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc +++ b/paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc @@ -24,7 +24,7 @@ #include "paddle/cinn/auto_schedule/search_space/search_state.h" #include "paddle/cinn/common/target.h" #include "paddle/cinn/ir/schedule/ir_schedule.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -45,8 +45,10 @@ void ExprCostModel::Train(const std::vector& samples, const cinn::common::Target& target) { trained_times_.store(1); size_t total_size = samples.size(); - CHECK_EQ(total_size, labels.size()) - << "Samples must have same size as labels"; + PADDLE_ENFORCE_EQ( + total_size, + labels.size(), + phi::errors::InvalidArgument("Samples must have same size as labels")); std::vector> train_feature_numbers(total_size); FeatureExtractor extractor; for (size_t i = 0; i < total_size; ++i) { @@ -63,8 +65,10 @@ void ExprCostModel::Update(const std::vector& samples, const cinn::common::Target& target) { ++trained_times_; size_t total_size = samples.size(); - CHECK_EQ(total_size, labels.size()) - << "Samples must have same size as labels"; + PADDLE_ENFORCE_EQ( + total_size, + labels.size(), + phi::errors::InvalidArgument("Samples must have same size as labels")); std::vector> train_feature_numbers(total_size); FeatureExtractor extractor; for (size_t i = 0; i < total_size; ++i) { diff --git a/paddle/cinn/auto_schedule/database/database.cc b/paddle/cinn/auto_schedule/database/database.cc index 2036b44a83fef..ee8277b9dadd6 100644 --- a/paddle/cinn/auto_schedule/database/database.cc +++ 
b/paddle/cinn/auto_schedule/database/database.cc @@ -22,7 +22,7 @@ #include "paddle/cinn/auto_schedule/task/task_registry.h" #include "paddle/cinn/ir/schedule/ir_schedule.h" #include "paddle/cinn/ir/schedule/schedule_desc.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -42,8 +42,10 @@ proto::TuningRecord TuningRecord::ToProto() const { Database::Database(int capacity_per_task) : capacity_per_task_(capacity_per_task) { - CHECK_GT(capacity_per_task_, 0) - << "capacity_per_task_ should be greater than 0"; + PADDLE_ENFORCE_GT(capacity_per_task_, + 0, + phi::errors::InvalidArgument( + "capacity_per_task_ should be greater than 0")); } std::unique_ptr Database::Make(const DatabaseConfig& config) { diff --git a/paddle/cinn/auto_schedule/measure/simple_builder.cc b/paddle/cinn/auto_schedule/measure/simple_builder.cc index 5be5b8528616f..0636cfc2b79fa 100644 --- a/paddle/cinn/auto_schedule/measure/simple_builder.cc +++ b/paddle/cinn/auto_schedule/measure/simple_builder.cc @@ -13,7 +13,7 @@ // limitations under the License. 
#include "paddle/cinn/auto_schedule/measure/simple_builder.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -25,8 +25,10 @@ SimpleBuilder::SimpleBuilder(hlir::framework::GraphCompiler* graph_compiler) : graph_compiler_(graph_compiler) {} BuildResult SimpleBuilder::Build(const MeasureInput& input) { - CHECK_NE(graph_compiler_, static_cast(nullptr)) - << "empty handle to GraphCompiler"; + PADDLE_ENFORCE_NE( + graph_compiler_, + static_cast(nullptr), + phi::errors::InvalidArgument("empty handle to GraphCompiler")); CompilationContext& context = graph_compiler_->GetCompilationContext(); context.groups.emplace_back(input.task->subgraph); context.lowered_funcs.emplace_back(input.lowered_funcs); diff --git a/paddle/cinn/auto_schedule/measure/simple_runner.cc b/paddle/cinn/auto_schedule/measure/simple_runner.cc index 92dcc00693b5b..ec3929aff71ae 100644 --- a/paddle/cinn/auto_schedule/measure/simple_runner.cc +++ b/paddle/cinn/auto_schedule/measure/simple_runner.cc @@ -25,7 +25,7 @@ #include "paddle/cinn/hlir/framework/buffer.h" #include "paddle/cinn/hlir/framework/scope.h" #include "paddle/cinn/hlir/framework/tensor.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -76,8 +76,11 @@ static void PopulateRandomValue(const cinn::common::Type& type, std::generate_n( fmt_ptr, numel, [&engine, &dist]() { return dist(engine); }); } else { - CHECK_EQ(type.bytes(), 8) - << "Unsupported type: " << type << ", type.bytes = " << type.bytes(); + PADDLE_ENFORCE_EQ( + type.bytes(), + 8, + phi::errors::Unimplemented("Unsupported type, the type.bytes is %d", + type.bytes())); auto* fmt_ptr = reinterpret_cast(raw_ptr); std::uniform_int_distribution dist( std::numeric_limits::min(), @@ -127,7 +130,12 @@ static std::unordered_set ParamsNeedInitWithZero( std::vector param_idxs = kInitWithZeroParams.at(node->op()->name); const auto& inlinks = node->inlinks_in_order(); for (int param_idx : param_idxs) { - 
CHECK_GT(inlinks.size(), param_idx); + PADDLE_ENFORCE_GT(inlinks.size(), + param_idx, + phi::errors::InvalidArgument( + "The input size of the node is less than the " + "index of the parameter that needs to be " + "initialized to 0")); auto& edge = inlinks.at(param_idx); std::string param_name = edge->source()->as()->id(); @@ -141,7 +149,10 @@ static std::unordered_set ParamsNeedInitWithZero( } SimpleRunner::SimpleRunner(int repeat_times) : repeat_times_(repeat_times) { - CHECK_GT(repeat_times_, 0) << "repeat_times can't less than 0"; + PADDLE_ENFORCE_GT( + repeat_times_, + 0, + phi::errors::InvalidArgument("repeat_times should be greater than 0")); } // Prepare execution arguments of all instructions to run, a argument diff --git a/paddle/cinn/auto_schedule/post_schedule_rule/cooperative_process.cc b/paddle/cinn/auto_schedule/post_schedule_rule/cooperative_process.cc index 2e3c4b0e21661..ffc8a0f21d903 100644 --- a/paddle/cinn/auto_schedule/post_schedule_rule/cooperative_process.cc +++ b/paddle/cinn/auto_schedule/post_schedule_rule/cooperative_process.cc @@ -18,7 +18,7 @@ #include "paddle/cinn/ir/ir_printer.h" #include "paddle/cinn/ir/schedule/ir_schedule.h" #include "paddle/cinn/ir/schedule/schedule_desc.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -29,7 +29,10 @@ int ExtractNumThreads(const ir::IRSchedule& ir_schedule, if (step.type == "Bind" && step.attrs.find("thread_axis") != step.attrs.end() && absl::get(step.attrs.at("thread_axis")) == bind_axis) { - CHECK_EQ(step.inputs.at("loop").size(), 1); + PADDLE_ENFORCE_EQ(step.inputs.at("loop").size(), + 1, + phi::errors::InvalidArgument( + "The loop size of bind step should be 1")); return step.inputs.at("loop")[0].As()->extent.as_int32(); } } diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.cc index e59ba8b423293..523763942c64e 100644 --- 
a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.cc @@ -21,7 +21,7 @@ #include "paddle/cinn/ir/schedule_block_graph.h" #include "paddle/cinn/ir/utils/ir_copy.h" #include "paddle/cinn/ir/utils/ir_nodes_collector.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -40,8 +40,11 @@ bool IsSpatialLoop(const ir::For* for_node) { const auto* schedule_block = block_realize->schedule_block.As(); CHECK(schedule_block) << "schedule_block field is not a ScheduleBlock"; - CHECK_EQ(block_realize->iter_values.size(), - schedule_block->iter_vars.size()); + PADDLE_ENFORCE_EQ( + block_realize->iter_values.size(), + schedule_block->iter_vars.size(), + phi::errors::InvalidArgument( + "The size of iter_values and iter_vars should be equal.")); for (int i = 0; i < block_realize->iter_values.size(); ++i) { const ir::Var& iter_var = schedule_block->iter_vars[i]; const ir::Expr& binding = block_realize->iter_values[i]; @@ -93,10 +96,16 @@ void BindGPUIndex(ir::IRSchedule* ir_schedule, int max_blocks, int max_threads_per_block) { auto all_loops = ir_schedule->GetLoops(block_name); - CHECK_LE(num_loops_to_bind, all_loops.size()) - << "The number of loops to be bind is greater than size of all_loops"; - CHECK_GE(num_loops_to_bind, 0) - << "The number of loops to be bind should be greater than 0"; + PADDLE_ENFORCE_LE( + num_loops_to_bind, + all_loops.size(), + phi::errors::InvalidArgument( + "The number of loops to be bind is greater than size of all_loops")); + PADDLE_ENFORCE_GE( + num_loops_to_bind, + 0, + phi::errors::InvalidArgument( + "The number of loops to be bind should be greater than 0")); // check whether it is the case that threadIdx has been binded but blockIdx // not, the threadIdx can only be binded in the first loop after // num_loops_to_bind loops because we has excluded other cases in @@ -130,13 +139,19 @@ void BindGPUIndex(ir::IRSchedule* ir_schedule, if 
(extent <= max_blocks * max_threads_per_block) { auto splits = ir_schedule->Split(fused_loop, {-1, max_threads_per_block}); - CHECK_EQ(splits.size(), 2); + PADDLE_ENFORCE_EQ( + splits.size(), + 2, + phi::errors::InvalidArgument("The size of splits should be 2.")); ir_schedule->Bind(splits[0], "blockIdx.x"); ir_schedule->Bind(splits[1], "threadIdx.x"); } else { auto splits = ir_schedule->Split(fused_loop, {-1, max_blocks, max_threads_per_block}); - CHECK_EQ(splits.size(), 3); + PADDLE_ENFORCE_EQ( + splits.size(), + 3, + phi::errors::InvalidArgument("The size of splits should be 3.")); ir_schedule->Reorder({splits[1], splits[2], splits[0]}); all_loops = ir_schedule->GetLoops(block_name); ir_schedule->Bind(all_loops[0], "blockIdx.x"); @@ -160,8 +175,11 @@ RuleApplyType AutoBind::Init(ir::IRSchedule* ir_schedule) { } void AutoBind::Apply(int index) { - CHECK_LT(index, applicable_schedule_blocks_.size()) - << "invalid apply index:" << index; + PADDLE_ENFORCE_LT( + index, + applicable_schedule_blocks_.size(), + phi::errors::InvalidArgument( + "The index should be less than size of applicable_schedule_blocks_")); auto applied_block = applicable_schedule_blocks_.at(index); auto all_loops = ir_schedule_->GetLoops(applied_block); BindGPUIndex(ir_schedule_, diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.cc index e52d91c125224..ef0dbef492a59 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.cc @@ -20,7 +20,7 @@ #include "paddle/cinn/common/target.h" #include "paddle/cinn/ir/schedule/ir_schedule.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -28,16 +28,19 @@ AutoGenRule::AutoGenRule(const cinn::common::Target& target) : target_(&target) {} int AutoGenRule::NumberApplicable() const { - CHECK_GE(num_applicable_, 0) - << "Call " << 
GetRuleName() - << "::NumberApplicable() without initialization."; + PADDLE_ENFORCE_GE( + num_applicable_, + 0, + phi::errors::InvalidArgument( + "The num_applicable_ should be greater than or equal to 0.")); return num_applicable_; } void AutoGenRule::ApplyRandomly() { - CHECK_GT(num_applicable_, 0) - << "Call " << GetRuleName() - << "::ApplyRandomly() with NumberApplicable() == 0"; + PADDLE_ENFORCE_GT(num_applicable_, + 0, + phi::errors::InvalidArgument( + "The num_applicable_ should be greater than 0.")); int index = rand() % num_applicable_; // NOLINT return Apply(index); } diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll.cc index c052d2995c8ad..a4ecd5036e2e7 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll.cc @@ -22,7 +22,7 @@ #include "paddle/cinn/ir/schedule/ir_schedule.h" #include "paddle/cinn/ir/utils/ir_copy.h" #include "paddle/cinn/ir/utils/ir_nodes_collector.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -97,8 +97,9 @@ RuleApplyType AutoUnroll::Init(ir::IRSchedule* ir_schedule) { } void AutoUnroll::Apply(int index) { - CHECK_LT(index, applicable_schedule_blocks_.size()) - << "invalid apply index:" << index; + PADDLE_ENFORCE_LT(index, + applicable_schedule_blocks_.size(), + phi::errors::InvalidArgument("Index is out of range.")); auto applied_block = applicable_schedule_blocks_.at(index); int max_step = auto_unroll_options[std::rand() % auto_unroll_options.size()]; ir_schedule_->Annotate( diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h index 1bbc8da4497d6..759dbfa54d3a4 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h +++ 
b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h @@ -27,7 +27,7 @@ #include "paddle/cinn/ir/ir.h" #include "paddle/cinn/ir/ir_base.h" #include "paddle/cinn/ir/schedule/ir_schedule.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -103,8 +103,11 @@ class MultiLevelTiling : public AutoGenRule { // Sample num_split integers whose product equals extent template std::vector SampleTileSplit(T extent, int num_split) const { - CHECK_GT(num_split, 0) - << "num_split in SampleTileSplit must be greater than 0"; + PADDLE_ENFORCE_GT( + num_split, + 0, + phi::errors::InvalidArgument( + "num_split in SampleTileSplit must be greater than 0")); if (num_split == 1) { return {extent}; } diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.cc index 85bc207c84fc7..0053c87a81394 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.cc @@ -23,7 +23,7 @@ #include "paddle/cinn/ir/tensor.h" #include "paddle/cinn/ir/utils/ir_copy.h" #include "paddle/cinn/ir/utils/ir_nodes_collector.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -32,10 +32,16 @@ bool ReductionFactoring::CanApply(const std::string& block_name, ir::Expr block_expr = ir_schedule->GetBlock(block_name); ir::ScheduleBlockRealize* block_realize = block_expr.As(); - CHECK_NOTNULL(block_realize); + PADDLE_ENFORCE_NOT_NULL( + block_realize, + phi::errors::InvalidArgument( + "The block_expr should be a ScheduleBlockRealize.")); ir::ScheduleBlock* sch_block = block_realize->schedule_block.As(); - CHECK_NOTNULL(sch_block); + PADDLE_ENFORCE_NOT_NULL( + sch_block, + phi::errors::InvalidArgument( + "The schedule_block field is not a ScheduleBlock.")); AnalyzeScheduleBlockReadWriteBuffer(sch_block); // 1. 
The block must have write buffer @@ -135,7 +141,11 @@ void ReductionFactoring::Apply(const std::string& block_name, return; } // 3. Reorder if new_loop_order differs from the original order - CHECK_EQ(all_loops.size(), new_loop_order.size()); + PADDLE_ENFORCE_EQ( + all_loops.size(), + new_loop_order.size(), + phi::errors::InvalidArgument("The size of all_loops should be equal to " + "the size of new_loop_order.")); for (int i = 0; i < all_loops.size(); ++i) { if (all_loops[i].As()->loop_var->name != new_loop_order[i].As()->loop_var->name) { @@ -152,7 +162,11 @@ void ReductionFactoring::Apply(const std::string& block_name, for (int i = num_spatial_loops; i < all_loops.size(); ++i) { reduction_loop_indices.push_back(i); } - CHECK_EQ(reduction_loop_indices.size(), num_reduction_loops); + PADDLE_ENFORCE_EQ(reduction_loop_indices.size(), + num_reduction_loops, + phi::errors::InvalidArgument( + "The size of reduction_loop_indices should be equal " + "to num_reduction_loops.")); fused_reduce_loop = ir_schedule->Fuse(block_name, reduction_loop_indices); } else { all_loops = ir_schedule->GetLoops(block_name); diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring_test.cc index d56d97f83df60..fb327c130dbbf 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring_test.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring_test.cc @@ -23,8 +23,8 @@ #include "paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.h" #include "paddle/cinn/ir/ir_printer.h" +#include "paddle/common/enforce.h" #include "test/cpp/cinn/concrete_program_builder.h" - PD_DECLARE_bool(cinn_new_group_scheduler); namespace cinn { @@ -64,8 +64,13 @@ class TestReductionFactoring : public TestAutoGenRuleBase { // check const std::vector& blocks = ir_schedule.GetAllBlocks(); - CHECK_EQ(blocks.size(), 2UL); - CHECK_EQ(ir.str(), 
expected_ir); + PADDLE_ENFORCE_EQ( + blocks.size(), + 2UL, + phi::errors::InvalidArgument("The size of blocks should be 2.")); + PADDLE_ENFORCE_EQ(ir.str(), + expected_ir, + phi::errors::InvalidArgument("The ir is not correct.")); } }; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.cc index 994027dba0ee4..66d25c65542d1 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.cc @@ -18,7 +18,6 @@ #include #include #include - #include "paddle/cinn/auto_schedule/analysis/analyze_ir.h" #include "paddle/cinn/backends/codegen_cuda_dev.h" #include "paddle/cinn/cinn.h" @@ -29,6 +28,7 @@ #include "paddle/cinn/hlir/framework/pass.h" #include "paddle/cinn/hlir/framework/tensor.h" #include "paddle/cinn/optim/transform_gpu_forloop.h" +#include "paddle/common/enforce.h" #ifdef CINN_WITH_CUDA #include #endif @@ -89,8 +89,10 @@ std::string TestAutoGenRuleBase::GetIR(const ir::IRSchedule& schedule) { ir::Module TestAutoGenRuleBase::BuildIRModule(const ir::IRSchedule& schedule) { auto&& updated_bodys = schedule.GetModule().GetExprs(); - CHECK_EQ(lowered_funcs_.size(), updated_bodys.size()) - << "associated exprs size not equal"; + PADDLE_ENFORCE_EQ( + lowered_funcs_.size(), + updated_bodys.size(), + phi::errors::InvalidArgument("Associated exprs size not equal")); ir::Module::Builder builder("test_builder", this->target_); for (int i = 0; i < lowered_funcs_.size(); ++i) { @@ -175,10 +177,16 @@ void CheckResult(raw_func_type test_func, const cinn::common::Target& target) { CHECK(input_names.size()) << "The number of inputs must be greater than 0."; CHECK(output_names.size()) << "The number of outputs must be greater than 0."; - CHECK_EQ(input_names.size(), input_shapes.size()) - << "The quantity of input_names and input_shapes must be equal."; - CHECK_EQ(output_names.size(), 
output_shapes.size()) - << "The quantity of output_names and output_shapes must be equal."; + PADDLE_ENFORCE_EQ( + input_names.size(), + input_shapes.size(), + phi::errors::InvalidArgument( + "The quantity of input_names and input_shapes must be equal.")); + PADDLE_ENFORCE_EQ( + output_names.size(), + output_shapes.size(), + phi::errors::InvalidArgument( + "The quantity of output_names and output_shapes must be equal.")); // Initialize data std::vector input_data_ptrs(input_names.size()); diff --git a/paddle/cinn/auto_schedule/search_space/block_sampler.cc b/paddle/cinn/auto_schedule/search_space/block_sampler.cc index 93de31e6a5e36..38d3b7badd02a 100644 --- a/paddle/cinn/auto_schedule/search_space/block_sampler.cc +++ b/paddle/cinn/auto_schedule/search_space/block_sampler.cc @@ -17,7 +17,7 @@ #include #include "paddle/cinn/ir/ir.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -27,7 +27,10 @@ std::unique_ptr BlockSampler::Make( const std::string& strategy, utils::LinearRandomEngine::StateType rand_seed, const std::vector& weights) { - CHECK_GT(all_blocks.size(), 0) << "Empty block list"; + PADDLE_ENFORCE_GT( + all_blocks.size(), + 0, + phi::errors::InvalidArgument("The all_blocks should not empty.")); if (strategy == "traversal") { VLOG(6) << "Init TraversalBlockSampler with block num = " << all_blocks.size(); @@ -87,7 +90,11 @@ ProbabilisticBlockSampler::ProbabilisticBlockSampler( if (weights.empty()) { weights_.resize(all_blocks.size(), 1); } else { - CHECK_EQ(all_blocks.size(), weights_.size()); + PADDLE_ENFORCE_EQ( + all_blocks.size(), + weights_.size(), + phi::errors::InvalidArgument( + "The size of all_blocks and weights should be equal.")); } remains_ = all_blocks.size(); } diff --git a/paddle/cinn/auto_schedule/search_space/rule_sampler.cc b/paddle/cinn/auto_schedule/search_space/rule_sampler.cc index 3c0868d0748e5..bd8e818546a91 100644 --- a/paddle/cinn/auto_schedule/search_space/rule_sampler.cc +++ 
b/paddle/cinn/auto_schedule/search_space/rule_sampler.cc @@ -16,7 +16,7 @@ #include #include - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -26,7 +26,10 @@ std::unique_ptr RuleSampler::Make( const std::string& strategy, utils::LinearRandomEngine::StateType rand_seed, const std::vector& weights) { - CHECK_GT(potential_rules.size(), 0) << "Empty rule list"; + PADDLE_ENFORCE_GT( + potential_rules.size(), + 0, + phi::errors::InvalidArgument("The potential_rules should not be empty.")); if (strategy == "traversal") { return std::make_unique(potential_rules, default_remove_policy); @@ -64,7 +67,11 @@ ProbabilisticRuleSampler::ProbabilisticRuleSampler( if (weights.empty()) { weights_.resize(potential_rules.size(), 1); } else { - CHECK_EQ(potential_rules.size(), weights_.size()); + PADDLE_ENFORCE_EQ( + potential_rules.size(), + weights_.size(), + phi::errors::InvalidArgument( + "Potential_rules's size should same as weights's size.")); } remains_ = potential_rules.size(); } diff --git a/paddle/cinn/auto_schedule/search_space/search_space.cc b/paddle/cinn/auto_schedule/search_space/search_space.cc index 650e1d572f831..a4f4db6472e1b 100644 --- a/paddle/cinn/auto_schedule/search_space/search_space.cc +++ b/paddle/cinn/auto_schedule/search_space/search_space.cc @@ -33,7 +33,7 @@ #include "paddle/cinn/ir/schedule/ir_schedule.h" #include "paddle/cinn/ir/utils/ir_copy.h" #include "paddle/cinn/runtime/flags.h" - +#include "paddle/common/enforce.h" PD_DECLARE_bool(auto_schedule_use_cost_model); namespace cinn { @@ -109,7 +109,10 @@ SearchState SearchSpace::RandomScheduleMutate(const SearchState& state) { --iter; int sample_rule_index = iter->second; - CHECK_LT(sample_rule_index, ret->applicable_rules.size()); + PADDLE_ENFORCE_LT(sample_rule_index, + ret->applicable_rules.size(), + phi::errors::InvalidArgument( + "The sample_rule_index should less than ret's.")); AutoGenRule* sample_rule = ret->applicable_rules.at(sample_rule_index); VLOG(7) << 
"Apply rule: " << sample_rule->GetRuleName() << " with index=" << sample_weighted_index - iter->first; diff --git a/paddle/cinn/auto_schedule/search_strategy/evolutionary_search.cc b/paddle/cinn/auto_schedule/search_strategy/evolutionary_search.cc index dcb6e1ca93914..6403283f18be1 100644 --- a/paddle/cinn/auto_schedule/search_strategy/evolutionary_search.cc +++ b/paddle/cinn/auto_schedule/search_strategy/evolutionary_search.cc @@ -35,7 +35,7 @@ #include "paddle/cinn/utils/multi_threading.h" #include "paddle/cinn/utils/sized_multi_set.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" PD_DECLARE_bool(auto_schedule_use_cost_model); namespace cinn { @@ -175,9 +175,11 @@ SearchState EvolutionarySearch::CrossOver(const SearchState& state1, std::vector mother_exprs = state2->ir_schedule.GetModule().GetExprs(); - CHECK_EQ(father_exprs.size(), mother_exprs.size()) - << "CrossOver ModuleExpr in EvolutionarySearch must have same number of " - "AST"; + PADDLE_ENFORCE_EQ(father_exprs.size(), + mother_exprs.size(), + phi::errors::InvalidArgument( + "CrossOver ModuleExpr in EvolutionarySearch must have " + "same number of AST")); for (size_t i = 0; i < father_exprs.size(); ++i) { if (utils::SampleUniformInt(0, 2, &rand_seed_) == 0) { @@ -200,10 +202,15 @@ SearchState EvolutionarySearch::CrossOver(const SearchState& state1, SearchState EvolutionarySearch::Mutate( const SearchState& state, utils::LinearRandomEngine::StateType* rand_seed) { - CHECK_GT(weighted_mutators_.size(), 0) - << "There is no mutate rule can be applied."; + PADDLE_ENFORCE_GT( + weighted_mutators_.size(), + 0, + phi::errors::InvalidArgument("There is no mutate rule can be applied.")); double accu_weight = (weighted_mutators_.rbegin())->first; - CHECK_GT(accu_weight, 0) << "The accumulate weight must be greater than 0."; + PADDLE_ENFORCE_GT(accu_weight, + 0, + phi::errors::InvalidArgument( + "The accumulate weight must be greater than 0.")); // sample a mutate rule double 
sample_weight = utils::SampleUniformDouble(0, accu_weight, rand_seed); auto sampled_iter = weighted_mutators_.upper_bound(sample_weight); diff --git a/paddle/cinn/auto_schedule/search_strategy/evolutionary_search_test.cc b/paddle/cinn/auto_schedule/search_strategy/evolutionary_search_test.cc index 6a983d7f9aaac..7791cdf9f89d5 100644 --- a/paddle/cinn/auto_schedule/search_strategy/evolutionary_search_test.cc +++ b/paddle/cinn/auto_schedule/search_strategy/evolutionary_search_test.cc @@ -30,8 +30,8 @@ #include "paddle/cinn/hlir/framework/op_lowering.h" #include "paddle/cinn/ir/ir_base.h" #include "paddle/cinn/ir/schedule/ir_schedule.h" +#include "paddle/common/enforce.h" #include "test/cpp/cinn/program_builder.h" - namespace cinn { namespace auto_schedule { @@ -159,7 +159,10 @@ TEST(EvolutionarySearch, Evolve) { auto tasks = CreateTasks( tests::OpBuilder("matmul").Build({{"X", {32, 32}}, {"Y", {32, 32}}}), target); - CHECK_EQ(tasks.size(), 1); + PADDLE_ENFORCE_EQ( + tasks.size(), + 1, + phi::errors::InvalidArgument("The size of tasks should be 1.")); ExprCostModel cost_model; std::vector cost_model_samples(1); std::vector cost_model_labels(1); @@ -206,7 +209,11 @@ TEST(EvolutionarySearch, Evolve) { VLOG(6) << "cost = " << s->predicted_cost; } VLOG(6) << "total_cost_next = " << total_cost_next; - CHECK_LE(total_cost_next, total_cost_pre); + PADDLE_ENFORCE_LE( + total_cost_next, + total_cost_pre, + phi::errors::InvalidArgument("The total cost should be less than or " + "equal to the previous one.")); std::swap(population_pre_ptr, population_next_ptr); } } diff --git a/paddle/cinn/auto_schedule/task/task_optimizer.cc b/paddle/cinn/auto_schedule/task/task_optimizer.cc index 273cba4c4060e..a027dc9dd1ed5 100644 --- a/paddle/cinn/auto_schedule/task/task_optimizer.cc +++ b/paddle/cinn/auto_schedule/task/task_optimizer.cc @@ -18,7 +18,6 @@ #include #include - #include "paddle/cinn/auto_schedule/analysis/analyze_ir.h" #include 
"paddle/cinn/auto_schedule/cost_model/expr_cost_model.h" #include "paddle/cinn/auto_schedule/measure/measure.h" @@ -34,6 +33,7 @@ #include "paddle/cinn/optim/transform_gpu_forloop.h" #include "paddle/cinn/runtime/flags.h" #include "paddle/cinn/utils/string.h" +#include "paddle/common/enforce.h" #ifdef CINN_WITH_CUDA #include @@ -223,9 +223,12 @@ bool IsWrappedByCustomCall(const TuneTask* task) { TaskOptimizer::Result TaskOptimizer::OptimizeByEvolution( const TuningOptions& options) { - CHECK_EQ(options.num_measure_trials % options.num_samples_per_iteration, 0) - << "TuningOptions.num_measure_trials % " - "TuningOptions.num_samples_per_iteration must be 0."; + PADDLE_ENFORCE_EQ( + options.num_measure_trials % options.num_samples_per_iteration, + 0, + phi::errors::InvalidArgument( + "TuningOptions.num_measure_trials % " + "TuningOptions.num_samples_per_iteration must be 0.")); VLOG(4) << "Optimizing TuneTask with num_measure_trials:" << options.num_measure_trials @@ -290,9 +293,11 @@ TaskOptimizer::Result TaskOptimizer::OptimizeByEvolution( << measure_inputs.size(); std::vector measure_outputs = schedule_measurer_->Measure(measure_inputs); - CHECK_EQ(measure_outputs.size(), states.size()) - << "ScheduleMeasurer didn't output same number of MeasureOutput of " - "states in TaskOptimizer"; + PADDLE_ENFORCE_EQ(measure_outputs.size(), + states.size(), + phi::errors::InvalidArgument( + "ScheduleMeasurer didn't output same number of " + "MeasureOutput of states in TaskOptimizer")); // record to database for (size_t i = 0; i < states.size(); ++i) { database_->AddRecord(TuningRecord(measure_inputs[i].task->serialized_key, @@ -344,9 +349,11 @@ std::vector TaskOptimizer::SearchOneRound( for (size_t i = 0; i < states.size(); ++i) { std::vector best_exprs = states[i]->ir_schedule.GetModule().GetExprs(); - CHECK_EQ(best_exprs.size(), task_->lowered_funcs.size()) - << "RuntimeError: Expr size is not equal to LoweredFunc size in " - "TaskOptimizer"; + 
PADDLE_ENFORCE_EQ(best_exprs.size(), + task_->lowered_funcs.size(), + phi::errors::InvalidArgument( + "Expr size is not equal to LoweredFunc size in " + "TaskOptimizer")); auto init_funcs = ir::ir_utils::IRCopy(task_->lowered_funcs); std::vector valid_funcs; for (size_t j = 0; j < best_exprs.size(); ++j) { @@ -369,8 +376,11 @@ std::vector TaskOptimizer::SearchOneRound( } states.erase(states.begin() + valid_cnt, states.end()); - CHECK_EQ(states.size(), measure_candidates->size()) - << "result size of states not equal to measure_candidates"; + PADDLE_ENFORCE_EQ( + states.size(), + measure_candidates->size(), + phi::errors::InvalidArgument( + "result size of states not equal to measure_candidates")); VLOG(4) << "EvolutionarySearch return size=" << states.size() << ", valid count=" << valid_cnt; VLOG(4) << JoinStatesDebugString("TaskOptimizer::SearchOneRound-Result", diff --git a/paddle/cinn/auto_schedule/task_scheduler/task_scheduler.cc b/paddle/cinn/auto_schedule/task_scheduler/task_scheduler.cc index a8961e45b980d..f59acbe612635 100644 --- a/paddle/cinn/auto_schedule/task_scheduler/task_scheduler.cc +++ b/paddle/cinn/auto_schedule/task_scheduler/task_scheduler.cc @@ -19,7 +19,7 @@ #include "paddle/cinn/auto_schedule/task/tune_task.h" #include "paddle/cinn/auto_schedule/task_scheduler/efficiency_priority.h" #include "paddle/cinn/auto_schedule/task_scheduler/round_robin.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace auto_schedule { @@ -27,7 +27,10 @@ std::unique_ptr TaskScheduler::Make( const std::vector& tasks, const Config& config, const std::string& strategy) { - CHECK_GT(tasks.size(), 0) << "Empty task list"; + PADDLE_ENFORCE_GT( + tasks.size(), + 0, + phi::errors::InvalidArgument("The task's size should greater than 0.")); if (strategy == "round_robin") { return std::make_unique(tasks, config); } else if (strategy == "efficiency_priority") { diff --git a/paddle/cinn/auto_schedule/tests/performance_comparison_test.cc 
b/paddle/cinn/auto_schedule/tests/performance_comparison_test.cc index 2966467b3eda6..c9f2630ac6e8a 100644 --- a/paddle/cinn/auto_schedule/tests/performance_comparison_test.cc +++ b/paddle/cinn/auto_schedule/tests/performance_comparison_test.cc @@ -32,8 +32,8 @@ #include "paddle/cinn/ir/ir_base.h" #include "paddle/cinn/runtime/flags.h" #include "paddle/cinn/utils/data_util.h" +#include "paddle/common/enforce.h" #include "test/cpp/cinn/program_builder.h" - /* This test is used as a tool to evaluate or compare performance of 3 * schedules(no schedule, manual schedule, auto-schedule). One can specify which * schedules to be evaluated through `FLAGS_evaluate_knobs` and specify which @@ -355,7 +355,10 @@ TEST_F(PerformanceTester, Gather) { // paddle model test TEST_F(PerformanceTester, ResNet50) { - CHECK_NE(FLAGS_resnet50_model_dir, ""); + PADDLE_ENFORCE_NE(FLAGS_resnet50_model_dir, + "", + phi::errors::InvalidArgument( + "The FLAGS_resnet50_model's dir should not be empty.")); FLAGS_cinn_infer_model_version = 1.0; std::unordered_map> feeds = { {"inputs", {batch_size, 3, 224, 224}}}; diff --git a/paddle/cinn/backends/codegen_c.cc b/paddle/cinn/backends/codegen_c.cc index 85443b02c0a8c..07dc8421de6cc 100644 --- a/paddle/cinn/backends/codegen_c.cc +++ b/paddle/cinn/backends/codegen_c.cc @@ -26,7 +26,7 @@ #include "paddle/cinn/runtime/cpu/thread_backend.h" #include "paddle/cinn/runtime/intrinsic.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" //! Root of the builtin code. 
PD_DECLARE_string(cinn_x86_builtin_code_root); @@ -205,7 +205,10 @@ void CodeGenC::Visit(const ir::For *op) { Expr num_task_var = Var("num_task"); IrPrinter::Visit((op->extent + num_task_var - 1) / num_task_var); str_ += ";\n"; - CHECK_EQ(min.as_int32(), 0); + PADDLE_ENFORCE_EQ( + min.as_int32(), + 0, + phi::errors::InvalidArgument("The min of the for loop should be 0")); auto task_id = Var("task_id"); auto n_per_task = Var("n_per_task"); min = task_id * n_per_task; @@ -370,7 +373,10 @@ void CodeGenC::PrintCallArgs(const ir::Call *op) { } void CodeGenC::PrintCall_buffer_malloc(const ir::Call *op) { - CHECK_EQ(op->read_args.size(), 2UL); + PADDLE_ENFORCE_EQ( + op->read_args.size(), + 2UL, + phi::errors::InvalidArgument("The number of read_args should be 2")); str_ += op->name; str_ += "("; PrintCastExpr("void*", op->read_args[0]); @@ -380,7 +386,10 @@ void CodeGenC::PrintCall_buffer_malloc(const ir::Call *op) { } void CodeGenC::PrintCall_cinn_pod_value_to_(const ir::Call *op) { - CHECK_EQ(op->read_args.size(), 1UL); + PADDLE_ENFORCE_EQ( + op->read_args.size(), + 1UL, + phi::errors::InvalidArgument("The number of read_args should be 1")); str_ += op->name; str_ += "("; str_ += "&("; @@ -390,7 +399,10 @@ void CodeGenC::PrintCall_cinn_pod_value_to_(const ir::Call *op) { } void CodeGenC::PrintCall_get_address(const ir::Call *op) { - CHECK_EQ(op->read_args.size(), 1UL); + PADDLE_ENFORCE_EQ( + op->read_args.size(), + 1UL, + phi::errors::InvalidArgument("The number of read_args should be 1")); CHECK(op->write_args.empty()); auto *read_var = op->read_args.front().as_var(); auto *read_buf = op->read_args.front().as_buffer(); @@ -409,7 +421,10 @@ void CodeGenC::PrintCall_get_address(const ir::Call *op) { void CodeGenC::PrintCall_pod_values_to_array(const ir::Call *op) { CHECK(!op->read_args.empty()); - CHECK_EQ(op->write_args.size(), 1UL); + PADDLE_ENFORCE_EQ( + op->write_args.size(), + 1UL, + phi::errors::InvalidArgument("The number of write_args should be 1")); auto 
output_var = op->write_args.front().as_var_ref(); CHECK(output_var.defined()); @@ -612,9 +627,12 @@ void CodeGenC::Visit(const ir::_LoweredFunc_ *op) { DoIndent(); - CHECK_EQ(op->alloc_output_buffer_exprs.size(), - op->dealloc_output_buffer_exprs.size()) - << "the count of allocation and deallocation expressions is not match"; + PADDLE_ENFORCE_EQ( + op->alloc_output_buffer_exprs.size(), + op->dealloc_output_buffer_exprs.size(), + phi::errors::InvalidArgument( + "The count of allocation and deallocation expressions is not " + "match")); std::vector new_body; diff --git a/paddle/cinn/backends/codegen_c_x86.cc b/paddle/cinn/backends/codegen_c_x86.cc index 394b61e35816d..06a9ff1fda2f9 100644 --- a/paddle/cinn/backends/codegen_c_x86.cc +++ b/paddle/cinn/backends/codegen_c_x86.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/cinn/backends/codegen_c_x86.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace backends { @@ -53,7 +53,11 @@ void CodeGenCX86::Visit(const ir::Load *op) { } void CodeGenCX86::Visit(const ir::Broadcast *op) { - CHECK_GT(op->type().lanes(), 1); + PADDLE_ENFORCE_GT( + op->type().lanes(), + 1, + phi::errors::InvalidArgument( + "The lanes of the broadcast op should be greater than 1.")); int bits = op->type().bits() * op->type().lanes(); if (SupportsAVX512() && bits == 512) { diff --git a/paddle/cinn/backends/codegen_c_x86.h b/paddle/cinn/backends/codegen_c_x86.h index f0b040a94f1ae..bf90612292d20 100644 --- a/paddle/cinn/backends/codegen_c_x86.h +++ b/paddle/cinn/backends/codegen_c_x86.h @@ -18,7 +18,7 @@ #include "paddle/cinn/backends/codegen_c.h" #include "paddle/cinn/ir/intrinsic_ops.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace backends { @@ -114,8 +114,10 @@ void CodeGenCX86::VisitBinaryOp(const Op *op, Expr a, Expr b, const std::string &op_repr) { - CHECK_EQ(a.type(), b.type()) << " a is : " << a << ", and b is : " << b - << ". 
op_repr is : " << op_repr; + PADDLE_ENFORCE_EQ( + a.type(), + b.type(), + phi::errors::InvalidArgument("The type of a and b should be the same.")); // scalar. if (a.type().lanes() == 1) { diff --git a/paddle/cinn/backends/codegen_cuda_dev.cc b/paddle/cinn/backends/codegen_cuda_dev.cc index 9c19c6faffb73..919edfc680ca7 100644 --- a/paddle/cinn/backends/codegen_cuda_dev.cc +++ b/paddle/cinn/backends/codegen_cuda_dev.cc @@ -26,8 +26,8 @@ #include "paddle/cinn/ir/op/ir_operators.h" #include "paddle/cinn/ir/utils/ir_verify.h" #include "paddle/cinn/optim/ir_simplify.h" +#include "paddle/common/enforce.h" #include "paddle/common/errors.h" - namespace cinn { namespace backends { @@ -122,7 +122,8 @@ std::vector FilterDeallocTempBuffers(const std::vector &frees) { std::vector filtered; for (const Expr &free : frees) { const ir::Free *op = free.As(); - CHECK_NOTNULL(op); + PADDLE_ENFORCE_NOT_NULL( + op, phi::errors::InvalidArgument("Free is not a free node")); bool has_symbolic_constant = false; const ir::_Buffer_ *buffer = op->destination.As(); for (Expr shape : buffer->shape) { @@ -305,7 +306,10 @@ std::string CodeGenCUDA_Dev::Compile(const ir::Module &module, void CodeGenCUDA_Dev::PrintIncludes() { str_ += GetSourceHeader(); } void CodeGenCUDA_Dev::PrintTempBufferCreation(const ir::Buffer &buffer) { - CHECK_NE(buffer->type(), Void()); + PADDLE_ENFORCE_NE( + buffer->type(), + Void(), + phi::errors::InvalidArgument("buffer type should not be void")); // Calculate buffer size and determine if it contains a symbolic constant Expr buffer_size(1); for (int i = 0; i < buffer->shape.size(); i++) { diff --git a/paddle/cinn/backends/codegen_cuda_host.cc b/paddle/cinn/backends/codegen_cuda_host.cc index b888db7c7c726..1ba4714153395 100644 --- a/paddle/cinn/backends/codegen_cuda_host.cc +++ b/paddle/cinn/backends/codegen_cuda_host.cc @@ -23,7 +23,7 @@ #include "paddle/cinn/backends/extern_func_jit_register.h" #include "paddle/cinn/backends/llvm/llvm_util.h" #include 
"paddle/cinn/runtime/intrinsic.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace backends { @@ -65,10 +65,22 @@ llvm::Value* CodeGenCUDA_Host::LowerGPUKernelLauncher( llvm::Value* kernel_stream = nullptr; if (ll_function_args.size() == 3) { kernel_stream = ll_function_args[2]; - CHECK_EQ(kernel_stream->getType(), ll_void_p_ty()); // void* stream + PADDLE_ENFORCE_EQ( + kernel_stream->getType(), + ll_void_p_ty(), + phi::errors::InvalidArgument( + "The type of kernel_stream should be void*")); // void* stream } - CHECK_EQ(kernel_args->getType(), ll_void_p_ty()); // void* args - CHECK_EQ(kernel_args_count->getType(), ll_int32_ty()); // int32 + PADDLE_ENFORCE_EQ( + kernel_args->getType(), + ll_void_p_ty(), + phi::errors::InvalidArgument( + "The type of kernel_args should be void*")); // void* args + PADDLE_ENFORCE_EQ( + kernel_args_count->getType(), + ll_int32_ty(), + phi::errors::InvalidArgument( + "The type of kernel_args_count should be int32")); // int32 std::unordered_map global_args = { {KERNEL_ARGS, kernel_args}, @@ -199,7 +211,11 @@ llvm::Value* CodeGenCUDA_Host::LowerHostFunc(const ir::_LoweredFunc_* func) { // @} // Set local scope table - CHECK_EQ(ll_function_args.size(), func->args.size()); + PADDLE_ENFORCE_EQ(ll_function_args.size(), + func->args.size(), + phi::errors::InvalidArgument( + "The number of arguments is not equal to the number of " + "function arguments")); for (int i = 0; i < ll_function_args.size(); ++i) { SetVar(func->args[i].name(), ll_function_args[i]); } @@ -224,7 +240,11 @@ llvm::Value* CodeGenCUDA_Host::LowerParseArgsValueCall( const ir::Call* call_ir) { auto ret_type = CinnTypeToLLVMType(Int(64), m_); std::vector args_type; - CHECK_EQ(call_ir->read_args.size(), 2); + PADDLE_ENFORCE_EQ( + call_ir->read_args.size(), + 2, + phi::errors::InvalidArgument( + "The number of arguments of ParseArgsValue should be 2")); CHECK(call_ir->read_args[0].is_var() && call_ir->read_args[0].as_var()->type().is_cpp_handle()); 
CHECK(call_ir->read_args[1].type().is_int(32)); @@ -251,10 +271,22 @@ llvm::Value* CodeGenCUDA_Host::LowerCUDAKernelCall(const ir::Call* call_ir) { llvm::Value* kernel_stream = nullptr; if (ll_function_args.size() == 3) { kernel_stream = ll_function_args[2]; - CHECK_EQ(kernel_stream->getType(), ll_void_p_ty()); // void* stream + PADDLE_ENFORCE_EQ( + kernel_stream->getType(), + ll_void_p_ty(), + phi::errors::InvalidArgument( + "The type of kernel_stream should be void*")); // void* stream } - CHECK_EQ(kernel_args->getType(), ll_void_p_ty()); // void* args - CHECK_EQ(kernel_args_count->getType(), ll_int32_ty()); // int32 + PADDLE_ENFORCE_EQ( + kernel_args->getType(), + ll_void_p_ty(), + phi::errors::InvalidArgument( + "The type of kernel_args should be void*")); // void* args + PADDLE_ENFORCE_EQ( + kernel_args_count->getType(), + ll_int32_ty(), + phi::errors::InvalidArgument( + "The type of kernel_args_count should be int32")); // int32 std::unordered_map global_args = { {KERNEL_ARGS, kernel_args}, diff --git a/paddle/cinn/backends/codegen_device_util.cc b/paddle/cinn/backends/codegen_device_util.cc index 3373ed15e3bec..91c18ea35e9ea 100644 --- a/paddle/cinn/backends/codegen_device_util.cc +++ b/paddle/cinn/backends/codegen_device_util.cc @@ -68,6 +68,18 @@ std::string Predicate2String(ir::Expr predicate) { return ss.str(); } +static std::string CurTailFnName(const std::string &origin_fn_name) { + const int MaxStrLength = 16383; + if (origin_fn_name.length() <= MaxStrLength) { + return origin_fn_name; + } + VLOG(6) << "Funtion name too long. 
Curtail and concat hash."; + const std::string new_fn_name = + origin_fn_name.substr(0, MaxStrLength) + + std::to_string(std::hash()(origin_fn_name)); + return new_fn_name; +} + std::string detail::CollectBucketStrategyHostFunctionVisitor::GenDeviceKernelName( const std::string &fn_name, ir::Expr predicate) { @@ -80,7 +92,10 @@ detail::CollectBucketStrategyHostFunctionVisitor::GenDeviceKernelName( pos = cond_str.find("-", pos + replacement.length()); } VLOG(3) << "predicate string: " << cond_str; - return fn_name + "__COND_" + cond_str + "__kernel"; + // NOTE(chenxi67): The kernel name is too long to be supported in cuda12.3 so + // we need to curtail it. + const std::string new_fn_name = CurTailFnName(fn_name); + return new_fn_name + "__COND_" + cond_str + "__kernel"; } void detail::CollectBucketStrategyHostFunctionVisitor::ProcessLoweredFunc( diff --git a/paddle/cinn/backends/codegen_device_util.h b/paddle/cinn/backends/codegen_device_util.h index caada3153e63b..ff3114c71296b 100644 --- a/paddle/cinn/backends/codegen_device_util.h +++ b/paddle/cinn/backends/codegen_device_util.h @@ -27,7 +27,7 @@ #include "paddle/cinn/ir/ir_mutator.h" #include "paddle/cinn/ir/utils/ir_copy.h" #include "paddle/cinn/runtime/flags.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace backends { @@ -205,7 +205,11 @@ struct CollectBucketStrategyHostFunctionVisitor if (op->functions.size() == 1 && op->predicates.size() == 0) { expr->as_module()->predicates.push_back(ir::Expr(true)); } - CHECK_EQ(op->functions.size(), op->predicates.size()); + PADDLE_ENFORCE_EQ( + op->functions.size(), + op->predicates.size(), + phi::errors::InvalidArgument( + "The size of functions and predicates should be equal")); for (int i = 0; i < op->functions.size(); ++i) { ProcessLoweredFunc(op->functions[i], op->predicates[i]); if (i == 0) { diff --git a/paddle/cinn/backends/compiler.cc b/paddle/cinn/backends/compiler.cc index 4f02a35411413..72678eec44c22 100644 --- 
a/paddle/cinn/backends/compiler.cc +++ b/paddle/cinn/backends/compiler.cc @@ -230,15 +230,23 @@ void SourceCodePrint::write(const std::string& source_code) { } } -void Compiler::Build(const Module& module, const std::string& code) { - auto PatternMatch = - adt::match{[&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, - [&](common::X86Arch) { CompileX86Module(module); }, - [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, - [&](common::NVGPUArch) { CompileCudaModule(module, code); }}; +void Compiler::Build(const Module& module, + const std::string& code, + const bool end) { + auto PatternMatch = adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { CompileX86Module(module, end); }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { CompileCudaModule(module, code, end); }}; return std::visit(PatternMatch, target_.arch.variant()); } +void Compiler::AppendCX86(const Module& module) { + VLOG(3) << "Start Compiler::BuildCX86" << module; + CompileX86Module(module, true); + VLOG(3) << "Over Compiler::BuildCX86"; +} + std::string Compiler::GetSourceCode(const ir::Module& module) { return target_.arch.Visit(adt::match{ [&](common::UnknownArch) -> std::string { CINN_NOT_IMPLEMENTED; }, @@ -287,7 +295,8 @@ std::string GetFileContent(const std::string& path) { } // namespace void Compiler::CompileCudaModule(const Module& module, - const std::string& code) { + const std::string& code, + bool add_module) { #ifdef CINN_WITH_CUDA auto _host_module_device_module_ = SplitDeviceAndHostModule(module); // NOLINT @@ -337,15 +346,15 @@ void Compiler::CompileCudaModule(const Module& module, } engine_ = ExecutionEngine::Create(ExecutionOptions(), std::move(symbols)); - engine_->Link(host_module); + engine_->Link(host_module, add_module); #else CINN_NOT_IMPLEMENTED #endif } -void Compiler::CompileX86Module(const Module& module) { - engine_->Link(module); +void Compiler::CompileX86Module(const Module& module, bool add_module) { + 
engine_->Link(module, add_module); } void Compiler::ExportObject(const std::string& path) { diff --git a/paddle/cinn/backends/compiler.h b/paddle/cinn/backends/compiler.h index f269b00492a42..d43455cf76287 100644 --- a/paddle/cinn/backends/compiler.h +++ b/paddle/cinn/backends/compiler.h @@ -107,7 +107,10 @@ class Compiler final { /** * Compile and link to a CINN module. */ - void Build(const ir::Module& module, const std::string& code = ""); + void Build(const ir::Module& module, + const std::string& code = "", + const bool end = true); + void AppendCX86(const ir::Module& module); void ExportObject(const std::string& path); @@ -125,9 +128,10 @@ class Compiler final { private: void CompileCudaModule(const ir::Module& module, - const std::string& code = ""); + const std::string& code = "", + bool add_module = true); - void CompileX86Module(const ir::Module& module); + void CompileX86Module(const ir::Module& module, bool add_module = true); explicit Compiler(const Target& target) : target_(target), engine_(ExecutionEngine::Create(ExecutionOptions())) {} diff --git a/paddle/cinn/backends/function_prototype.cc b/paddle/cinn/backends/function_prototype.cc index e413521246b8f..e46b172bf65ed 100644 --- a/paddle/cinn/backends/function_prototype.cc +++ b/paddle/cinn/backends/function_prototype.cc @@ -20,7 +20,7 @@ #include "paddle/cinn/ir/tensor.h" #include "paddle/cinn/runtime/flags.h" - +#include "paddle/common/enforce.h" PD_DECLARE_bool(verbose_function_register); namespace cinn { @@ -42,13 +42,22 @@ bool FunctionProto::Match(const ir::Call *op) const { } void FunctionProto::AssertMatch(const ir::Call *op) const { - CHECK_EQ(name, op->name); - CHECK_EQ(ret_type, op->type()) - << "function proto " << name << " check failed"; - CHECK_EQ(op->read_args.size(), readonly_arg_types.size()) - << "function proto " << name << " check failed"; - CHECK_EQ(op->write_args.size(), mutable_arg_types.size()) - << "function proto " << name << " check failed"; + PADDLE_ENFORCE_EQ( + name, 
+ op->name, + phi::errors::InvalidArgument("function proto's op name check failed")); + PADDLE_ENFORCE_EQ( + ret_type, + op->type(), + phi::errors::InvalidArgument("function proto's op type check failed")); + PADDLE_ENFORCE_EQ(op->read_args.size(), + readonly_arg_types.size(), + phi::errors::InvalidArgument( + "function proto's readonly arg types check failed")); + PADDLE_ENFORCE_EQ(op->write_args.size(), + mutable_arg_types.size(), + phi::errors::InvalidArgument( + "function proto's mutable arg types check failed")); auto get_type = [](Expr u) { if (u.as_tensor() || u.as_buffer()) { @@ -61,14 +70,21 @@ void FunctionProto::AssertMatch(const ir::Call *op) const { if (readonly_arg_types[i] == type_of()) { if (!op->read_args[i].as_tensor()) continue; } else { - CHECK_EQ(get_type(op->read_args[i]), readonly_arg_types[i]); + PADDLE_ENFORCE_EQ( + get_type(op->read_args[i]), + readonly_arg_types[i], + phi::errors::InvalidArgument( + "function proto's readonly arg types check failed")); } } for (int i = 0; i < op->write_args.size(); i++) { if (mutable_arg_types[i] == type_of()) { if (!op->write_args[i].as_tensor()) continue; } else { - CHECK_EQ(get_type(op->write_args[i]), mutable_arg_types[i]); + PADDLE_ENFORCE_EQ(get_type(op->write_args[i]), + mutable_arg_types[i], + phi::errors::InvalidArgument( + "function proto's mutable arg types check failed")); } } } @@ -86,7 +102,10 @@ void FunctionProto::CheckValid() { FunctionProto::shape_inference_t FunctionProto::ShapeFollowNthArgument(int n) { return [=](const std::vector &args, int value_offset) { - CHECK_LT(n, args.size()); + PADDLE_ENFORCE_LT( + n, + args.size(), + phi::errors::InvalidArgument("The argument index is out of range")); auto x = args[n].as_tensor(); CHECK(x); return x->shape; diff --git a/paddle/cinn/backends/ir_schedule_test.cc b/paddle/cinn/backends/ir_schedule_test.cc index 29eae201bbb78..a516b20c75804 100644 --- a/paddle/cinn/backends/ir_schedule_test.cc +++ b/paddle/cinn/backends/ir_schedule_test.cc @@ 
-31,7 +31,7 @@ #include "paddle/cinn/optim/remove_schedule_block.h" #include "paddle/cinn/optim/unroll_loops.h" #include "paddle/cinn/optim/vectorize_loops.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace backends { @@ -563,7 +563,10 @@ TEST(IrSchedule, vectorize) { ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("B"); - CHECK_EQ(loops.size(), 2U); + PADDLE_ENFORCE_EQ( + loops.size(), + 2U, + phi::errors::InvalidArgument("The size of loops should be 2.")); ir_sch.Vectorize(loops[1], 16); std::string origin = utils::GetStreamCnt(func[0]); EXPECT_EQ(origin, utils::Trim(R"ROC( @@ -637,7 +640,10 @@ TEST(IrSchedule, unroll) { ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("B"); - CHECK_EQ(loops.size(), 2U); + PADDLE_ENFORCE_EQ( + loops.size(), + 2U, + phi::errors::InvalidArgument("The size of loops should be 2.")); ir_sch.Unroll(loops[1]); std::string origin = utils::GetStreamCnt(func[0]); EXPECT_EQ(origin, utils::Trim(R"ROC( @@ -711,7 +717,10 @@ TEST(IrSchedule, bind) { ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("B"); - CHECK_EQ(loops.size(), 2U); + PADDLE_ENFORCE_EQ( + loops.size(), + 2U, + phi::errors::InvalidArgument("The size of loops should be 2.")); ir_sch.Bind(loops[0], "blockIdx.x"); std::string origin = utils::GetStreamCnt(func[0]); EXPECT_EQ(origin, utils::Trim(R"ROC( @@ -753,7 +762,10 @@ TEST(IrSchedule, simple_compute_at) { auto func = cinn::lang::LowerVec( "test_simple_compute_at", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -826,7 +838,10 @@ TEST(IrSchedule, compute_at0) { auto func = cinn::lang::LowerVec( "test_compute_at0", stages, {A, C}, {}, {}, nullptr, target, true); - 
CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -900,7 +915,10 @@ TEST(IrSchedule, compute_at1) { auto func = cinn::lang::LowerVec( "test_compute_at1", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -972,7 +990,10 @@ TEST(IrSchedule, compute_at2) { auto func = cinn::lang::LowerVec( "test_compute_at2", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1044,7 +1065,10 @@ TEST(IrSchedule, compute_at3) { auto func = cinn::lang::LowerVec( "test_compute_at3", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1125,7 +1149,10 @@ TEST(IrSchedule, compute_at4) { auto func = cinn::lang::LowerVec( "test_compute_at4", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1187,7 +1214,10 @@ TEST(IrSchedule, compute_at5) { auto func = cinn::lang::LowerVec( "test_compute_at5", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1250,7 +1280,10 @@ 
TEST(IrSchedule, compute_at6) { auto func = cinn::lang::LowerVec( "test_compute_at6", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1316,7 +1349,10 @@ TEST(IrSchedule, cache_read1) { auto func = cinn::lang::LowerVec( "test_cache_read1", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1362,7 +1398,7 @@ void test_cache_read1(void* _args, int32_t num_args) }; for (int32_t i = 0; i < 32; i += 1) { for (int32_t j = 0; j < 32; j += 1) { - B[((32 * i) + j)] = (2.00000000f * A_local_temp_buffer[((64 * i) + j)]); + B[((32 * i) + j)] = (A_local_temp_buffer[((64 * i) + j)] * 2.00000000f); }; }; for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 16; cache_ax0_0 += 1) { @@ -1372,7 +1408,7 @@ void test_cache_read1(void* _args, int32_t num_args) }; for (int32_t i = 0; i < 16; i += 1) { for (int32_t j = 0; j < 16; j += 1) { - C[((16 * i) + j)] = (1.00000000f + B_local_temp_buffer[((32 * i) + j)]); + C[((16 * i) + j)] = (B_local_temp_buffer[((32 * i) + j)] + 1.00000000f); }; }; cinn_buffer_free((void*)(0), _B); @@ -1399,7 +1435,10 @@ TEST(IrSchedule, cache_read2) { auto func = cinn::lang::LowerVec( "test_cache_read2", stages, {A, B}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1441,7 +1480,7 @@ void test_cache_read2(void* _args, int32_t num_args) for (int32_t i = 0; i < 64; i += 1) { for (int32_t j = 0; j < 32; j += 1) { A_local_temp_buffer[((32 * i) + j)] = A[((32 * i) + j)]; - B[((32 * i) + j)] = 
(2.00000000f * A_local_temp_buffer[((32 * i) + j)]); + B[((32 * i) + j)] = (A_local_temp_buffer[((32 * i) + j)] * 2.00000000f); }; }; cinn_buffer_free((void*)(0), _B); @@ -1469,7 +1508,10 @@ TEST(IrSchedule, cache_write1) { auto func = cinn::lang::LowerVec( "test_cache_write1", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1511,7 +1553,7 @@ void test_cache_write1(void* _args, int32_t num_args) float* C = ((float*)(_C->memory)); for (int32_t i = 0; i < 64; i += 1) { for (int32_t j = 0; j < 32; j += 1) { - B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]); + B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f); }; }; for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) { @@ -1521,7 +1563,7 @@ void test_cache_write1(void* _args, int32_t num_args) }; for (int32_t i = 0; i < 64; i += 1) { for (int32_t j = 0; j < 32; j += 1) { - C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]); + C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f); }; }; for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 64; cache_ax0_0 += 1) { @@ -1553,7 +1595,10 @@ TEST(IrSchedule, cache_write2) { auto func = cinn::lang::LowerVec( "test_cache_write2", stages, {A, B}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1592,7 +1637,7 @@ void test_cache_write2(void* _args, int32_t num_args) float* B = ((float*)(_B->memory)); for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) { for (int32_t cache_ax1 = 0; cache_ax1 < 32; cache_ax1 += 1) { - B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)] = (2.00000000f * A[((32 * cache_ax0) + 
cache_ax1)]); + B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)] = (A[((32 * cache_ax0) + cache_ax1)] * 2.00000000f); B[((32 * cache_ax0) + cache_ax1)] = B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)]; }; }; @@ -1624,7 +1669,10 @@ TEST(IrSchedule, cache_read3) { auto func = cinn::lang::LowerVec( "test_cache_read3", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1665,7 +1713,7 @@ void test_cache_read3(const float* __restrict__ A, float* __restrict__ C) }; for (int32_t i = 0; i < 32; i += 1) { for (int32_t j = 0; j < 32; j += 1) { - B[((32 * i) + j)] = (2.00000000f * A_local_temp_buffer[((64 * i) + j)]); + B[((32 * i) + j)] = (A_local_temp_buffer[((64 * i) + j)] * 2.00000000f); }; __syncthreads(); }; @@ -1677,7 +1725,7 @@ void test_cache_read3(const float* __restrict__ A, float* __restrict__ C) for (int32_t i = 0; i < 16; i += 1) { __syncthreads(); for (int32_t j = 0; j < 16; j += 1) { - C[((16 * i) + j)] = (1.00000000f + B_local_temp_buffer[((32 * i) + j)]); + C[((16 * i) + j)] = (B_local_temp_buffer[((32 * i) + j)] + 1.00000000f); }; }; } @@ -1705,7 +1753,10 @@ TEST(IrSchedule, cache_write3) { auto func = cinn::lang::LowerVec( "test_cache_write3", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1743,7 +1794,7 @@ void test_cache_write3(const float* __restrict__ A, float* __restrict__ C) float* B = _B_temp_buffer; for (int32_t i = 0; i < 64; i += 1) { for (int32_t j = 0; j < 32; j += 1) { - B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]); + B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f); }; }; for (int32_t 
cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) { @@ -1754,7 +1805,7 @@ void test_cache_write3(const float* __restrict__ A, float* __restrict__ C) __syncthreads(); for (int32_t i = 0; i < 64; i += 1) { for (int32_t j = 0; j < 32; j += 1) { - C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]); + C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f); }; }; __syncthreads(); @@ -1788,7 +1839,10 @@ TEST(IrSchedule, sync_threads) { auto func = cinn::lang::LowerVec( "test_sync_threads", stages, {A, C}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1824,7 +1878,7 @@ void test_sync_threads(const float* __restrict__ A, float* __restrict__ C) float* B = _B_temp_buffer; for (int32_t i = 0; i < 64; i += 1) { for (int32_t j = 0; j < 32; j += 1) { - B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]); + B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f); }; }; for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) { @@ -1835,7 +1889,7 @@ void test_sync_threads(const float* __restrict__ A, float* __restrict__ C) }; for (int32_t i = 0; i < 64; i += 1) { for (int32_t j = 0; j < 32; j += 1) { - C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]); + C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f); }; }; for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 64; cache_ax0_0 += 1) { @@ -1870,7 +1924,10 @@ TEST(IrSchedule, cache_write4) { auto func = cinn::lang::LowerVec( "test_cache_write4", stages, {A, B}, {}, {}, nullptr, target, true); - CHECK_EQ(func.size(), 1U); + PADDLE_ENFORCE_EQ( + func.size(), + 1U, + phi::errors::InvalidArgument("The size of func should be 1.")); auto ast_expr = func[0]->body; std::vector vec_ast{ast_expr}; @@ -1953,7 +2010,10 @@ TEST(IrSchedule, rfactor) { 
ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("B"); - CHECK_EQ(loops.size(), 3U); + PADDLE_ENFORCE_EQ( + loops.size(), + 3U, + phi::errors::InvalidArgument("The size of loops should be 3.")); auto new_rf_tensor = ir_sch.Rfactor(loops[2], 0); auto* new_rf_tensor_ref = new_rf_tensor.As(); CHECK(new_rf_tensor_ref); @@ -2080,7 +2140,10 @@ TEST(IrSchedule, rfactor1) { ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("B"); - CHECK_EQ(loops.size(), 3U); + PADDLE_ENFORCE_EQ( + loops.size(), + 3U, + phi::errors::InvalidArgument("The size of loops should be 3.")); auto new_rf_tensor = ir_sch.Rfactor(loops[1], 1); auto* new_rf_tensor_ref = new_rf_tensor.As(); CHECK(new_rf_tensor_ref); @@ -2206,7 +2269,10 @@ TEST(IrSchedule, rfactor2) { ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("C"); - CHECK_EQ(loops.size(), 3U); + PADDLE_ENFORCE_EQ( + loops.size(), + 3U, + phi::errors::InvalidArgument("The size of loops should be 3.")); auto new_rf_tensor = ir_sch.Rfactor(loops[2], 0); auto* new_rf_tensor_ref = new_rf_tensor.As(); CHECK(new_rf_tensor_ref); @@ -2347,7 +2413,10 @@ TEST(IrSchedule, factorize_reduction) { ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("B"); - CHECK_EQ(loops.size(), 3U); + PADDLE_ENFORCE_EQ( + loops.size(), + 3U, + phi::errors::InvalidArgument("The size of loops should be 3.")); auto new_rf_tensor = ir_sch.FactorizeReduction(loops[1], 0); auto* new_rf_tensor_ref = new_rf_tensor.As(); CHECK(new_rf_tensor_ref); @@ -2436,7 +2505,10 @@ TEST(IrSchedule, factorize_reduction1) { ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("B"); - CHECK_EQ(loops.size(), 3U); + PADDLE_ENFORCE_EQ( + loops.size(), + 3U, + phi::errors::InvalidArgument("The size of loops should be 3.")); auto new_rf_tensor = ir_sch.FactorizeReduction(loops[1], 
1); auto* new_rf_tensor_ref = new_rf_tensor.As(); CHECK(new_rf_tensor_ref); @@ -2520,9 +2592,15 @@ TEST(IrSchedule, factorize_reduction2) { ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); auto loops = ir_sch.GetLoops("B"); - CHECK_EQ(loops.size(), 2U); + PADDLE_ENFORCE_EQ( + loops.size(), + 2U, + phi::errors::InvalidArgument("The size of loops should be 2.")); auto splited_loops = ir_sch.Split(loops[1], {4, 5}); - CHECK_EQ(splited_loops.size(), 2U); + PADDLE_ENFORCE_EQ( + splited_loops.size(), + 2U, + phi::errors::InvalidArgument("The size of splited_loops should be 2.")); auto new_rf_tensor = ir_sch.FactorizeReduction(splited_loops[0], 1); auto* new_rf_tensor_ref = new_rf_tensor.As(); CHECK(new_rf_tensor_ref); @@ -2638,7 +2716,7 @@ void test_compute_inline1(void* _args, int32_t num_args) for (int32_t i = 0; i < 32; i += 1) { for (int32_t j = 0; j < 32; j += 1) { for (int32_t k = 0; k < 32; k += 1) { - C[((1024 * i) + ((32 * j) + k))] = fma(2.00000000f, A[((32 * i) + ((1024 * j) + k))], 2.00000000f); + C[((1024 * i) + ((32 * j) + k))] = ((A[((32 * i) + ((1024 * j) + k))] + 1.00000000f) * 2.00000000f); }; }; }; @@ -2712,7 +2790,7 @@ void test_compute_inline2(void* _args, int32_t num_args) for (int32_t i = 0; i < 32; i += 1) { for (int32_t j = 0; j < 32; j += 1) { for (int32_t k = 0; k < 32; k += 1) { - C[((1024 * i) + ((32 * j) + k))] = fma(2.00000000f, A[((1024 * i) + ((32 * j) + k))], 2.00000000f); + C[((1024 * i) + ((32 * j) + k))] = ((A[((1024 * i) + ((32 * j) + k))] + 1.00000000f) * 2.00000000f); }; }; }; @@ -2777,7 +2855,7 @@ void test_compute_inline3(const float* __restrict__ A, float* __restrict__ C) for (int32_t i = 0; i < 32; i += 1) { for (int32_t j = 0; j < 32; j += 1) { for (int32_t k = 0; k < 32; k += 1) { - C[((1024 * i) + ((32 * j) + k))] = (2.00000000f + (2.00000000f * A[((32 * i) + ((1024 * j) + k))])); + C[((1024 * i) + ((32 * j) + k))] = ((A[((32 * i) + ((1024 * j) + k))] + 1.00000000f) * 2.00000000f); }; }; }; @@ -2839,7 
+2917,7 @@ void test_compute_inline4(const float* __restrict__ A, float* __restrict__ C) for (int32_t i = 0; i < 32; i += 1) { for (int32_t j = 0; j < 32; j += 1) { for (int32_t k = 0; k < 32; k += 1) { - C[((1024 * i) + ((32 * j) + k))] = (2.00000000f + (2.00000000f * A[((1024 * i) + ((32 * j) + k))])); + C[((1024 * i) + ((32 * j) + k))] = ((A[((1024 * i) + ((32 * j) + k))] + 1.00000000f) * 2.00000000f); }; }; }; @@ -2901,7 +2979,7 @@ void test_compute_inline1(void* _args, int32_t num_args) float* C = ((float*)(_C->memory)); for (int32_t i = 0; i < 32; i += 1) { for (int32_t j = 0; j < 64; j += 1) { - C[((32 * j) + i)] = fma(2.00000000f, A[((64 * i) + j)], 2.00000000f); + C[((32 * j) + i)] = (2.00000000f * (1.00000000f + A[((64 * i) + j)])); }; }; cinn_buffer_free((void*)(0), _B); @@ -2969,7 +3047,7 @@ void test_compute_inline1(void* _args, int32_t num_args) for (int32_t i = 0; i < 32; i += 1) { for (int32_t j = 0; j < 32; j += 1) { for (int32_t k = 0; k < 32; k += 1) { - C[((32 * i) + ((1024 * j) + k))] = fma(2.00000000f, A[((1024 * i) + ((32 * j) + k))], 2.00000000f); + C[((32 * i) + ((1024 * j) + k))] = (2.00000000f * (1.00000000f + A[((1024 * i) + ((32 * j) + k))])); }; }; }; @@ -3047,7 +3125,7 @@ void test_copytransform1(void* _args, int32_t num_args) for (int32_t j = 0; j < 8; j += 1) { for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) { for (int32_t k = 0; k < 32; k += 1) { - B[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (1.00000000f + A[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))]); + B[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (A[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] + 1.00000000f); }; }; }; @@ -3058,7 +3136,7 @@ void test_copytransform1(void* _args, int32_t num_args) for (int32_t j = 0; j < 8; j += 1) { for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) { for (int32_t k = 0; k < 32; k += 1) { - C[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (2.00000000f * B[((256 
* i) + ((32 * i_0) + ((4096 * j) + ((1024 * j_0) + k))))]); + C[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (B[((256 * i) + ((32 * i_0) + ((4096 * j) + ((1024 * j_0) + k))))] * 2.00000000f); }; }; }; @@ -3136,7 +3214,7 @@ void test_copytransform2(void* _args, int32_t num_args) for (int32_t i_0 = 0; i_0 < 8; i_0 += 1) { for (int32_t j = 0; j < 64; j += 1) { for (int32_t k = 0; k < 128; k += 1) { - B[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] = (1.00000000f + A[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))]); + B[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] = (A[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] + 1.00000000f); }; }; }; @@ -3146,7 +3224,7 @@ void test_copytransform2(void* _args, int32_t num_args) for (int32_t j = 0; j < 8; j += 1) { for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) { for (int32_t k = 0; k < 128; k += 1) { - C[((32768 * i) + ((4096 * i_0) + ((512 * j) + ((128 * j_0) + k))))] = (2.00000000f * B[((65536 * i) + ((8192 * i_0) + ((512 * j) + ((128 * j_0) + k))))]); + C[((32768 * i) + ((4096 * i_0) + ((512 * j) + ((128 * j_0) + k))))] = (B[((65536 * i) + ((8192 * i_0) + ((512 * j) + ((128 * j_0) + k))))] * 2.00000000f); }; }; }; @@ -3278,13 +3356,19 @@ TEST(IrSchedule, ComplexIndices) { VLOG(3) << "Lowered Expr:" << ir_sch.GetModule().GetExprs().front(); auto loops_b = ir_sch.GetLoops("B"); - CHECK_EQ(loops_b.size(), 2); + PADDLE_ENFORCE_EQ( + loops_b.size(), + 2, + phi::errors::InvalidArgument("The loops size of B should be 2.")); ir_sch.Split("B", 0, {8, -1}); ir_sch.Split( "B", 2, {32, -1}); // after first splited, loops size has added to 3 VLOG(3) << "Splited Expr:" << ir_sch.GetModule().GetExprs().front(); - CHECK_EQ(ir_sch.GetLoops("B").size(), 4); + PADDLE_ENFORCE_EQ(ir_sch.GetLoops("B").size(), + 4, + phi::errors::InvalidArgument( + "The loops size of B should be 4 after split.")); ir_sch.Reorder("B", {2, 0, 3, 1}); VLOG(3) << "Reordered Expr:\n" << ir_sch.GetModule().GetExprs().front(); diff --git 
a/paddle/cinn/backends/llvm/codegen_llvm.cc b/paddle/cinn/backends/llvm/codegen_llvm.cc index 2f8a387045bf6..d7889ebb9fc15 100644 --- a/paddle/cinn/backends/llvm/codegen_llvm.cc +++ b/paddle/cinn/backends/llvm/codegen_llvm.cc @@ -24,7 +24,6 @@ #include #include #include - #include #include #include @@ -32,6 +31,7 @@ #include #include #include +#include "paddle/common/enforce.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -205,12 +205,12 @@ llvm::Value *CodeGenLLVM::EmitBinaryOp(llvm::Value *lhs, bool is_integral, bool is_signed) { llvm::Instruction::BinaryOps ops; - CHECK_EQ(lhs->getType(), rhs->getType()) - << "the types of operands of binary operation are mismatch" - << ", lhs[" << DumpToString(*lhs) << "] " << opcode << " rhs[" - << DumpToString(*rhs) << "]" - << ", lhs_type[" << DumpToString(*lhs->getType()) << "], rhs_type[" - << DumpToString(*rhs->getType()) << "]"; + PADDLE_ENFORCE_EQ( + lhs->getType(), + rhs->getType(), + phi::errors::InvalidArgument( + "the types of operands of binary operation are mismatch")); + switch (opcode) { case '+': ops = is_integral ? 
llvm::Instruction::BinaryOps::Add @@ -288,6 +288,7 @@ llvm::Value *CodeGenLLVM::Visit(const ir::Sub *op) { } llvm::Value *CodeGenLLVM::Visit(const ir::Mul *op) { + ir::TryElevateInt32ToInt64({op->a(), op->b()}); auto *lhs = Visit(&op->a()); auto *rhs = Visit(&op->b()); return EmitBinaryOp(lhs, rhs, '*', is_integral_type(op->type())); @@ -591,8 +592,8 @@ llvm::Value *CodeGenLLVM::CreateSerialFor(const ir::For *op, int stride) { llvm::Value *old_var = GetVar(op->loop_var->name); // loop iterator - llvm::AllocaInst *loop_var = - Alloca(b_->getInt32Ty(), nullptr, op->loop_var->name); + llvm::AllocaInst *loop_var = Alloca( + b_->getIntNTy(op->min->type().bits()), nullptr, op->loop_var->name); loop_var->setAlignment(llvm::Align(4)); SetVar(op->loop_var->name, loop_var); @@ -613,7 +614,8 @@ llvm::Value *CodeGenLLVM::CreateSerialFor(const ir::For *op, int stride) { // loop_body b_->SetInsertPoint(body_bb); - llvm::Value *step = llvm::ConstantInt::get(b_->getInt32Ty(), stride); + llvm::Value *step = + llvm::ConstantInt::get(b_->getIntNTy(op->min->type().bits()), stride); Visit(&op->body); llvm::Value *indvar_inc = Add(indvar, @@ -880,7 +882,10 @@ llvm::Value *CodeGenLLVM::Visit(const ir::Load *op) { { int alignment = op->type().bits(); alignment = 8; - CHECK_GT(alignment, 0); + PADDLE_ENFORCE_GT( + alignment, + 0, + phi::errors::InvalidArgument("alignment should be greater than 0")); load_inst->setAlignment(llvm::Align(std::min(alignment, 8))); } @@ -949,7 +954,10 @@ llvm::Value *CodeGenLLVM::Visit(const ir::Store *op) { { int alignment = op->type().bits(); alignment = 8; - CHECK_GT(alignment, 0); + PADDLE_ENFORCE_GT( + alignment, + 0, + phi::errors::InvalidArgument("alignment should be greater than 0")); store_inst->setAlignment(llvm::Align(std::min(alignment, 8))); } // TODO(fc500110): tbaa AliasAnalysis @@ -1059,9 +1067,12 @@ llvm::Value *CodeGenLLVM::Visit(const ir::_LoweredFunc_ *op) { auto init_function_state = [this]() { alias_vars_.clear(); }; init_function_state(); 
- CHECK_EQ(op->alloc_output_buffer_exprs.size(), - op->dealloc_output_buffer_exprs.size()) - << "the count of allocation and deallocation expressions is not match"; + PADDLE_ENFORCE_EQ( + op->alloc_output_buffer_exprs.size(), + op->dealloc_output_buffer_exprs.size(), + phi::errors::InvalidArgument( + "the count of allocation and deallocation expressions is not " + "match")); std::vector new_body; auto create_temp_buffers = op->PrepareCreateTempBufferExprs(); @@ -1228,7 +1239,11 @@ llvm::Value *CodeGenLLVM::EmitCall_get_address(const ir::Call *op) { llvm::Value *CodeGenLLVM::EmitCall_debug_info(const ir::Call *op) { auto callee = m_->getFunction(runtime::intrinsic::debug_log_repr); - CHECK_GE(op->read_args.size(), 1UL); + PADDLE_ENFORCE_GE(op->read_args.size(), + 1UL, + phi::errors::InvalidArgument( + "The arguments of debug_log_repr should be greater " + "than 1")); std::vector args; for (auto &arg : op->read_args) { args.push_back(Visit(&arg)); @@ -1315,7 +1330,9 @@ llvm::Value *CodeGenLLVM::DenseVectorLoad(const ir::Load *op) { slices.push_back(load_inst); } - CHECK_EQ(slices.size(), 1UL); + PADDLE_ENFORCE_EQ(slices.size(), + 1UL, + phi::errors::InvalidArgument("slices size should be 1.")); return slices[0]; } @@ -1323,7 +1340,11 @@ llvm::Value *CodeGenLLVM::DenseVectorLoad(const ir::Load *op) { llvm::Value *CodeGenLLVM::CreateBufferVecPtr(Type t, llvm::Value *buffer, llvm::Value *index) { - CHECK_GT(t.lanes(), 1) << "type is not a vector type: " << t; + PADDLE_ENFORCE_GT(t.lanes(), + 1, + phi::errors::InvalidArgument("type lanes should be greater " + "than 1, but received %d", + t.lanes())); llvm::PointerType *btype = llvm::dyn_cast(buffer->getType()); CHECK(btype); @@ -1338,7 +1359,11 @@ llvm::Value *CodeGenLLVM::CreateBufferVecPtr(Type t, llvm::Value *CodeGenLLVM::CreateBufferPtr(Type t, llvm::Value *buffer, llvm::Value *index) { - CHECK_EQ(t.lanes(), 1); + PADDLE_ENFORCE_EQ(t.lanes(), + 1, + phi::errors::InvalidArgument("type lanes should be 1, but " + 
"received %d", + t.lanes())); auto *btype = llvm::dyn_cast(buffer->getType()); CHECK(btype); auto *ptype = @@ -1355,7 +1380,10 @@ llvm::Value *CodeGenLLVM::CreateVecSlice(llvm::Value *vec, int lanes) { int total_lanes = llvm::dyn_cast(vec->getType())->getNumElements(); - CHECK_LE(begin + lanes, total_lanes); + PADDLE_ENFORCE_LE(begin + lanes, + total_lanes, + phi::errors::InvalidArgument( + "begin + lanes should be less than total_lanes")); if (lanes == total_lanes && begin == 0) return vec; // full slice std::vector indices; for (int i = 0; i < lanes; ++i) { @@ -1422,7 +1450,10 @@ void CodeGenLLVM::AddTbaaMetadata(llvm::Instruction *inst, if (pstride_int && pbase_int) { int stride = pstride_int->value; base = pbase_int->value; - CHECK_GE(base, 0); + PADDLE_ENFORCE_GE( + base, + 0, + phi::errors::InvalidArgument("base should be greater than 0")); width = NextPowerOfTwo(ramp->lanes * stride); while (base % width) { @@ -1491,12 +1522,15 @@ llvm::Value *CodeGenLLVM::Visit(const ir::intrinsics::BufferCreate *op) { CHECK(buffer_node); std::vector args( {ll_const_int32(buffer_node->target.runtime_arch())}); - uint64_t memory_size = (buffer_node->dtype.ElementOf().bits() + 7) / 8; - for (auto shape : buffer_node->shape) { - int shape_int = shape.as_int32(); - memory_size *= shape_int; + int64_t memory_size = (buffer_node->dtype.ElementOf().bits() + 7) / 8; + // Calculate buffer size and determine if it contains a symbolic constant + Expr buffer_size(static_cast(1)); + buffer_size = buffer_size * ir::Expr(memory_size); + for (int i = 0; i < buffer_node->shape.size(); i++) { + buffer_size = buffer_size * buffer_node->shape[i]; } - args.push_back(ll_const_int64(memory_size)); + ir::TryElevateInt32ToInt64({buffer_size}); + args.push_back(Visit(&buffer_size)); args.push_back(ll_const_int32(32)); return Call(callee, args); @@ -1596,29 +1630,50 @@ llvm::Value *CodeGenLLVM::Visit(const ir::intrinsics::BuiltinIntrin *op) { std::string func_name = op->name; if (op->id == -1) { if 
(func_name == "bitwise_and") { - CHECK_GE(op->args.size(), 2U); + PADDLE_ENFORCE_GE(op->args.size(), + 2U, + phi::errors::InvalidArgument( + "bitwise_and should have at least 2 arguments")); return b_->CreateAnd(Visit(&op->args[0]), Visit(&op->args[1])); } else if (func_name == "bitwise_or") { - CHECK_GE(op->args.size(), 2U); + PADDLE_ENFORCE_GE(op->args.size(), + 2U, + phi::errors::InvalidArgument( + "bitwise_or should have at least 2 arguments")); return b_->CreateOr(Visit(&op->args[0]), Visit(&op->args[1])); } else if (func_name == "bitwise_xor") { - CHECK_GE(op->args.size(), 2U); + PADDLE_ENFORCE_GE(op->args.size(), + 2U, + phi::errors::InvalidArgument( + "bitwise_xor should have at least 2 arguments")); return b_->CreateXor(Visit(&op->args[0]), Visit(&op->args[1])); } else if (func_name == "bitwise_not") { - CHECK_GE(op->args.size(), 1U); + PADDLE_ENFORCE_GE(op->args.size(), + 1U, + phi::errors::InvalidArgument( + "bitwise_not should have at least 1 argument")); return b_->CreateNot(Visit(&op->args[0])); } else if (func_name == "left_shift") { - CHECK_GE(op->args.size(), 2U); + PADDLE_ENFORCE_GE(op->args.size(), + 2U, + phi::errors::InvalidArgument( + "left_shift should have at least 2 arguments")); return b_->CreateShl(Visit(&op->args[0]), Visit(&op->args[1])); } else if (func_name == "right_shift") { - CHECK_GE(op->args.size(), 2U); + PADDLE_ENFORCE_GE(op->args.size(), + 2U, + phi::errors::InvalidArgument( + "right_shift should have at least 2 arguments")); if (op->args[0]->type().is_int()) { return b_->CreateAShr(Visit(&op->args[0]), Visit(&op->args[1])); } else { return b_->CreateLShr(Visit(&op->args[0]), Visit(&op->args[1])); } } else if (func_name == "isnan") { - CHECK_GE(op->args.size(), 1U); + PADDLE_ENFORCE_GE(op->args.size(), + 1U, + phi::errors::InvalidArgument( + "isnan should have at least 1 argument")); llvm::Value *v = Visit(&op->args[0]); return b_->CreateFCmpUNO(v, v); } diff --git a/paddle/cinn/backends/llvm/codegen_llvm_test.cc 
b/paddle/cinn/backends/llvm/codegen_llvm_test.cc index 930e70f22e869..074e960aba678 100644 --- a/paddle/cinn/backends/llvm/codegen_llvm_test.cc +++ b/paddle/cinn/backends/llvm/codegen_llvm_test.cc @@ -21,12 +21,12 @@ #include #include #include - #include #include #include #include #include +#include "paddle/common/enforce.h" #include "paddle/cinn/backends/llvm/cinn_runtime_llvm_ir.h" #include "paddle/cinn/cinn.h" @@ -96,7 +96,10 @@ auto CreateIrBuffer(cinn::common::Type t, std::string name, std::vector shape, int data_alignment = 0) { - CHECK_GE(data_alignment, 0); + PADDLE_ENFORCE_GE(data_alignment, + 0, + phi::errors::InvalidArgument( + "data_alignment should be greater than or equal to 0")); auto buffer = ir::_Buffer_::Make(std::move(name), std::move(t)); if (data_alignment) { diff --git a/paddle/cinn/backends/llvm/codegen_x86.cc b/paddle/cinn/backends/llvm/codegen_x86.cc index cfd796162241c..5987e3af7a7c3 100644 --- a/paddle/cinn/backends/llvm/codegen_x86.cc +++ b/paddle/cinn/backends/llvm/codegen_x86.cc @@ -30,7 +30,7 @@ #include "paddle/cinn/ir/op/ir_operators.h" #include "paddle/cinn/ir/utils/ir_nodes_collector.h" #include "paddle/cinn/runtime/intrinsic.h" - +#include "paddle/common/enforce.h" namespace cinn::backends { CodeGenX86::CodeGenX86(llvm::Module* m, @@ -144,8 +144,10 @@ void CodeGenX86::CreateParallelLaunch(Expr body, int num_task) { symbol_table_->PopScope(); std::swap(parallel_env_, par_env); std::swap(f_, f); - CHECK_NE(par_env.parallel_loop_count, 0) - << "find no parallel loop within parallel launch"; + PADDLE_ENFORCE_NE(par_env.parallel_loop_count, + 0, + phi::errors::InvalidArgument( + "find no parallel loop within parallel launch")); b_->SetInsertPoint(launch_end); } diff --git a/paddle/cinn/backends/llvm/execution_engine.cc b/paddle/cinn/backends/llvm/execution_engine.cc index 050fd4e0d8389..8a84d69a1d7a0 100644 --- a/paddle/cinn/backends/llvm/execution_engine.cc +++ b/paddle/cinn/backends/llvm/execution_engine.cc @@ -166,17 +166,20 @@ 
std::unique_ptr NaiveObjectCache::getObject( VLOG(2) << "===================== Create CINN ExecutionEngine end " "===================="; + engine->ctx = std::make_unique(); + engine->b = std::make_unique>(*engine->ctx); + llvm::SMDiagnostic error; + engine->m = llvm::parseAssemblyString( + AsStringRef(backends::kRuntimeLlvmIr), error, *engine->ctx); + return engine; } template -void ExecutionEngine::Link(const ir::Module &module) { +void ExecutionEngine::Link(const ir::Module &module, bool add_module) { utils::RecordEvent("ExecutionEngine Link", utils::EventType::kOrdinary); llvm::SMDiagnostic error; - auto ctx = std::make_unique(); - auto m = llvm::parseAssemblyString( - AsStringRef(backends::kRuntimeLlvmIr), error, *ctx); - auto b = std::make_unique>(*ctx); + auto ir_emitter = std::make_unique(m.get(), b.get()); VLOG(3) << "ir_emitter->Compile(module) Begin"; ir_emitter->Compile(module); @@ -200,7 +203,9 @@ void ExecutionEngine::Link(const ir::Module &module) { pass_manager, rawstream, nullptr, llvm::CGFT_ObjectFile); pass_manager.run(*m); - CHECK(AddModule(std::move(m), std::move(ctx))); + if (add_module) { + AddSelfModule(); + } if (VLOG_IS_ON(5)) { VLOG(5) << "======= dump jit execution session ======"; @@ -231,6 +236,9 @@ bool ExecutionEngine::AddModule(std::unique_ptr module, llvm::cantFail(jit_->addIRModule(std::move(tsm))); return true; } +bool ExecutionEngine::AddSelfModule() { + return AddModule(std::move(m), std::move(ctx)); +} void ExecutionEngine::ExportObject(const std::string &path) { FILE *of = fopen(path.c_str(), "w"); @@ -268,8 +276,11 @@ void ExecutionEngine::RegisterRuntimeSymbols() { } } -template void ExecutionEngine::Link(const ir::Module &module); -template void ExecutionEngine::Link(const ir::Module &module); -template void ExecutionEngine::Link(const ir::Module &module); +template void ExecutionEngine::Link(const ir::Module &module, + bool add_module); +template void ExecutionEngine::Link(const ir::Module &module, + bool add_module); 
+template void ExecutionEngine::Link(const ir::Module &module, + bool add_module); } // namespace cinn::backends diff --git a/paddle/cinn/backends/llvm/execution_engine.h b/paddle/cinn/backends/llvm/execution_engine.h index 63f9427a53edb..44b212f245f90 100644 --- a/paddle/cinn/backends/llvm/execution_engine.h +++ b/paddle/cinn/backends/llvm/execution_engine.h @@ -79,18 +79,22 @@ class ExecutionEngine { void *Lookup(absl::string_view name); template - void Link(const ir::Module &module); + void Link(const ir::Module &module, bool add_module = true); void ExportObject(const std::string &path); bool AddModule(std::unique_ptr module, std::unique_ptr context); + bool AddSelfModule(); + protected: explicit ExecutionEngine(bool enable_object_cache, RuntimeSymbols &&module_symbols) : cache_(std::make_unique()), - module_symbols_(std::move(module_symbols)) {} + module_symbols_(std::move(module_symbols)), + ctx(std::make_unique()), + b(std::make_unique>(*ctx)) {} void RegisterRuntimeSymbols(); @@ -106,6 +110,10 @@ class ExecutionEngine { std::unique_ptr jit_; std::unique_ptr cache_; RuntimeSymbols module_symbols_; + + std::unique_ptr ctx; + std::unique_ptr m; + std::unique_ptr> b; }; } // namespace cinn::backends diff --git a/paddle/cinn/backends/llvm/execution_engine_test.cc b/paddle/cinn/backends/llvm/execution_engine_test.cc index a13f329a81259..beb3ec61fae25 100644 --- a/paddle/cinn/backends/llvm/execution_engine_test.cc +++ b/paddle/cinn/backends/llvm/execution_engine_test.cc @@ -26,7 +26,6 @@ #include #include #include - #include #include #include @@ -35,6 +34,7 @@ #include #include #include +#include "paddle/common/enforce.h" #include "paddle/cinn/backends/llvm/cinn_runtime_llvm_ir.h" #include "paddle/cinn/backends/llvm/codegen_llvm.h" @@ -91,7 +91,11 @@ auto CreateTestBuffer() { } float *Cd = reinterpret_cast(C->memory); - CHECK_EQ(C->num_elements(), A->num_elements()); + PADDLE_ENFORCE_EQ( + C->num_elements(), + A->num_elements(), + phi::errors::InvalidArgument( + 
"The number of elements of C and A should be the same.")); return std::make_tuple(A, B, C); } diff --git a/paddle/cinn/backends/llvm/llvm_intrin_rule.h b/paddle/cinn/backends/llvm/llvm_intrin_rule.h index 903c056196f4e..14e3718299c0f 100644 --- a/paddle/cinn/backends/llvm/llvm_intrin_rule.h +++ b/paddle/cinn/backends/llvm/llvm_intrin_rule.h @@ -26,17 +26,24 @@ #include "paddle/cinn/ir/intrinsic_ops.h" #include "paddle/cinn/ir/registry.h" #include "paddle/cinn/lang/packed_func.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace codegen { template inline void MakeFloatIntrinOp(lang::Args args, lang::RetValue *rv) { - CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg = args[0]; ir::Call *node = arg->as(); CHECK(node); - CHECK_GE(node->read_args.size(), arg_nums); + PADDLE_ENFORCE_GE( + node->read_args.size(), + arg_nums, + phi::errors::InvalidArgument( + "The number of read args should be greater than arg_nums.")); if (add_float_suffix) { CHECK(node->type().is_float()); *rv = ir::intrinsics::BuiltinIntrin::Make( @@ -85,7 +92,10 @@ void RegisterCpuIntrinRule() { ir::Registry::Register("lower_cpu_intrinsic_isfinite", true) .SetBody([](lang::Args args, lang::RetValue *rv) { - CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg0 = args[0]; ir::Call *node = arg0->as(); CHECK(node); @@ -96,7 +106,10 @@ void RegisterCpuIntrinRule() { ir::Registry::Register("lower_cpu_intrinsic_isinf", true) .SetBody([](lang::Args args, lang::RetValue *rv) { - CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg0 = args[0]; ir::Call *node = arg0->as(); CHECK(node); @@ -113,7 +126,10 @@ void RegisterCpuIntrinRule() { 
ir::Registry::Register("lower_cpu_intrinsic_rsqrt", true) .SetBody([](lang::Args args, lang::RetValue *rv) { - CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg0 = args[0]; ir::Call *node = arg0->as(); CHECK(node); @@ -124,7 +140,10 @@ void RegisterCpuIntrinRule() { ir::Registry::Register("lower_cpu_intrinsic_exp10", true) .SetBody([](lang::Args args, lang::RetValue *rv) { - CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg0 = args[0]; ir::Call *node = arg0->as(); CHECK(node); @@ -136,7 +155,10 @@ void RegisterCpuIntrinRule() { ir::Registry::Register("lower_cpu_intrinsic_tan", true) .SetBody([](lang::Args args, lang::RetValue *rv) { - CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg0 = args[0]; ir::Call *node = arg0->as(); CHECK(node); @@ -147,7 +169,10 @@ void RegisterCpuIntrinRule() { ir::Registry::Register("lower_cpu_intrinsic_tanh", true) .SetBody([](lang::Args args, lang::RetValue *rv) { - CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg0 = args[0]; ir::Call *node = arg0->as(); CHECK(node); @@ -168,7 +193,10 @@ void RegisterCpuIntrinRule() { ir::Registry::Register("lower_cpu_intrinsic_cosh", true) .SetBody([](lang::Args args, lang::RetValue *rv) { - CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg0 = args[0]; ir::Call *node = arg0->as(); CHECK(node); @@ -180,7 +208,10 @@ void RegisterCpuIntrinRule() { ir::Registry::Register("lower_cpu_intrinsic_sinh", true) .SetBody([](lang::Args args, lang::RetValue *rv) { - 
CHECK_GE(args.size(), 1U); + PADDLE_ENFORCE_GE(args.size(), + 1U, + phi::errors::InvalidArgument( + "The number of args should be greater than 1.")); Expr arg0 = args[0]; ir::Call *node = arg0->as(); CHECK(node); diff --git a/paddle/cinn/backends/llvm/llvm_optimizer.cc b/paddle/cinn/backends/llvm/llvm_optimizer.cc index e64fb9f42ee0b..22f9a37351664 100644 --- a/paddle/cinn/backends/llvm/llvm_optimizer.cc +++ b/paddle/cinn/backends/llvm/llvm_optimizer.cc @@ -74,12 +74,12 @@ class CustomPassManager : public PassManagerT { void add(llvm::Pass *pass) override { if (print_passes_) { if (is_function_pass_manager_) { - VLOG(1) << "llvm run function pass[" << std::string(pass->getPassName()) + VLOG(4) << "llvm run function pass[" << std::string(pass->getPassName()) << "]"; } if (is_module_pass_manager_) { - VLOG(1) << "llvm run module pass[" << std::string(pass->getPassName()) + VLOG(4) << "llvm run module pass[" << std::string(pass->getPassName()) << "]"; } } diff --git a/paddle/cinn/backends/llvm/runtime_symbol_registry.cc b/paddle/cinn/backends/llvm/runtime_symbol_registry.cc index 3885ebe0c4199..52dbe7f024307 100644 --- a/paddle/cinn/backends/llvm/runtime_symbol_registry.cc +++ b/paddle/cinn/backends/llvm/runtime_symbol_registry.cc @@ -20,8 +20,8 @@ #include #include "paddle/cinn/runtime/flags.h" +#include "paddle/common/enforce.h" #include "paddle/common/flags.h" - PD_DECLARE_bool(verbose_function_register); namespace cinn { @@ -51,8 +51,10 @@ void RuntimeSymbols::Register(const std::string &name, void *address) { std::lock_guard lock(mu_); auto it = symbols_.find(name); if (it != symbols_.end()) { - CHECK_EQ(it->second, address) - << "Duplicate register symbol [" << name << "]"; + PADDLE_ENFORCE_EQ( + it->second, + address, + phi::errors::InvalidArgument("Duplicate register symbol")); return; } diff --git a/paddle/cinn/backends/modular.cc b/paddle/cinn/backends/modular.cc index fb736154c7bfc..f735b8b6da56a 100644 --- a/paddle/cinn/backends/modular.cc +++ 
b/paddle/cinn/backends/modular.cc @@ -15,7 +15,7 @@ #include "paddle/cinn/backends/modular.h" #include "paddle/cinn/ir/ir_visitor.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace backends { @@ -104,8 +104,14 @@ class ModularEvaluator : public ir::IRVisitorRequireReImpl { } static int gcd(int a, int b) { - CHECK_GE(a, 0); - CHECK_GE(b, 0); + PADDLE_ENFORCE_GE( + a, + 0, + phi::errors::InvalidArgument("a should be greater than or equal to 0")); + PADDLE_ENFORCE_GE( + b, + 0, + phi::errors::InvalidArgument("b should be greater than or equal to 0")); if (a < b) std::swap(a, b); if (b == 0) return a; diff --git a/paddle/cinn/backends/nvrtc/header_generator.cc b/paddle/cinn/backends/nvrtc/header_generator.cc index d4b2b9504673f..7d88ed16d0413 100644 --- a/paddle/cinn/backends/nvrtc/header_generator.cc +++ b/paddle/cinn/backends/nvrtc/header_generator.cc @@ -16,7 +16,7 @@ #include "glog/logging.h" #include "jitify.hpp" // NOLINT - +#include "paddle/common/enforce.h" namespace cinn { namespace backends { namespace nvrtc { @@ -27,8 +27,10 @@ HeaderGeneratorBase& JitSafeHeaderGenerator::GetInstance() { } const size_t JitSafeHeaderGenerator::size() const { - CHECK_EQ(include_names_.size(), headers_.size()) - << "Internal error in size of header files."; + PADDLE_ENFORCE_EQ( + include_names_.size(), + headers_.size(), + phi::errors::InvalidArgument("Internal error in size of header files.")); return include_names_.size(); } diff --git a/paddle/cinn/backends/nvrtc/nvrtc_util.cc b/paddle/cinn/backends/nvrtc/nvrtc_util.cc index 737d887ea809c..1b887268a1ae8 100644 --- a/paddle/cinn/backends/nvrtc/nvrtc_util.cc +++ b/paddle/cinn/backends/nvrtc/nvrtc_util.cc @@ -29,7 +29,7 @@ #include "paddle/cinn/common/common.h" #include "paddle/cinn/runtime/flags.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" PD_DECLARE_string(cinn_nvcc_cmd_path); PD_DECLARE_string(nvidia_package_dir); PD_DECLARE_bool(nvrtc_compile_to_cubin); @@ -187,7 +187,9 @@ 
std::string Compiler::CompileCudaSource(const std::string& code, std::string log; log.resize(log_size); NVRTC_CALL(nvrtcGetProgramLog(prog, &log[0])); - CHECK_EQ(compile_res, NVRTC_SUCCESS) << log << "\nThe code is:\n" << code; + PADDLE_ENFORCE_EQ(compile_res, + NVRTC_SUCCESS, + phi::errors::Fatal("NVRTC compilation failed")); } size_t size; diff --git a/paddle/cinn/common/arithmetic.cc b/paddle/cinn/common/arithmetic.cc index e2c4ed1b8a6a7..08bf94724dedb 100644 --- a/paddle/cinn/common/arithmetic.cc +++ b/paddle/cinn/common/arithmetic.cc @@ -25,7 +25,7 @@ #include "paddle/cinn/ir/ir_visitor.h" #include "paddle/cinn/ir/op/ir_operators.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace common { @@ -222,7 +222,10 @@ class GiNaCToExprVisitor : public GiNaC::symbol::visitor, auto* intv = cur.As(); CHECK(intv); - CHECK_EQ(intv->value, -1); + PADDLE_ENFORCE_EQ( + intv->value, + -1, + phi::errors::InvalidArgument("The power value should be -1.")); cur = Div::Make(Expr(1), a); } @@ -296,9 +299,12 @@ std::tuple Solve(Expr lhs, Expr rhs, Var var) { ginac::lst vars{symbol}; ginac::ex res = ginac::lsolve(eqs, vars); - CHECK_EQ(res.nops(), 1); + PADDLE_ENFORCE_EQ( + res.nops(), 1, phi::errors::InvalidArgument("The res npos should be 1.")); auto item = res.op(0); - CHECK_EQ(item.nops(), 2); + PADDLE_ENFORCE_EQ(item.nops(), + 2, + phi::errors::InvalidArgument("The item npos should be 2.")); Expr value = converter.GinacToExpr(item.op(1)); // tell the symbol diff --git a/paddle/cinn/common/broadcast_tree.cc b/paddle/cinn/common/broadcast_tree.cc index 74ed4aff42798..f50e06bed4fd4 100644 --- a/paddle/cinn/common/broadcast_tree.cc +++ b/paddle/cinn/common/broadcast_tree.cc @@ -17,8 +17,12 @@ #include #include +#include "paddle/common/enforce.h" +#include "paddle/common/flags.h" #include "paddle/pir/include/dialect/shape/utils/dim_expr_util.h" +COMMON_DECLARE_int64(pir_broadcast_tree_limit); + namespace cinn::common { namespace { 
@@ -91,6 +95,9 @@ template bool SearchBroadcastImpl(const symbol::Broadcast& variadic, const DoEachT& DoEach) { const auto& operands = *(variadic.operands); + if (operands.size() > 3) { + PADDLE_THROW(phi::errors::Fatal("Too many broadcast leaves to compile!")); + } for (const auto& operand : operands) { CHECK(!operand.isa()); if (SearchBroadcast(operand, DoEach)) return true; @@ -213,21 +220,22 @@ BroadcastLeaf GetCstrRhsEqOneLeaves( BroadcastBranch ConstructBroadcastBranch( const symbol::Broadcastable& broadcastable_condition, - const BroadcastLeaf& leaves) { + const BroadcastLeaf& leaves, + int* num_of_leaves) { BroadcastLeaf cstr_lhs_eq_rhs_leaves = GetCstrLhsEqRhsLeaves(broadcastable_condition, leaves); BroadcastLeaf cstr_lhs_eq_one_leaves = GetCstrLhsEqOneLeaves(broadcastable_condition, leaves); BroadcastLeaf cstr_rhs_eq_one_leaves = GetCstrRhsEqOneLeaves(broadcastable_condition, leaves); - // clang-format off return BroadcastBranch{ /*broadcastable_condition*/ broadcastable_condition, - /*cstr_lhs_eq_rhs_branch*/ ConstructBroadcastTree(cstr_lhs_eq_rhs_leaves), - /*cstr_lhs_eq_one_branch*/ ConstructBroadcastTree(cstr_lhs_eq_one_leaves), - /*cstr_rhs_eq_one_branch*/ ConstructBroadcastTree(cstr_rhs_eq_one_leaves) - }; - // clang-format on + /*cstr_lhs_eq_rhs_branch*/ + ConstructBroadcastTree(cstr_lhs_eq_rhs_leaves, num_of_leaves), + /*cstr_lhs_eq_one_branch*/ + ConstructBroadcastTree(cstr_lhs_eq_one_leaves, num_of_leaves), + /*cstr_rhs_eq_one_branch*/ + ConstructBroadcastTree(cstr_rhs_eq_one_leaves, num_of_leaves)}; } } // namespace @@ -288,7 +296,10 @@ std::optional> GetFirstCstrBroadcastable( if (ret.has_value()) return ret.value(); ForEachBroadcastDimExpr(leaves, [&](const auto& broadcast) -> bool { const auto& operands = broadcast.operands; - CHECK_GE(operands->size(), 2); + PADDLE_ENFORCE_GE(operands->size(), + 2, + phi::errors::InvalidArgument( + "The operands size should be greater than 2.")); CHECK(operands->at(0) != operands->at(1)); ret = 
symbol::Broadcastable{operands->at(0), operands->at(1)}; @@ -297,11 +308,19 @@ std::optional> GetFirstCstrBroadcastable( return ret; } -BroadcastTree ConstructBroadcastTree(const BroadcastLeaf& leaves) { +BroadcastTree ConstructBroadcastTree(const BroadcastLeaf& leaves, + int* num_of_leaves) { std::optional> broadcastable_condition = GetFirstCstrBroadcastable(leaves); - if (!broadcastable_condition.has_value()) return leaves; - return ConstructBroadcastBranch(broadcastable_condition.value(), leaves); + if (!broadcastable_condition.has_value()) { + (*num_of_leaves)++; + if (*num_of_leaves > FLAGS_pir_broadcast_tree_limit) { + PADDLE_THROW(phi::errors::Fatal("Too many broadcast leaves to compile!")); + } + return leaves; + } + return ConstructBroadcastBranch( + broadcastable_condition.value(), leaves, num_of_leaves); } namespace { diff --git a/paddle/cinn/common/broadcast_tree.h b/paddle/cinn/common/broadcast_tree.h index 5b8c051299af8..eee72a1f3cd38 100644 --- a/paddle/cinn/common/broadcast_tree.h +++ b/paddle/cinn/common/broadcast_tree.h @@ -29,7 +29,8 @@ using BroadcastLeaf = adt::List>; using BroadcastTree = adt::Tree; -BroadcastTree ConstructBroadcastTree(const BroadcastLeaf& leaves); +BroadcastTree ConstructBroadcastTree(const BroadcastLeaf& leaves, + int* num_of_leaves); std::string ToTxtString(const BroadcastTree&); diff --git a/paddle/cinn/common/broadcast_tree_test.cc b/paddle/cinn/common/broadcast_tree_test.cc index 8a09e8abd7dee..0484d38690dd2 100644 --- a/paddle/cinn/common/broadcast_tree_test.cc +++ b/paddle/cinn/common/broadcast_tree_test.cc @@ -66,7 +66,8 @@ TEST(BroadcastTree, Naive) { MakeBroadcastDimExpr(expr1, expr2), MakeBroadcastDimExpr(expr3, expr4)}; BroadcastLeaf leaf = adt::List>{tensor_shape}; - BroadcastTree tree = ConstructBroadcastTree(leaf); + int num_of_leaves = 0; + BroadcastTree tree = ConstructBroadcastTree(leaf, &num_of_leaves); ASSERT_TRUE(tree.Has>()); const auto& branch = tree.Get>(); const auto& [cstr_broadcastable, @@ -96,7 
+97,8 @@ TEST(BroadcastTree, SimplifyConstantBroadcast) { MakeBroadcastDimExpr(expr1, expr2), MakeBroadcastDimExpr(expr3, expr4)}; BroadcastLeaf leaf = adt::List>{tensor_shape}; - BroadcastTree tree = ConstructBroadcastTree(leaf); + int num_of_leaves = 0; + BroadcastTree tree = ConstructBroadcastTree(leaf, &num_of_leaves); ASSERT_TRUE(tree.Has>()); const auto& branch = tree.Get>(); const auto& [cstr_broadcastable, diff --git a/paddle/cinn/common/cas.cc b/paddle/cinn/common/cas.cc index 3b4f2e7f2f3d9..4b1021f3dcc2a 100644 --- a/paddle/cinn/common/cas.cc +++ b/paddle/cinn/common/cas.cc @@ -28,7 +28,7 @@ #include "paddle/cinn/ir/utils/ir_copy.h" #include "paddle/cinn/ir/utils/ir_nodes_collector.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace common { using namespace ir; // NOLINT @@ -37,6 +37,9 @@ Expr AutoSimplify( Expr u, const absl::flat_hash_map& var_intervals) { VLOG(7) << "Begin AutoSimplify: " << u; + if (u.type().is_float()) { + return u; + } u = detail::ConvertCinnToCAS(u); absl::flat_hash_map s_var_intervals; for (auto& item : var_intervals) { @@ -136,7 +139,8 @@ namespace detail { // Is a Divisible to b. 
// @{ bool IsDivisible(int64_t a, int64_t b) { - CHECK_NE(b, 0); + PADDLE_ENFORCE_NE( + b, 0, phi::errors::InvalidArgument("The divisor %d should not be 0.", b)); return a % b == 0; } bool IsDivisible(const Sum* a, int b); @@ -1482,7 +1486,10 @@ Expr CasSimplifyMutator::SimplifySpecificSum(Expr tmp) { if (!right_mod || (!left_mul && !left_div)) { return tmp; } - CHECK_GE(right_mod->operands().size(), 2U); + PADDLE_ENFORCE_GE(right_mod->operands().size(), + 2U, + phi::errors::InvalidArgument( + "right_mod's operands size should be greater than 2")); Expr mod_left = right_mod->operand(0); Expr mod_right = right_mod->operand(1); if (!mod_left->type().is_integer() || !mod_right->type().is_integer()) { @@ -1492,7 +1499,10 @@ Expr CasSimplifyMutator::SimplifySpecificSum(Expr tmp) { // case 1: (m / n) * n + m % n = m (m, n's type is int) // case 2: (m / n1) * n3 + (n2 * m) % n3 = n2 * m if n3 = n1 * n2 (m, n1, // n2, n3's type is int) - CHECK_GE(left_mul->operands().size(), 2U); + PADDLE_ENFORCE_GE(left_mul->operands().size(), + 2U, + phi::errors::InvalidArgument( + "left_mul's operands size should be greater than 2")); Expr mul_left = left_mul->operand(0); Expr mul_right = left_mul->operand(1); @@ -1509,7 +1519,10 @@ Expr CasSimplifyMutator::SimplifySpecificSum(Expr tmp) { if (!div) { return tmp; } - CHECK_GE(div->operands().size(), 2U); + PADDLE_ENFORCE_GE(div->operands().size(), + 2U, + phi::errors::InvalidArgument( + "div's operands size should be greater than 2")); Expr div_left = div->operand(0); Expr div_right = div->operand(1); if (!div_left->type().is_integer() || !div_right->type().is_integer()) { diff --git a/paddle/cinn/common/cas.h b/paddle/cinn/common/cas.h old mode 100755 new mode 100644 index 7fbd0bfe6aa00..729d8e40c0db7 --- a/paddle/cinn/common/cas.h +++ b/paddle/cinn/common/cas.h @@ -22,7 +22,7 @@ #include "paddle/cinn/ir/ir.h" #include "paddle/cinn/ir/ir_printer.h" #include "paddle/cinn/optim/ir_simplify.h" - +#include "paddle/common/enforce.h" 
namespace cinn { namespace common { @@ -37,7 +37,10 @@ Expr ReplaceMaxToConstant(Expr expr); struct CasInterval { template CasInterval(T l, T r) : l(l), r(r) { - CHECK_LE(l, r) << "left should not be larger than right"; + PADDLE_ENFORCE_LE( + l, + r, + phi::errors::InvalidArgument("left should not be larger than right")); } /** @@ -51,12 +54,12 @@ struct CasInterval { * 1 <= iterator_i <= 5 */ CasInterval(Expr expr_l, Expr expr_r) { - VLOG(2) << "CasInterval is : [" << expr_l << ", " << expr_r << "]."; + VLOG(6) << "CasInterval is : [" << expr_l << ", " << expr_r << "]."; expr_r = detail::ReplaceMinToConstant(expr_r); expr_l = detail::ReplaceMaxToConstant(expr_l); optim::Simplify(&expr_l); optim::Simplify(&expr_r); - VLOG(2) << "After simplify, CasInterval is : [" << expr_l << ", " << expr_r + VLOG(6) << "After simplify, CasInterval is : [" << expr_l << ", " << expr_r << "]."; if (expr_l.is_constant() && expr_r.is_constant()) { diff --git a/paddle/cinn/common/cas_test.cc b/paddle/cinn/common/cas_test.cc index c0b614eb972fa..62ca04e85467f 100644 --- a/paddle/cinn/common/cas_test.cc +++ b/paddle/cinn/common/cas_test.cc @@ -458,9 +458,6 @@ TEST(CAS, cond) { TEST(CAS, SimplifyFracOp) { Expr frac = Expr(1) / Expr(7) / Expr(6) / Expr(5) / Expr(4); EXPECT_EQ(GetStreamCnt(AutoSimplify(frac)), "0"); - - Expr frac_f = Expr(20.0f) / Expr(2.0f) / Expr(1.0f) / Expr(5.0f); - EXPECT_EQ(GetStreamCnt(AutoSimplify(frac_f)), "2.00000000f"); } } // namespace common diff --git a/paddle/cinn/common/cinn_value.cc b/paddle/cinn/common/cinn_value.cc index 3b25f93201333..82a3c86b12720 100644 --- a/paddle/cinn/common/cinn_value.cc +++ b/paddle/cinn/common/cinn_value.cc @@ -18,7 +18,7 @@ #include "paddle/cinn/ir/ir_base.h" #include "paddle/cinn/poly/stage.h" #include "paddle/cinn/runtime/cinn_runtime.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace ir { @@ -128,23 +128,38 @@ bool CINNValue::is_tensor() const { } CINNValue::operator std::string() const { - 
CHECK_EQ(type_code_, TypeCode()); + PADDLE_ENFORCE_EQ( + type_code_, + TypeCode(), + phi::errors::InvalidArgument("The type_code is not std::string.")); return absl::any_cast(shared_); } CINNValue::operator ir::Var() const { - CHECK_EQ(type_code_, TypeCode()); + PADDLE_ENFORCE_EQ( + type_code_, + TypeCode(), + phi::errors::InvalidArgument("The type_code is not ir::Var.")); return absl::any_cast(shared_); } CINNValue::operator ir::Expr() const { - CHECK_EQ(type_code_, TypeCode()); + PADDLE_ENFORCE_EQ( + type_code_, + TypeCode(), + phi::errors::InvalidArgument("The type_code is not ir::Expr.")); return absl::any_cast(shared_); } CINNValue::operator CINNValuePack() const { - CHECK_EQ(type_code_, TypeCode()); + PADDLE_ENFORCE_EQ( + type_code_, + TypeCode(), + phi::errors::InvalidArgument("The type_code is not CINNValuePack.")); return absl::any_cast(shared_); } CINNValue::operator poly::StageMap() const { - CHECK_EQ(type_code(), TypeCode()); + PADDLE_ENFORCE_EQ( + type_code(), + TypeCode(), + phi::errors::InvalidArgument("The type_code is not poly::StageMap.")); return absl::any_cast(shared_); } CINNValue::CINNValue(char *value) @@ -181,11 +196,17 @@ CINNValuePack _CINNValuePack_::Make(const std::vector &array) { return CINNValuePack(node); } CINNValue &_CINNValuePack_::operator[](int offset) { - CHECK_LT(offset, size()); + PADDLE_ENFORCE_LT( + offset, + size(), + phi::errors::InvalidArgument("The offset is out of range.")); return values_[offset]; } const CINNValue &_CINNValuePack_::operator[](int offset) const { - CHECK_LT(offset, size()); + PADDLE_ENFORCE_LT( + offset, + size(), + phi::errors::InvalidArgument("The offset is out of range.")); return values_[offset]; } void _CINNValuePack_::AddValue(const CINNValue &value) { diff --git a/paddle/cinn/common/cinn_value.h b/paddle/cinn/common/cinn_value.h old mode 100755 new mode 100644 index 3cfb4214d76b9..aa64b129df673 --- a/paddle/cinn/common/cinn_value.h +++ b/paddle/cinn/common/cinn_value.h @@ -23,7 +23,7 @@ 
#include "paddle/cinn/common/object.h" #include "paddle/cinn/common/type.h" #include "paddle/cinn/runtime/cinn_runtime.h" - +#include "paddle/common/enforce.h" struct cinn_buffer_t; namespace cinn { @@ -97,12 +97,18 @@ struct CINNValuePack : public Shared<_CINNValuePack_> { bool empty() const { return (*operator->()).empty(); } CINNValue& back() { - CHECK_GT((*operator->()).size(), 0); + PADDLE_ENFORCE_GT((*operator->()).size(), + 0, + phi::errors::InvalidArgument( + "The size of the array should greater than 0.")); return (*operator->())[size() - 1]; } const CINNValue& back() const { - CHECK_GT((*operator->()).size(), 0); + PADDLE_ENFORCE_GT((*operator->()).size(), + 0, + phi::errors::InvalidArgument( + "The size of the array should greater than 0.")); return (*operator->())[size() - 1]; } diff --git a/paddle/cinn/common/float16_bfloat16_cuda_test.cu b/paddle/cinn/common/float16_bfloat16_cuda_test.cu index fd6c39cc51f8f..5cded20e9cadf 100644 --- a/paddle/cinn/common/float16_bfloat16_cuda_test.cu +++ b/paddle/cinn/common/float16_bfloat16_cuda_test.cu @@ -39,9 +39,15 @@ class CudaMem { CudaMem() = default; void* mutable_data(size_t bytes) { - CHECK_GT(bytes, 0) << "Cannot allocate empty memory!"; + PADDLE_ENFORCE_GT( + bytes, + 0, + phi::errors::InvalidArgument("Cannot allocate empty memory!")); if (ptr) { - CHECK_EQ(bytes, bytes_) << "Try allocate memory twice!"; + PADDLE_ENFORCE_EQ( + bytes, + bytes_, + phi::errors::InvalidArgument("Try allocate memory twice!")); return ptr; } CUDA_CALL(cudaMalloc(&ptr, bytes)); @@ -67,12 +73,14 @@ class CudaMem { void MemcpyFromHost(const void* src, size_t bytes, cudaStream_t stream = nullptr) { - CHECK_LE(bytes, bytes_) << "Too many data need copy"; + PADDLE_ENFORCE_LE( + bytes, bytes_, phi::errors::InvalidArgument("Too many data need copy")); CUDA_CALL(cudaMemcpyAsync(ptr, src, bytes, cudaMemcpyHostToDevice, stream)); } void MemcpyToHost(void* dst, size_t bytes, cudaStream_t stream = nullptr) { - CHECK_LE(bytes, bytes_) << "Too 
many data need copy"; + PADDLE_ENFORCE_LE( + bytes, bytes_, phi::errors::InvalidArgument("Too many data need copy")); CUDA_CALL(cudaMemcpyAsync(dst, ptr, bytes, cudaMemcpyDeviceToHost, stream)); } diff --git a/paddle/cinn/common/graph_utils.cc b/paddle/cinn/common/graph_utils.cc old mode 100755 new mode 100644 index b1110e8ca8aa0..b6223443b04fd --- a/paddle/cinn/common/graph_utils.cc +++ b/paddle/cinn/common/graph_utils.cc @@ -23,7 +23,7 @@ #include "paddle/cinn/common/common.h" #include "paddle/cinn/utils/dot_lang.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace common { @@ -98,7 +98,10 @@ Graph::topological_order() const { queue.pop_front(); for (auto &edge : top_node->outlinks()) { - CHECK_EQ(edge->source(), top_node); + PADDLE_ENFORCE_EQ(edge->source(), + top_node, + phi::errors::InvalidArgument( + "The edge's source is not equal to the top node.")); edge_order.push_back(edge.get()); auto *sink = edge->sink(); if ((--indegree[sink->id()]) == 0) { @@ -107,9 +110,10 @@ Graph::topological_order() const { } } - CHECK_EQ(node_order.size(), nodes().size()) - << "circle detected in the schedule graph:\n\n" - << Visualize(); + PADDLE_ENFORCE_EQ(node_order.size(), + nodes().size(), + phi::errors::InvalidArgument( + "The node_order size is not equal to the nodes size.")); return std::make_tuple(node_order, edge_order); } diff --git a/paddle/cinn/common/graph_utils.h b/paddle/cinn/common/graph_utils.h index 9834b2368d460..55d12bcfd12ae 100644 --- a/paddle/cinn/common/graph_utils.h +++ b/paddle/cinn/common/graph_utils.h @@ -31,7 +31,7 @@ #include "paddle/cinn/common/object.h" #include "paddle/cinn/common/shared.h" #include "paddle/cinn/common/type.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace common { @@ -86,7 +86,8 @@ class GraphNode : public Object { std::tuple LinkTo(GraphNode* other) { EdgeT *a, *b; CHECK(other); - CHECK_NE(other, this) << "Cannot link to itself"; + PADDLE_ENFORCE_NE( + other, this, 
phi::errors::InvalidArgument("Cannot link to itself")); auto outlink_edge = make_shared(this, other, index_outlinks); auto inlink_edge = make_shared(this, other, other->index_inlinks); @@ -127,7 +128,10 @@ class GraphNode : public Object { break; } } - CHECK_EQ(outlink_linked, inlink_linked); + PADDLE_ENFORCE_EQ(outlink_linked, + inlink_linked, + phi::errors::InvalidArgument( + "The outlink_linked should same as inlink_linked.")); if (outlink_linked) return; else diff --git a/paddle/cinn/common/ir_util.cc b/paddle/cinn/common/ir_util.cc index d326e652a7be7..c73091e8196be 100644 --- a/paddle/cinn/common/ir_util.cc +++ b/paddle/cinn/common/ir_util.cc @@ -21,7 +21,7 @@ #include "paddle/cinn/ir/ir_mutator.h" #include "paddle/cinn/ir/ir_printer.h" #include "paddle/cinn/ir/op/ir_operators.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace common { @@ -29,19 +29,34 @@ namespace { // ramp + scalar or broadcast Expr RampRelatedMul(ir::Ramp *ramp, Expr other) { - CHECK_EQ(other.type().ElementOf(), Int(32)); - CHECK_EQ(ramp->base.type(), Int(32)); - CHECK_EQ(ramp->stride.type(), Int(32)); + PADDLE_ENFORCE_EQ( + other.type().ElementOf(), + Int(32), + phi::errors::InvalidArgument("The type of other should be int32.")); + PADDLE_ENFORCE_EQ( + ramp->base.type(), + Int(32), + phi::errors::InvalidArgument("The type of ramp->base should be int32.")); + PADDLE_ENFORCE_EQ(ramp->stride.type(), + Int(32), + phi::errors::InvalidArgument( + "The type of ramp->stride should be int32.")); auto *other_broadcast = other.As(); if (other_broadcast) { - CHECK_EQ(ramp->lanes, other_broadcast->lanes); + PADDLE_ENFORCE_EQ(ramp->lanes, + other_broadcast->lanes, + phi::errors::InvalidArgument( + "The lanes of ramp and other should be equal.")); other = other_broadcast->value; } return ir::Ramp::Make(ramp->base * other, ramp->stride * other, ramp->lanes); } Expr RampRelatedMul(ir::Broadcast *broadcast, Expr other) { - CHECK_EQ(other.type().lanes(), 1); + PADDLE_ENFORCE_EQ( + 
other.type().lanes(), + 1, + phi::errors::InvalidArgument("The lanes of other should be 1.")); return ir::Broadcast::Make(broadcast->value * other, broadcast->lanes); } // ramp * ramp @@ -51,17 +66,26 @@ Expr RampRelatedMul(ir::Ramp *ramp, ir::Ramp *other) { } // ramp + scalar Expr RampRelatedAdd(ir::Ramp *ramp, Expr other) { - CHECK_EQ(other.type().ElementOf(), Int(32)); + PADDLE_ENFORCE_EQ( + other.type().ElementOf(), + Int(32), + phi::errors::InvalidArgument("The type of other should be int32.")); auto *other_broadcast = other.As(); if (other_broadcast) { - CHECK_EQ(ramp->lanes, other_broadcast->lanes); + PADDLE_ENFORCE_EQ(ramp->lanes, + other_broadcast->lanes, + phi::errors::InvalidArgument( + "The lanes of ramp and other should be equal.")); other = other_broadcast->value; } return ir::Ramp::Make(ramp->base + other, ramp->stride, ramp->lanes); } Expr RampRelatedAdd(ir::Broadcast *broadcast, Expr other) { - CHECK_EQ(other.type().lanes(), 1); + PADDLE_ENFORCE_EQ( + other.type().lanes(), + 1, + phi::errors::InvalidArgument("The lanes of other should be 1.")); return ir::Broadcast::Make(broadcast->value + other, broadcast->lanes); } // ramp + ramp @@ -98,7 +122,11 @@ Expr RampRelatedAdd(Expr a, Expr b) { } else if (!a_broadcast && b_broadcast) { return RampRelatedAdd(b_broadcast, a); } else if (a_broadcast && b_broadcast) { - CHECK_EQ(a_broadcast->lanes, b_broadcast->lanes); + PADDLE_ENFORCE_EQ( + a_broadcast->lanes, + b_broadcast->lanes, + phi::errors::InvalidArgument( + "The lanes of a_broadcast and b_broadcast should be equal.")); return ir::Broadcast::Make(a_broadcast->value + b_broadcast->value, a_broadcast->lanes); } else { @@ -125,7 +153,11 @@ Expr RampRelatedMul(Expr a, Expr b) { } else if (!a_broadcast && b_broadcast) { return RampRelatedMul(b_broadcast, a); } else if (a_broadcast && b_broadcast) { - CHECK_EQ(a_broadcast->lanes, b_broadcast->lanes); + PADDLE_ENFORCE_EQ( + a_broadcast->lanes, + b_broadcast->lanes, + phi::errors::InvalidArgument( + "The 
lanes of a_broadcast and b_broadcast should be equal.")); return ir::Broadcast::Make(a_broadcast->value * b_broadcast->value, a_broadcast->lanes); } else { @@ -141,7 +173,11 @@ Expr IndiceToAbsOffset(const std::vector &shape, VLOG(3) << "Begin IndiceToAbsOffset"; VLOG(3) << "shape is : " << utils::Join(shape, ","); VLOG(3) << "indices is : " << utils::Join(indices, ","); - CHECK_LE(shape.size(), indices.size()); + PADDLE_ENFORCE_LE( + shape.size(), + indices.size(), + phi::errors::InvalidArgument("The size of shape should be less than or " + "equal to the size of indices.")); Expr res; ir::TryElevateInt32ToInt64(shape); for (int i = 0; i < shape.size(); i++) { @@ -261,10 +297,11 @@ void CheckTensorUniqueInExpr(Expr expr) { if (!tensor_names.count(tp->name)) { tensor_names[tp->name] = tp; } else { - CHECK_EQ(tensor_names[tp->name], tp) - << "Found tensor not unique [" << tp->name - << "]\nThe original expression is \n" - << expr; + PADDLE_ENFORCE_EQ( + tensor_names[tp->name], + tp, + phi::errors::InvalidArgument( + "Found tensor not unique, The original express is %d .", expr)); } } } @@ -281,7 +318,11 @@ void CheckBufferUniqueInExpr(Expr expr) { absl::flat_hash_map buffer_name; auto check_buffer_uniq = [&](const ir::_Buffer_ *b) { if (buffer_name.count(b->name)) { - CHECK_EQ(buffer_name[b->name], b); + PADDLE_ENFORCE_EQ( + buffer_name[b->name], + b, + phi::errors::InvalidArgument( + "Found buffer not unique, The original express is %d .", expr)); } else { buffer_name[b->name] = b->const_self(); } @@ -426,12 +467,18 @@ std::vector GetForloopStackToStore(Expr *expr, } Expr max(Expr a, Expr b) { - CHECK_EQ(a.type(), b.type()); + PADDLE_ENFORCE_EQ( + a.type(), + b.type(), + phi::errors::InvalidArgument("The type of a and b should be equal.")); return ir::Max::Make(a, b); } Expr min(Expr a, Expr b) { - CHECK_EQ(a.type(), b.type()); + PADDLE_ENFORCE_EQ( + a.type(), + b.type(), + phi::errors::InvalidArgument("The type of a and b should be equal.")); return 
ir::Min::Make(a, b); } diff --git a/paddle/cinn/common/type.cc b/paddle/cinn/common/type.cc index 41cfd9e638f90..5163d7b921d59 100644 --- a/paddle/cinn/common/type.cc +++ b/paddle/cinn/common/type.cc @@ -137,7 +137,10 @@ Type Type::ElementOf() const { } void Type::CheckTypeValid() const { - CHECK_NE(GetStorage().type_, type_t::Unk); + PADDLE_ENFORCE_NE( + GetStorage().type_, + type_t::Unk, + phi::errors::InvalidArgument("The type is not initialized.")); if (GetStorage().type_ == type_t::Float && GetStorage().bits_ == 16) { CHECK(GetStorage().specific_type_ == specific_type_t::FP16 || GetStorage().specific_type_ == specific_type_t::BF16) diff --git a/paddle/cinn/common/union_find.h b/paddle/cinn/common/union_find.h index a88f52dafe515..a76157e7f760e 100644 --- a/paddle/cinn/common/union_find.h +++ b/paddle/cinn/common/union_find.h @@ -26,7 +26,7 @@ #include "paddle/cinn/common/object.h" #include "paddle/cinn/common/shared.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace common { @@ -62,8 +62,11 @@ struct UnionFindNode : public Object { template T* safe_as() { - CHECK_EQ(std::strcmp(T::__type_info__, type_info()), 0) - << "Want a " << T::__type_info__ << " but get a " << type_info(); + PADDLE_ENFORCE_EQ( + std::strcmp(T::__type_info__, type_info()), + 0, + phi::errors::InvalidArgument( + "Want a %d but get a %d", T::__type_info__, type_info())); return reinterpret_cast(this); } diff --git a/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc b/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc index 36fe9e340fcd9..5e7d3e6d876cf 100644 --- a/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc +++ b/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc @@ -324,12 +324,12 @@ void SplitOp::Build(pir::Builder& builder, // NOLINT const char* GenerateShapeOp::attributes_name[attributes_num] = { "output_dim_exprs", "symbol_bindings"}; -void GenerateShapeOp::Build( - pir::Builder& builder, - pir::OperationArgument& argument, - const std::vector& inputs, - const 
std::vector& output_dim_exprs, - const GenerateShapeOp::SymbolBindings& symbol_bindings) { +void GenerateShapeOp::Build(pir::Builder& builder, + pir::OperationArgument& argument, + const std::vector& inputs, + const std::vector& output_dim_exprs, + const SymbolBindings& symbol_bindings, + const pir::Type& output_type) { if (inputs.empty()) { VLOG(3) << "GenerateShapeOp inputs is empty"; for (const auto& attr : output_dim_exprs) { @@ -344,13 +344,7 @@ void GenerateShapeOp::Build( argument.AddAttribute( "symbol_bindings", ConvertSymbolBindingsToAttribute(builder, symbol_bindings)); - argument.AddOutputs({[&]() { - auto* ctx = pir::IrContext::Instance(); - auto type = pir::Int64Type::get(ctx); - auto dim = - ::common::make_ddim({static_cast(output_dim_exprs.size())}); - return DenseTensorType::get(ctx, type, dim); - }()}); + argument.AddOutput(output_type); ::pir::PassStopGradientsDefaultly(argument); } diff --git a/paddle/cinn/hlir/dialect/operator/ir/manual_op.h b/paddle/cinn/hlir/dialect/operator/ir/manual_op.h index 1eddfaffd0df1..06f306a0e3623 100644 --- a/paddle/cinn/hlir/dialect/operator/ir/manual_op.h +++ b/paddle/cinn/hlir/dialect/operator/ir/manual_op.h @@ -168,7 +168,8 @@ class IR_API GenerateShapeOp pir::OperationArgument &argument, // NOLINT const std::vector &inputs, const std::vector &output_dim_exprs, - const SymbolBindings &symbol_bindings); + const SymbolBindings &symbol_bindings, + const pir::Type &output_type); void VerifySig() {} diff --git a/paddle/cinn/hlir/dialect/operator/ir/ops.yaml b/paddle/cinn/hlir/dialect/operator/ir/ops.yaml index a8eac75248186..efbeaf298e7a0 100644 --- a/paddle/cinn/hlir/dialect/operator/ir/ops.yaml +++ b/paddle/cinn/hlir/dialect/operator/ir/ops.yaml @@ -81,9 +81,9 @@ - op : reshape args : (Tensor x, int[] shape) - output : Tensor(out) + output : Tensor(out), Tensor(xshape) infer_meta : - func : ReshapeInferMeta + func : ReshapeWithXShapeInferMeta kernel : func : reshape interfaces : 
paddle::dialect::InferSymbolicShapeInterface diff --git a/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc index 6aea2dc8b759b..c864410715531 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc @@ -57,6 +57,7 @@ COMMON_DECLARE_bool(print_ir); COMMON_DECLARE_bool(disable_dyshape_in_train); COMMON_DECLARE_bool(enable_cinn_accuracy_check); COMMON_DECLARE_bool(enable_fuse_parallel_matmul_pass); +COMMON_DECLARE_bool(logging_pir_py_code_dump_symbolic_dims); PD_DECLARE_bool(group_schedule_tiling_first); namespace cinn::dialect::ir { @@ -229,7 +230,7 @@ void ApplyCinnPass(::pir::Program* program, CreatePassManager) { PirToPyCodeConverter(program) .file_name("original_programs.py") - .dump_symbolic_shape(false) + .dump_symbolic_shape(FLAGS_logging_pir_py_code_dump_symbolic_dims) .SaveIfFlagEnabled(); ApplyPdToCinnPass(program, CreatePassManager); ApplyCinnPreprocessPass(program, CreatePassManager); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.cc index 63d5b519ce887..ec82d41742a70 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.cc @@ -232,7 +232,7 @@ class BlockDimExprsAsserter { }; std::vector input_tensors{}; std::vector output_dim_expr_attrs{}; - GenerateShapeOp::SymbolBindings symbol_bindings{}; + SymbolBindings symbol_bindings{}; bool success = MakeGenerateShapeOpAttribute(ir_ctx_, LocalDimExprs4Value, @@ -242,14 +242,13 @@ class BlockDimExprsAsserter { &output_dim_expr_attrs, &symbol_bindings); if (!success) return std::nullopt; - auto out_shape_value = - builder_ - .Build( - input_tensors, output_dim_expr_attrs, symbol_bindings) - .out(); + auto out_type = 
paddle::dialect::DenseTensorType::get( + builder_.ir_context(), + pir::Int64Type::get(builder_.ir_context()), + ::common::make_ddim({dim_exprs.size()})); return builder_ .Build( - input_tensors, output_dim_expr_attrs, symbol_bindings) + input_tensors, output_dim_expr_attrs, symbol_bindings, out_type) .out(); } @@ -298,8 +297,11 @@ class BlockDimExprsAsserter { PADDLE_ENFORCE_EQ(lhs_numel, rhs_numel, ::common::errors::InvalidArgument( + "Check [%s id:%d] infer symbolic shape failed." "The numel of lhs and rhs must be equal, but " "received lhs's numel is [%d], rhs's numel is [%d]", + op->name(), + op->id(), lhs_numel, rhs_numel)); @@ -326,8 +328,8 @@ class BlockDimExprsAsserter { .out(); auto assert_op = builder_.Build( all_eq, assert_data, lhs_numel); - const std::string error_msg = "Check [" + op->name() + "_" + - std::to_string(op->id()) + + const std::string error_msg = "Check [" + op->name() + + " id:" + std::to_string(op->id()) + "] infer symbolic shape failed."; assert_op->set_attribute( paddle::dialect::AssertOp::ERROR_INFO_ATTR_NAME, diff --git a/paddle/cinn/hlir/dialect/operator/transforms/cinn_to_pd_util.cc b/paddle/cinn/hlir/dialect/operator/transforms/cinn_to_pd_util.cc index 6281baeadbef2..ca422c1a593c8 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/cinn_to_pd_util.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/cinn_to_pd_util.cc @@ -190,6 +190,15 @@ ::pir::Operation* ConvertConcatOp(::pir::Operation* op, return pd_op; } +::pir::Operation* ConvertGenerateShapeOp( + ::pir::Operation* op, + ::pir::IrMapping& ir_mapping, // NOLINT + ::pir::Builder& builder) { // NOLINT + auto* new_op = op->Clone(ir_mapping, {true, true, true}); + builder.Insert(new_op); + return new_op; +} + ::pir::Operation* ConvertScaleOp(::pir::Operation* op, ::pir::IrMapping& ir_mapping, // NOLINT ::pir::PatternRewriter& rewriter) { // NOLINT @@ -404,6 +413,9 @@ REGISTER_TRANSFORM_RULES(concat_op, cinn::dialect::ConcatOp::name(), 
cinn::dialect::details::ConvertConcatOp); +REGISTER_TRANSFORM_RULES(generate_shape_op, + cinn::dialect::GenerateShapeOp::name(), + cinn::dialect::details::ConvertGenerateShapeOp); REGISTER_TRANSFORM_RULES(scale_op, cinn::dialect::ScaleOp::name(), cinn::dialect::details::ConvertScaleOp); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/dynamic_reshape_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/dynamic_reshape_pass.cc index 17317924fb07e..0ffd284ac79f7 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/dynamic_reshape_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/dynamic_reshape_pass.cc @@ -38,9 +38,10 @@ bool ReplaceOpWithReshapeOp(pir::Operation* op, std::vector output_dim_expr_attrs{}; GenerateShapeOp::SymbolBindings symbol_bindings{}; - unsigned output_dim_idx = 0, input_dim_idx = 0; int64_t local_dim_expr_id = 0; - for (; output_dim_idx < output_shape.size(); ++output_dim_idx) { + for (unsigned output_dim_idx = 0, input_dim_idx = 0; + output_dim_idx < output_shape.size(); + ++output_dim_idx) { const auto& dim_expr = output_shape.at(output_dim_idx); if (dim_expr.isa()) { output_dim_expr_attrs.emplace_back( @@ -64,8 +65,16 @@ bool ReplaceOpWithReshapeOp(pir::Operation* op, } } } + auto out_type = paddle::dialect::DenseTensorType::get( + rewriter.ir_context(), + pir::Int64Type::get(rewriter.ir_context()), + ::common::make_ddim( + {static_cast(output_dim_expr_attrs.size())})); auto cinn_generate_shape = rewriter.Build( - std::vector{input}, output_dim_expr_attrs, symbol_bindings); + std::vector{input}, + output_dim_expr_attrs, + symbol_bindings, + out_type); auto pd_reshape = rewriter.Build( op->operand_source(0), cinn_generate_shape.result(0)); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/fuse_shape_ops_into_generate_shape_op_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/fuse_shape_ops_into_generate_shape_op_pass.cc index 0578c79b35a2b..473763bb4dcec 100644 --- 
a/paddle/cinn/hlir/dialect/operator/transforms/fuse_shape_ops_into_generate_shape_op_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/fuse_shape_ops_into_generate_shape_op_pass.cc @@ -313,9 +313,18 @@ std::optional GetOutOfRewrittenGenerateShapeOp( &output_dim_expr_attrs, &symbol_bindings); if (!success) return std::nullopt; + auto out_type = [&]() -> pir::Type { + if (shape.type().isa()) { + return shape.type(); + } + return paddle::dialect::DenseTensorType::get( + rewriter->ir_context(), + pir::Int64Type::get(rewriter->ir_context()), + ::common::make_ddim({output_dim_expr_attrs.size()})); + }(); return rewriter ->Build( - input_tensors, output_dim_expr_attrs, symbol_bindings) + input_tensors, output_dim_expr_attrs, symbol_bindings, out_type) .out(); } @@ -323,9 +332,8 @@ bool ReplaceShapeOpsToGenerateShape( pir::OpOperand shape_operand, pir::PatternRewriter* rewriter, pir::ShapeConstraintIRAnalysis* shape_analysis) { - if (shape_operand.source() - .defining_op() - ->isa()) { + auto* shape_def_op = shape_operand.source().defining_op(); + if (!shape_def_op || shape_def_op->isa()) { return false; } auto ShapeOrDataDimExprs4Value = @@ -379,6 +387,82 @@ class FuseShapeOpsIntoGenerateShapeOpPattern } }; +class FuseSingleElementShapeOpsIntoGenerateShapeOpPattern + : public pir::RewritePattern { + public: + explicit FuseSingleElementShapeOpsIntoGenerateShapeOpPattern( + pir::IrContext* context) + : pir::RewritePattern(MatchAnyOpTypeTag(), + 1 /*benefit*/, + context, + {} /*generated_names*/) {} + + bool Match(pir::Operation* op) const override { + auto& shape_analysis = + pir::ShapeAnalysisManager::Instance().Get(op->GetParentProgram()); + if (!IsSingleElementShapeOp(op, &shape_analysis)) return false; + if (op->isa()) return false; + + // all user op's output should has no data of shape expr + pir::Value output = op->result(0); + if (output.use_empty()) return false; + for (auto iter = output.use_begin(); iter != output.use_end(); ++iter) { + auto* user = 
iter->owner(); + if (IsSingleElementShapeOp(user, &shape_analysis)) return false; + if (user->isa()) return false; + } + + return true; + } + + void Rewrite(pir::Operation* op, + pir::PatternRewriter& rewriter) const override { + auto& shape_analysis = + pir::ShapeAnalysisManager::Instance().Get(op->GetParentProgram()); + + auto ShapeOrDataDimExprs4Value = + [&shape_analysis]( + pir::Value value) -> const symbol::ShapeOrDataDimExprs& { + return shape_analysis.GetShapeOrDataForValue(value); + }; + std::optional opt_generated_shape = + GetOutOfRewrittenGenerateShapeOp( + op->result(0), &rewriter, ShapeOrDataDimExprs4Value); + if (!opt_generated_shape.has_value()) { + LOG(WARNING) << "Create GenerateShapeOp Failed."; + return; + } + + rewriter.ReplaceAllUsesWith(op->result(0), opt_generated_shape.value()); + + if (op->use_empty()) { + rewriter.EraseOp(op); + } + } + + private: + bool IsSingleElementShapeOp( + pir::Operation* op, + pir::ShapeConstraintIRAnalysis* shape_analysis) const { + if (op->num_operands() == 0) return false; + if (op->num_results() != 1) return false; + + pir::Value output = op->result(0); + const auto& out_shape = shape_analysis->GetShapeOrDataForValue(output); + if (!out_shape.isa()) return false; + if (!out_shape.data().has_value()) return false; + + auto dtype = + output.type().dyn_cast().dtype(); + if (!dtype.isa() && !dtype.isa()) { + return false; + } + + // Only process the op which output is a single element + return out_shape.data()->size() == 1; + } +}; + class FuseShapeOpsIntoGenerateShapeOpPass : public pir::PatternRewritePass { public: FuseShapeOpsIntoGenerateShapeOpPass() @@ -393,6 +477,7 @@ class FuseShapeOpsIntoGenerateShapeOpPass : public pir::PatternRewritePass { context); ps.Add>( context); + ps.Add(context); return ps; } diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/generate_shape_util.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/generate_shape_util.cc index 
30b470d42ca2a..f2afbae3d515d 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/generate_shape_util.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/generate_shape_util.cc @@ -83,8 +83,10 @@ std::optional InsertGenerateShapeOpToRunFirst( &symbol_bindings); if (success) { return builder - ->Build( - minimal_inputs, output_dim_expr_attrs, symbol_bindings) + ->Build(minimal_inputs, + output_dim_expr_attrs, + symbol_bindings, + value.type()) .out(); } return std::nullopt; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/simplify_dim_expr_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/simplify_dim_expr_pass.cc index 69723f8be0b86..86ae8d77d5296 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/simplify_dim_expr_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/simplify_dim_expr_pass.cc @@ -101,6 +101,9 @@ void SimplifyDimExpr(pir::Operation* module_op) { VisitEachOp(module_op, [&](pir::Operation& op) { VisitEachValue(op, [&](pir::Value value) { + if (!value || !value.type()) { + return; + } const symbol::ShapeOrDataDimExprs& shape_or_data = shape_analysis->GetShapeOrDataForValue(value); VLOG(8) << op.name() << " origin_shape_or_data: " << shape_or_data; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc index 8f0bab178d75c..7beec47823a4d 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc @@ -233,17 +233,24 @@ std::tuple BroadcastableToCondValue( &rhs_symbol_bindings); CHECK(success); + auto out_type = paddle::dialect::DenseTensorType::get( + builder.ir_context(), + pir::Int64Type::get(builder.ir_context()), + ::common::make_ddim({1})); + auto lhs_value = builder .Build(lhs_minimal_inputs, 
lhs_output_dim_expr_attrs, - lhs_symbol_bindings) + lhs_symbol_bindings, + out_type) .out(); auto rhs_value = builder .Build(rhs_minimal_inputs, rhs_output_dim_expr_attrs, - rhs_symbol_bindings) + rhs_symbol_bindings, + out_type) .out(); auto const_one = builder @@ -435,9 +442,11 @@ std::shared_ptr ConstructBroadcastTree( const cinn::common::BroadcastLeaf& leaves) { VLOG(6) << "before constructed. broadcast-leaf: \n" << ToTxtString(cinn::common::BroadcastTree(leaves)); + int num_of_leaves = 0; auto broadcast_tree = std::make_shared( - cinn::common::ConstructBroadcastTree( - cinn::common::BroadcastLeaf(leaves))); + cinn::common::ConstructBroadcastTree(cinn::common::BroadcastLeaf(leaves), + &num_of_leaves)); + VLOG(4) << "num of broadcast tree leaves:" << num_of_leaves; VLOG(4) << "broadcast-tree: \n" << ToTxtString(*broadcast_tree); return broadcast_tree; } diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc index a36c208f0c96c..c2604697d68af 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc @@ -110,23 +110,26 @@ OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr) { : group_op_kind; } } - - auto group = std::make_shared(ops); - - if (fusion_op.attributes().count("group_info")) { - auto attr = fusion_op.attribute("group_info") - .dyn_cast() - .data(); - - group_op_kind = - static_cast(attr.op_pattern_kind) > static_cast(group_op_kind) - ? 
attr.op_pattern_kind - : group_op_kind; - group->set_loop_ranges(attr.loop_ranges); - group->set_loop_ranges_expr(attr.loop_ranges_expr); - group->set_reduce_axis(attr.reduce_axis); - group->set_alignment_schedule_info(attr.alignment_schedule_info); - } + PADDLE_ENFORCE_GT(fusion_op.attributes().count("group_info"), + 0UL, + phi::errors::InvalidArgument( + "fusion_op should have group_info attribute.")); + + const auto attr = fusion_op.attribute("group_info") + .dyn_cast() + .data(); + + const auto& fn_name = attr.fn_name; + auto group = std::make_shared(ops, fn_name); + + group_op_kind = + static_cast(attr.op_pattern_kind) > static_cast(group_op_kind) + ? attr.op_pattern_kind + : group_op_kind; + group->set_loop_ranges(attr.loop_ranges); + group->set_loop_ranges_expr(attr.loop_ranges_expr); + group->set_reduce_axis(attr.reduce_axis); + group->set_alignment_schedule_info(attr.alignment_schedule_info); group->set_op_pattern_kind(group_op_kind); // Rebuild output_ops and input_ops of the group diff --git a/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc index 648b3af363241..d4229ea9093bc 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc @@ -33,116 +33,128 @@ namespace dialect { namespace ir { using CompatibleInfo = cinn::hlir::framework::pir::CompatibleInfo; -class SumOpPattern : public paddle::drr::DrrPatternBase { - public: - std::string name() const override { return "SumOpPattern"; } - - void operator()(paddle::drr::DrrPatternContext *ctx) const override { - // Source Pattern - paddle::drr::SourcePattern pattern = ctx->SourcePattern(); - const auto &full_int_array = - pattern.Op(paddle::dialect::FullIntArrayOp::name(), - {{"value", pattern.Attr("axis_info")}, - {"dtype", pattern.Attr("dtype_2")}, - {"place", pattern.Attr("place_2")}}); - - const auto &sum = 
pattern.Op(paddle::dialect::SumOp::name(), - {{"dtype", pattern.Attr("dtype")}, - {"keepdim", pattern.Attr("keep_dim")}}); - pattern.Tensor("ret") = sum(pattern.Tensor("arg0"), full_int_array()); - - // Result patterns - paddle::drr::ResultPattern res = pattern.ResultPattern(); - const auto &cinn_reduce_sum = - res.Op(cinn::dialect::ReduceSumOp::name(), - {{"dim", pattern.Attr("axis_info")}, - {"dtype", pattern.Attr("dtype")}, - {"keep_dim", pattern.Attr("keep_dim")}}); - res.Tensor("ret") = cinn_reduce_sum(res.Tensor("arg0")); +namespace { + +template +std::vector GetVectorFromIntArrayAttribute( + const pir::ArrayAttribute &array_attr) { + const auto &vector_attr = array_attr.AsVector(); + + std::vector result; + if (vector_attr.size() > 0) { + PADDLE_ENFORCE_EQ(vector_attr[0].isa<::pir::Int64Attribute>(), + true, + phi::errors::Unimplemented( + "the 0th elementwise MUST be ir::Int64Attribute")); + for (size_t i = 0; i < vector_attr.size(); ++i) { + result.push_back(vector_attr[i].dyn_cast<::pir::Int64Attribute>().data()); + } } -}; + return result; +} -class MaxOpPattern : public paddle::drr::DrrPatternBase { - public: - std::string name() const override { return "MaxOpPattern"; } +} // namespace - void operator()(paddle::drr::DrrPatternContext *ctx) const override { - // Source Pattern - paddle::drr::SourcePattern pattern = ctx->SourcePattern(); - const auto &full_int_array = - pattern.Op(paddle::dialect::FullIntArrayOp::name(), - {{"value", pattern.Attr("axis_info")}, - {"dtype", pattern.Attr("dtype_2")}, - {"place", pattern.Attr("place_2")}}); +class SumOpPattern : public pir::OpRewritePattern { + public: + using pir::OpRewritePattern::OpRewritePattern; - const auto &pd_max = pattern.Op(paddle::dialect::MaxOp::name(), - {{"keepdim", pattern.Attr("keep_dim")}}); - pattern.Tensor("ret") = pd_max(pattern.Tensor("arg0"), full_int_array()); + bool Match(paddle::dialect::SumOp op) const override { + if (CompatibleInfo::IsDeniedForCinn(*op.operation())) return false; 
+ auto *axes_op = op->operand_source(1).defining_op(); + return axes_op && axes_op->isa(); + } - // Result patterns - paddle::drr::ResultPattern res = pattern.ResultPattern(); - const auto &cinn_reduce_max = - res.Op(cinn::dialect::ReduceMaxOp::name(), - {{"dim", pattern.Attr("axis_info")}, - {"keep_dim", pattern.Attr("keep_dim")}}); - res.Tensor("ret") = cinn_reduce_max(res.Tensor("arg0")); + void Rewrite(paddle::dialect::SumOp op, + pir::PatternRewriter &rewriter) const override { + auto *axes_op = op->operand_source(1).defining_op(); + auto full_int_array_op = + axes_op->dyn_cast(); + + // get attribute value from full_int_array op + const std::vector axis = GetVectorFromIntArrayAttribute( + full_int_array_op.attribute("value").dyn_cast()); + const bool keep_dim = + op.attribute("keepdim").dyn_cast<::pir::BoolAttribute>().data(); + const auto &dtype = op.attribute("dtype") + .dyn_cast() + .data(); + + auto cinn_reduce = rewriter.Build( + op->operand_source(0), axis, keep_dim, dtype); + rewriter.ReplaceAllUsesWith(op.result(0), cinn_reduce.result(0)); + rewriter.EraseOp(op); + if (full_int_array_op->use_empty()) { + rewriter.EraseOp(full_int_array_op); + } } }; -class MinOpPattern : public paddle::drr::DrrPatternBase { +template +class ReduceMinMaxOpPattern : public pir::OpRewritePattern { public: - std::string name() const override { return "MinOpPattern"; } + using pir::OpRewritePattern::OpRewritePattern; - void operator()(paddle::drr::DrrPatternContext *ctx) const override { - // Source Pattern - paddle::drr::SourcePattern pattern = ctx->SourcePattern(); - const auto &full_int_array = - pattern.Op(paddle::dialect::FullIntArrayOp::name(), - {{"value", pattern.Attr("axis_info")}, - {"dtype", pattern.Attr("dtype_2")}, - {"place", pattern.Attr("place_2")}}); - - const auto &pd_max = pattern.Op(paddle::dialect::MinOp::name(), - {{"keepdim", pattern.Attr("keep_dim")}}); - pattern.Tensor("ret") = pd_max(pattern.Tensor("arg0"), full_int_array()); + bool 
Match(SOURCE_OP op) const override { + if (CompatibleInfo::IsDeniedForCinn(*op.operation())) return false; + auto *axes_op = op->operand_source(1).defining_op(); + return axes_op && axes_op->template isa(); + } - // Result patterns - paddle::drr::ResultPattern res = pattern.ResultPattern(); - const auto &cinn_reduce_max = - res.Op(cinn::dialect::ReduceMinOp::name(), - {{"dim", pattern.Attr("axis_info")}, - {"keep_dim", pattern.Attr("keep_dim")}}); - res.Tensor("ret") = cinn_reduce_max(res.Tensor("arg0")); + void Rewrite(SOURCE_OP op, pir::PatternRewriter &rewriter) const override { + auto *axes_op = op->operand_source(1).defining_op(); + auto full_int_array_op = + axes_op->template dyn_cast(); + + // get attribute value from full_int_array op + const std::vector axis = GetVectorFromIntArrayAttribute( + full_int_array_op.attribute("value") + .template dyn_cast()); + const bool keep_dim = op.attribute("keepdim") + .template dyn_cast<::pir::BoolAttribute>() + .data(); + + auto cinn_reduce = + rewriter.Build(op->operand_source(0), axis, keep_dim); + rewriter.ReplaceAllUsesWith(op.result(0), cinn_reduce.result(0)); + rewriter.EraseOp(op); + if (full_int_array_op->use_empty()) { + rewriter.EraseOp(full_int_array_op); + } } }; -class ProdOpPattern : public paddle::drr::DrrPatternBase { +class ProdOpPattern : public pir::OpRewritePattern { public: - std::string name() const override { return "ProdOpPattern"; } + using pir::OpRewritePattern::OpRewritePattern; - void operator()(paddle::drr::DrrPatternContext *ctx) const override { - // Source Pattern - paddle::drr::SourcePattern pattern = ctx->SourcePattern(); - const auto &full_int_array = - pattern.Op(paddle::dialect::FullIntArrayOp::name(), - {{"value", pattern.Attr("axis_info")}, - {"dtype", pattern.Attr("dtype_2")}, - {"place", pattern.Attr("place_2")}}); - - const auto &pd_max = - pattern.Op(paddle::dialect::ProdOp::name(), - {{"keep_dim", pattern.Attr("keep_dim")}, - {"reduce_all", pattern.Attr("reduce_all")}}); - 
pattern.Tensor("ret") = pd_max(pattern.Tensor("arg0"), full_int_array()); + bool Match(paddle::dialect::ProdOp op) const override { + if (CompatibleInfo::IsDeniedForCinn(*op.operation())) return false; + auto *axes_op = op->operand_source(1).defining_op(); + return axes_op && axes_op->isa(); + } - // Result patterns - paddle::drr::ResultPattern res = pattern.ResultPattern(); - const auto &cinn_reduce_max = - res.Op(cinn::dialect::ReduceProdOp::name(), - {{"dim", pattern.Attr("axis_info")}, - {"keep_dim", pattern.Attr("keep_dim")}, - {"reduce_all", pattern.Attr("reduce_all")}}); - res.Tensor("ret") = cinn_reduce_max(res.Tensor("arg0")); + void Rewrite(paddle::dialect::ProdOp op, + pir::PatternRewriter &rewriter) const override { + auto *axes_op = op->operand_source(1).defining_op(); + auto full_int_array_op = + axes_op->dyn_cast(); + + // get attribute value from full_int_array op + const std::vector axis = GetVectorFromIntArrayAttribute( + full_int_array_op.attribute("value").dyn_cast()); + const bool keep_dim = + op.attribute("keep_dim").dyn_cast<::pir::BoolAttribute>().data(); + const bool reduce_all = + op.attribute("reduce_all").dyn_cast<::pir::BoolAttribute>().data(); + + auto cinn_reduce = rewriter.Build( + op->operand_source(0), axis, keep_dim, reduce_all); + rewriter.ReplaceAllUsesWith(op.result(0), cinn_reduce.result(0)); + rewriter.EraseOp(op); + if (full_int_array_op->use_empty()) { + rewriter.EraseOp(full_int_array_op); + } } }; @@ -238,6 +250,7 @@ class ReshapeOpPattern auto cinn_reshape = rewriter.Build( op->operand_source(0), vec_out_shape); rewriter.ReplaceAllUsesWith(op.result(0), cinn_reshape.result(0)); + rewriter.ReplaceAllUsesWith(op.result(1), cinn_reshape.result(1)); rewriter.EraseOp(op); } }; @@ -882,6 +895,7 @@ class SqueezeOpPattern op->operand_source(0), output_shape); rewriter.ReplaceAllUsesWith(op.result(0), cinn_reshape.result(0)); + rewriter.ReplaceAllUsesWith(op.result(1), cinn_reshape.result(1)); rewriter.EraseOp(op); @@ -929,6 
+943,7 @@ class UnsqueezeOpPattern op->operand_source(0), output_shape); rewriter.ReplaceAllUsesWith(op.result(0), cinn_reshape.result(0)); + rewriter.ReplaceAllUsesWith(op.result(1), cinn_reshape.result(1)); rewriter.EraseOp(op); @@ -1023,6 +1038,7 @@ class FlattenOpPattern reshape_op.result(0).set_type(op.result(0).type()); rewriter.ReplaceAllUsesWith(op.result(0), reshape_op.result(0)); + rewriter.ReplaceAllUsesWith(op.result(1), reshape_op.result(1)); rewriter.EraseOp(op); } @@ -1117,10 +1133,12 @@ pir::RewritePatternSet PdOpToCinnOpPass::InitializePatterns( pir::RewritePatternSet ps(context); ps.Add( context); // NOTE, scale op pattern should before AddBroadcastTo - ps.Add(paddle::drr::Create(context)); - ps.Add(paddle::drr::Create(context)); - ps.Add(paddle::drr::Create(context)); - ps.Add(paddle::drr::Create(context)); + ps.Add(context); + ps.Add>(context); + ps.Add>(context); + ps.Add(context); ps.Add(context); ps.Add(context); ps.Add(context); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.cc b/paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.cc index 74f3e4b4f200d..162d33a20ee54 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.cc @@ -225,10 +225,23 @@ struct PirToPyCodeConverterHelper { std::vector GetFreeVars(const pir::Block& block) { std::vector inputs; + const auto IsBlockPositionalArg = [&](pir::Value value) { + const auto& args = block.args(); + return std::find(args.begin(), args.end(), value) != args.end(); + }; + const auto IsBlockKeywardArg = [&](pir::Value value) { + const auto& kwargs = block.kwargs(); + for (const auto& [_, kwarg] : kwargs) { + if (kwarg == value) return true; + } + return false; + }; for (const auto& value : GetUsedExternalValue(block)) { if (!value) continue; if (std::find(inputs.begin(), inputs.end(), value) != inputs.end()) continue; + if 
(IsBlockPositionalArg(value)) continue; + if (IsBlockKeywardArg(value)) continue; inputs.push_back(value); } return inputs; @@ -330,6 +343,9 @@ struct PirToPyCodeConverterHelper { "):"); IStrings return_lambda{ret_lambda_declare}; PushBackIndented(&return_lambda, block_body); + if (block_body.empty()) { + return_lambda.push_back(Indent("pass")); + } return return_lambda; }; std::string free_vars_as_args = ConvertFreeVarsAsArgs(block); @@ -866,27 +882,52 @@ struct PirToPyCodeConverterHelper { } std::string ConvertInputTypes(const pir::Operation* op) { - std::stringstream ss; - ss << "["; - for (int i = 0; i < op->num_operands(); ++i) { - if (i > 0) { - ss << ", "; + const auto& VisitValue = [&](const auto& DoEachValue) { + for (int i = 0; i < op->num_operands(); ++i) { + DoEachValue(op->operand_source(i)); } - ss << ConvertType(op->operand_source(i).type()); - } - ss << "]"; - return ss.str(); + }; + return ConvertValueTypes(VisitValue); + } + + std::string ConvertBlockArgTypes(const pir::Block& block) { + const auto& VisitValue = [&](const auto& DoEachValue) { + for (const auto& arg : block.args()) { + DoEachValue(arg); + } + }; + return ConvertValueTypes(VisitValue); + } + + std::string ConvertBlockKwArgTypes(const pir::Block& block) { + const auto& VisitValue = [&](const auto& DoEachValue) { + for (const auto& [_, arg] : block.kwargs()) { + DoEachValue(arg); + } + }; + return ConvertValueTypes(VisitValue); } std::string ConvertOutputTypes(const pir::Operation* op) { + const auto& VisitValue = [&](const auto& DoEachValue) { + for (int i = 0; i < op->num_results(); ++i) { + DoEachValue(op->result(i)); + } + }; + return ConvertValueTypes(VisitValue); + } + + template + std::string ConvertValueTypes(const VisitValueT& VisitValue) { std::stringstream ss; ss << "["; - for (int i = 0; i < op->num_results(); ++i) { - if (i > 0) { + int i = 0; + VisitValue([&](pir::Value value) { + if (i++ > 0) { ss << ", "; } - ss << ConvertType(op->result(i).type()); - } + ss << 
ConvertType(value.type()); + }); ss << "]"; return ss.str(); } @@ -1098,7 +1139,45 @@ struct PirToPyCodeConverterHelper { } ss << "]"; } - ss << "]"; + ss << "], "; + } + { + int i = 0; + ss << "block_positional_arg_types=["; + for (const auto& region : *op) { + if (i++ > 0) { + ss << ","; + } + int j = 0; + ss << "["; + for (const auto& block : region) { + if (j++ > 0) { + ss << ","; + } + ss << ConvertBlockArgTypes(block); + } + ss << "]"; + } + ss << "], "; + } + { + int i = 0; + ss << "block_keyword_arg_types=["; + for (const auto& region : *op) { + if (i++ > 0) { + ss << ","; + } + int j = 0; + ss << "["; + for (const auto& block : region) { + if (j++ > 0) { + ss << ","; + } + ss << ConvertBlockKwArgTypes(block); + } + ss << "]"; + } + ss << "], "; } return ss.str(); } @@ -1138,18 +1217,10 @@ struct PirToPyCodeConverterHelper { std::string GetPyClassName() { std::ostringstream ss; - ss << "PirProgram_" << RandomInt(); + ss << "PirProgram_" << program_->id(); return ss.str(); } - int64_t RandomInt() { - std::random_device rd{}; - std::mt19937_64 gen(rd()); - std::uniform_int_distribution dis( - 0, std::numeric_limits::max()); - return dis(gen); - } - std::string ConvertIStringsToString(const IStrings& istrings) { std::stringstream ss; for (const auto& istring : istrings) { diff --git a/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc index 4dd7e3ecf3e7d..98a8ff2e7ec3e 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc @@ -136,10 +136,10 @@ struct CachedDimExprToValueConverter { ->Build(value, 0, dims.size() - 1) .out(); }; - if (tensor_dim.value.type() - .dyn_cast() - .dims() - .size() == 0) { + const auto& ddim = tensor_dim.value.type() + .dyn_cast() + .dims(); + if (ddim.size() == 0 || 
(ddim.size() == 1 && ddim[0] == 1)) { return CastToInt64IfNeed(tensor_dim.value); } return CastToInt64IfNeed(rewriter diff --git a/paddle/cinn/hlir/framework/op_lowering_impl_base.h b/paddle/cinn/hlir/framework/op_lowering_impl_base.h index 4d5284f22f6ed..3711f102dc2e8 100644 --- a/paddle/cinn/hlir/framework/op_lowering_impl_base.h +++ b/paddle/cinn/hlir/framework/op_lowering_impl_base.h @@ -31,6 +31,8 @@ struct BucketLoweredFuncsWrapper { std::vector> predicate2funcs; ir::LoweredFunc infer_shape_func; + std::vector> + predicate2funcsCX86; }; template diff --git a/paddle/cinn/hlir/framework/pir/compilation_cache.cc b/paddle/cinn/hlir/framework/pir/compilation_cache.cc index 1c5322c38866e..86f65bfb5c8db 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_cache.cc +++ b/paddle/cinn/hlir/framework/pir/compilation_cache.cc @@ -37,11 +37,22 @@ void* BackendResource::GetInferFuncPtr() const { return ptr; } +void* BackendResource::GetCX86HostFuncPtr() const { + VLOG(4) << "Lookup kernel name: " << host_fn_name_ + "_CX86"; + void* ptr = backend_compiler_->Lookup(host_fn_name_ + "_CX86"); + PADDLE_ENFORCE_NOT_NULL( + ptr, + ::common::errors::InvalidArgument("Can't find kernel function %s", + host_fn_name_ + "_CX86")); + return ptr; +} + pir::CINNKernelInfo BackendResource::GenerateKernelInfo() const { pir::CINNKernelInfo kernel_info; kernel_info.fn_name = host_fn_name_; kernel_info.fn_ptr = GetHostFuncPtr(); kernel_info.infer_shape_fn_ptr = GetInferFuncPtr(); + kernel_info.CX86_fn_ptr = GetCX86HostFuncPtr(); kernel_info.int_args_map = GetIntArgsMap(); return kernel_info; } diff --git a/paddle/cinn/hlir/framework/pir/compilation_cache.h b/paddle/cinn/hlir/framework/pir/compilation_cache.h index 0294755d399ef..f0f6c53380395 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_cache.h +++ b/paddle/cinn/hlir/framework/pir/compilation_cache.h @@ -41,6 +41,7 @@ class BackendResource final { void* GetHostFuncPtr() const; void* GetInferFuncPtr() const; + void* 
GetCX86HostFuncPtr() const; const std::map& GetIntArgsMap() const { return int_args_map_; } diff --git a/paddle/cinn/hlir/framework/pir/compilation_task.cc b/paddle/cinn/hlir/framework/pir/compilation_task.cc index 1304979d14a61..39ddcf8291306 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_task.cc +++ b/paddle/cinn/hlir/framework/pir/compilation_task.cc @@ -29,6 +29,11 @@ void GroupCompilationContext::SetLoweredFuncs( predicates_.push_back(std::move(predicate2func.first)); lowered_funcs_.push_back(std::move(predicate2func.second)); } + for (std::pair& predicate2func : + funcs.predicate2funcsCX86) { + CX86_predicates_.push_back(std::move(predicate2func.first)); + CX86_lowered_funcs_.push_back(std::move(predicate2func.second)); + } infer_shape_lowered_func_ = std::move(funcs.infer_shape_func); } @@ -73,11 +78,24 @@ std::shared_ptr CompilationTask::CodegenAndJit() { } builder.SetInferShapeFunc(context_->infer_shape_lowered_func_); ir::Module ir_module = builder.Build(); - return BuildPirCINNKernelInfo(ir_module); + + ir::Module::Builder builder_CX86(cinn::common::UniqName("module"), + common::DefaultHostTarget()); + CHECK_EQ(context_->CX86_predicates_.size(), + context_->CX86_lowered_funcs_.size()); + for (const ir::Expr& predicate : context_->CX86_predicates_) { + builder_CX86.AddPredicate(predicate); + } + for (const ir::LoweredFunc& func : context_->CX86_lowered_funcs_) { + builder_CX86.AddFunction(func); + } + ir::Module ir_moduleCX86 = builder_CX86.Build(); + + return BuildPirCINNKernelInfo(ir_module, ir_moduleCX86); } std::shared_ptr CompilationTask::BuildPirCINNKernelInfo( - const ir::Module& module) { + const ir::Module& module, const ir::Module& CX86module) { auto compilation_result = std::make_shared(context_->target_); auto backend_resource = std::make_shared( @@ -86,7 +104,8 @@ std::shared_ptr CompilationTask::BuildPirCINNKernelInfo( context_->group_->FuncName() + "_infer_shape", context_->group_->int_args_map()); VLOG(5) << "Start to compile 
module into cuda kernel..."; - backend_resource->GetBackendCompiler()->Build(module, ""); + backend_resource->GetBackendCompiler()->Build(module, "", false); + backend_resource->GetBackendCompiler()->AppendCX86(CX86module); compilation_result->SetBackendResource(backend_resource); VLOG(5) << "End to compile module into cuda kernel."; return compilation_result; diff --git a/paddle/cinn/hlir/framework/pir/compilation_task.h b/paddle/cinn/hlir/framework/pir/compilation_task.h index d104d264b6852..1ed3e2d5e6217 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_task.h +++ b/paddle/cinn/hlir/framework/pir/compilation_task.h @@ -42,6 +42,8 @@ class GroupCompilationContext { const pir::OpLoweringGroupPtr& group_; std::vector predicates_; std::vector lowered_funcs_; + std::vector CX86_predicates_; + std::vector CX86_lowered_funcs_; ir::LoweredFunc infer_shape_lowered_func_; }; @@ -56,7 +58,7 @@ class CompilationTask { void Lowering(); std::shared_ptr CodegenAndJit(); std::shared_ptr BuildPirCINNKernelInfo( - const ir::Module& module); + const ir::Module& module, const ir::Module& CX86module); GroupCompilationContext* context_; }; diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_group.cc b/paddle/cinn/hlir/framework/pir/op_lowering_group.cc index e5187f47ab471..e23ec953431c0 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_group.cc +++ b/paddle/cinn/hlir/framework/pir/op_lowering_group.cc @@ -145,8 +145,9 @@ std::shared_ptr OpLoweringGroup::Clone( ops_mapper[op] = new_op; } + const auto new_fn_name = this->fn_name_ + "_cloned"; // Construct Base information for new Group - auto new_group = std::make_shared(new_ops); + auto new_group = std::make_shared(new_ops, new_fn_name); for (auto* op : this->output_ops_) { new_group->output_ops_.insert(ops_mapper.at(op)); } diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_group.h b/paddle/cinn/hlir/framework/pir/op_lowering_group.h index 935e759ed2331..7595985d4d5b9 100644 --- 
a/paddle/cinn/hlir/framework/pir/op_lowering_group.h +++ b/paddle/cinn/hlir/framework/pir/op_lowering_group.h @@ -42,15 +42,13 @@ class OpLoweringGroup { OpLoweringGroup(const OpLoweringGroup&) = delete; OpLoweringGroup(OpLoweringGroup&&) = delete; - explicit OpLoweringGroup(const std::vector<::pir::Operation*>& group_ops) - : ops_(group_ops) { - fn_name_ = CompatibleInfo::GroupOpsName(ops_); - } + explicit OpLoweringGroup(const std::vector<::pir::Operation*>& group_ops, + const std::string& fn_name) + : ops_(group_ops), fn_name_(fn_name) {} - explicit OpLoweringGroup(std::initializer_list<::pir::Operation*> group_ops) - : ops_(group_ops) { - fn_name_ = CompatibleInfo::GroupOpsName(ops_); - } + explicit OpLoweringGroup(std::initializer_list<::pir::Operation*> group_ops, + const std::string& fn_name) + : ops_(group_ops), fn_name_(fn_name) {} const std::string& FuncName() const { return this->fn_name_; } ::pir::Block* GetParentBlock() const; diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc b/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc index 8ba8753a84eaf..4c4362aec935d 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc +++ b/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc @@ -20,6 +20,7 @@ #include "paddle/cinn/ast_gen_ius/tensor_group.h" #include "paddle/cinn/backends/codegen_device_util.h" #include "paddle/cinn/common/dim_expr_converter.h" +#include "paddle/cinn/common/target.h" #include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_util.h" #include "paddle/cinn/hlir/framework/compile_error.h" @@ -124,19 +125,9 @@ std::shared_ptr OpLowererImpl::GetGroupInfo( } } - BuildBroadcastInfo(group, group_info); - for (auto& op : group->output_ops()) { group_info->direct_output_var_names.insert(ValueName(op->result(0))); // collect all output tensor. 
- if (op->name() == "cinn_op.yield_store") { - auto input_var_name = ValueName(op->operand_source(0)); - if (group_info->broadcast_info.count(input_var_name)) { - auto base_info = group_info->broadcast_info[input_var_name]; - base_info.with_constrain = true; - group_info->broadcast_info[ValueName(op->result(0))] = base_info; - } - } for (auto opresult : op->results()) { if (tensor_map.count(opresult) == 0) { continue; @@ -146,13 +137,7 @@ std::shared_ptr OpLowererImpl::GetGroupInfo( } for (const auto& val : group->output_values()) { - if (val.defining_op()->name() == "cinn_op.reshape" && - erase_reshape.count(val.defining_op())) { - group_info->direct_output_var_names.insert( - ValueName(val.defining_op()->operand_source(0))); - } else { - group_info->direct_output_var_names.insert(ValueName(val)); - } + group_info->direct_output_var_names.insert(ValueName(val)); } return group_info; } @@ -207,6 +192,8 @@ BucketLoweredFuncsWrapper OpLowererImpl::BucketLower( if (ops.size() == 1 && ops[0]->name() == "custom_call") { return {{{ir::Expr(1), LowerCustomCall(group)[0]}}, ir::LoweredFunc()}; } + auto X86Expr = LowerX86(group, ops, apply_op_schedule); + VLOG(3) << "After x86 lower, ir is: \n" << X86Expr; std::vector group_func_arg_tensors; std::unordered_map<::pir::Value, ir::Tensor> tensor_map; @@ -272,6 +259,9 @@ BucketLoweredFuncsWrapper OpLowererImpl::BucketLower( ir_sch.GetModule().GetExprs()[0]); } + // The last func is stored as a kernel on x86 + cond2func_bodies.emplace_back(ir::Expr(true), X86Expr); + // 3.Do post-processing, // including preparing function args and temporary variables, // applying low-level optimization passes, etc. 
@@ -296,10 +286,16 @@ BucketLoweredFuncsWrapper OpLowererImpl::BucketLower( "The size of funcs and cond2func_bodies should be " "the same.")); BucketLoweredFuncsWrapper funcs_wrapper; - for (int i = 0; i < funcs.size(); ++i) { + for (int i = 0; i < funcs.size() - 1; ++i) { funcs_wrapper.predicate2funcs.emplace_back(cond2func_bodies[i].first, funcs[i]); } + // The last func is x86 kernel. + for (size_t i = funcs.size() - 1; i < funcs.size(); ++i) { + funcs[i]->name = funcs[i]->name + "_CX86"; + funcs_wrapper.predicate2funcsCX86.emplace_back(cond2func_bodies[i].first, + funcs[i]); + } funcs_wrapper.infer_shape_func = GenerateInferShapeFunc(group, infer_shape_tensor_args, group_func_args); @@ -514,159 +510,6 @@ std::vector OpLowererImpl::LowerGroup( &infer_shape_args); } -void OpLowererImpl::BuildBroadcastInfo(const OpLoweringGroupPtr& group, - std::shared_ptr group_info) { - // TODO(phlrain): this is primary verion for loop aligment - // will be update by a new method - auto& align_info = group->mut_alignment_schedule_info(); - - auto& ops = group->ops(); - for (auto op1 : ops) { - auto it = align_info.find(op1); - if (it == align_info.end()) { - continue; - } - if (op1->name() == "cinn_op.generate_shape") { - continue; - } - - if (it->second.size() > 1) { - for (size_t i = 0; i < it->second.size(); ++i) { - } - // TODO(phlran): merge to factor info here - it->second.front().factor_info = it->second.back().factor_info; - it->second.resize(1); - } - - PADDLE_ENFORCE_EQ( - it->second.size(), - 1, - phi::errors::Unimplemented("%s, only suppopt one transform yet", - it->first->name())); - - if (it->second[0].type == ScheduleAlignType::kBroadcast) { - // get broadcast op - auto broadcast_axes = it->second[0].axis_info; - auto output_shape = it->second[0].factor_info; - - phi::DDim in_dim; - - if (it->first->name() == "cinn_op.reshape") { - // TODO(phlrain): deal with reshape in a better way - if (it->first->result(0).use_count() == 1 && - 
it->first->result(0).first_use().owner()->isa<::pir::YieldOp>()) { - continue; - } - } - - if ((it->first->name() != "cinn_op.reshape") && - (it->first->name() != "cinn_op.broadcast") && - (it->first->num_operands() == 1)) { - in_dim = it->first->operand_source(0) - .type() - .dyn_cast() - .dims(); - } else { - in_dim = it->first->result(0) - .type() - .dyn_cast() - .dims(); - } - - cinn::ir::BroadcastInfo info; - if (in_dim.size() == 1u && in_dim[0] == 1u) { - info.full_broadcast = true; - for (size_t i = 0; i < output_shape.size(); ++i) { - info.broadcast_axes.push_back(i); - info.output_shape.push_back(-1); - info.output_dim_expr.push_back(group->loop_ranges_expr()[i]); - } - } else if (in_dim.size() == broadcast_axes.size()) { - if (in_dim.size() != output_shape.size()) { - info.split_first = true; - - if (broadcast_axes.size() == 1) { - std::vector temp_shape(output_shape.size(), 1); - temp_shape[broadcast_axes[0]] = output_shape[broadcast_axes[0]]; - info.split_info.emplace_back(0, temp_shape); - - for (size_t i = 0; i < output_shape.size(); ++i) { - if (i != broadcast_axes[0]) { - info.broadcast_axes.push_back(i); - info.output_shape.push_back(output_shape[i]); - } - } - } else { - throw std::runtime_error("not support multi dim broadcast yet"); - } - } else { - for (size_t i = 0; i < broadcast_axes.size(); ++i) { - if (in_dim[i] < 0 || output_shape[broadcast_axes[i]] < 0) { - continue; - } - if (in_dim[i] != output_shape[broadcast_axes[i]]) { - if (in_dim[i] != 1) { - throw std::runtime_error("Only support 1 - D broadcast "); - } - info.broadcast_axes.push_back(i); - info.output_shape.push_back(output_shape[broadcast_axes[i]]); - } - } - } - } else { - // only deal with broadcast axes - std::set axes_set; - for (size_t i = 0; i < broadcast_axes.size(); ++i) { - axes_set.insert(broadcast_axes[i]); - if (in_dim[broadcast_axes[i]] != 1) { - throw std::runtime_error("Only support 1 - D broadcast "); - } - - info.broadcast_axes.push_back(broadcast_axes[i]); - 
info.output_shape.push_back(output_shape[broadcast_axes[i]]); - } - } - - for (size_t i = 0; i < it->first->num_operands(); ++i) { - if (!align_info.count(it->first->operand_source(i).defining_op())) { - info.first_broadcast = true; - break; - } - } - - auto op_out = it->first->result(0); - info.op_name = it->first->name(); - - if (op_out.use_count() == 1 && - op_out.first_use().owner()->name() == "cf.yield") { - info.with_constrain = true; - } - - if (erase_reshape.count(op_out.first_use().owner())) { - info.with_constrain = true; - } - - group_info->broadcast_info[ValueName(op_out)] = info; - - for (auto use_it = op_out.use_begin(); use_it != op_out.use_end(); - ++use_it) { - if (use_it->owner()->name() == "cf.yield") { - continue; - } - if (CompatibleInfo::OpKind(*(use_it->owner())) == - framework::kBroadcast) { - if (!info.full_broadcast) { - group_info->broadcast_to_elementwise[ValueName( - use_it->owner()->result(0))] = info; - } - } - } - } else { - throw std::runtime_error("only supportbroadcast type for now"); - } - } -} - std::vector OpLowererImpl::LowerCustomCall( const OpLoweringGroupPtr& group) { const auto& ops = group->ops(); @@ -777,10 +620,6 @@ std::vector OpLowererImpl::PostProcess( } } infer_shape_arg_tensor->push_back(tensor); - if ((op_result.defining_op()->name() == "cinn_op.reshape") && - erase_reshape.count(op_result.defining_op())) { - tensor = tensor_map.at(op_result.defining_op()->operand_source(0)); - } if (arg_name_set.count(tensor->buffer->name) != 0) { continue; @@ -846,18 +685,21 @@ std::vector OpLowererImpl::PostProcess( } } std::vector lowered_funcs; - for (ir::Expr func_body : func_bodies) { + for (int i = 0; i < func_bodies.size(); ++i) { + ir::Expr func_body = func_bodies[i]; optim::EliminateDeadScheduleBlock(&(func_body), group->output_names()); - cinn::common::DefaultDeviceTarget().arch.Match( - [&](std::variant) {}, - [&](common::NVGPUArch) { + if (i != func_bodies.size() - 1) { + 
cinn::common::DefaultDeviceTarget().arch.Match( + [&](std::variant) {}, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - optim::EliminateCommonGlobalMemoryRead(&(func_body)); - optim::OptimizeExprGPU(&(func_body)); + optim::EliminateCommonGlobalMemoryRead(&(func_body)); + optim::OptimizeExprGPU(&(func_body)); #endif - }); + }); + } // 2.Prepare temp buffers auto temp_buffers = @@ -869,8 +711,13 @@ std::vector OpLowererImpl::PostProcess( func->PrepareBufferCastExprs(); } // 4.Apply low level pass - func = optim::Optimize(Expr(func), target_, false).as_lowered_func_ref(); - optim::RearrangeLoadInstruction(&(func->body)); + if (i != func_bodies.size() - 1) { + func = optim::Optimize(Expr(func), target_, false).as_lowered_func_ref(); + optim::RearrangeLoadInstruction(&(func->body)); + } else { + func = optim::Optimize(Expr(func), common::DefaultHostTarget(), false) + .as_lowered_func_ref(); + } lowered_funcs.push_back(std::move(func)); } @@ -1327,6 +1174,73 @@ ir::LoweredFunc OpLowererImpl::GenerateInferShapeFunc( {}); return infer_shape_func; } +ir::Expr OpLowererImpl::LowerX86(const OpLoweringGroupPtr& group, + const std::vector<::pir::Operation*>& ops, + bool apply_op_schedule) { + std::vector group_func_arg_tensors; + std::unordered_map<::pir::Value, ir::Tensor> tensor_map; + // for some op, it will output more tmp value and regard as + // XX_0, XX_1, so we log them in tmp_tensor_info; + std::unordered_map tmp_tensor_info; + + auto need_lower_x86 = [&]() -> bool { + for (auto* op : ops) { + for (size_t i = 0; i < op->num_operands(); ++i) { + auto in = op->operand_source(i); + auto type_info = in.type().dyn_cast(); + auto dtype = type_info.dtype(); + const auto& dims = type_info.dims(); + std::vector sym_shape; + // 1. dynamic shape not need lower x86 + if (::common::contain_unknown_dim(dims)) { + return false; + } + // 2. 
size < 4 not need lower x86 + int64_t sym_shape_size = 1; + for (int i = 0; i < dims.size(); ++i) { + sym_shape_size *= dims[i]; + if (sym_shape_size > 4) { + return false; + } + } + } + + std::vector out_types; + std::vector> out_shapes; + CollectOutputInfo(op, &out_types, &out_shapes, group); + for (const auto& tt : out_types) { + // 3. float16 not need lower x86 + if (tt.is_float16()) { + return false; + } + } + } + return true; + }; + if (!need_lower_x86()) { + return ir::Expr(-1); + } + + this->target_ = common::DefaultHostTarget(); + cinn::runtime::CurrentTarget::SetCurrentTarget(this->target_); + + std::vector func_bodies = + LowerOps(group, + ops, + apply_op_schedule, + &OpLowererImpl::DyShapeScheduleDetermineFunction, + &group_func_arg_tensors, + &tensor_map, + &tmp_tensor_info); + this->target_ = common::DefaultNVGPUTarget(); + cinn::runtime::CurrentTarget::SetCurrentTarget(this->target_); + ir::ModuleExpr mod_expr(func_bodies); + ir::IRSchedule ir_sch( + mod_expr, -1, false, cinn::utils::ErrorMessageLevel::kGeneral, true); + ir_sch.MergeExprs(); + auto X86Expr = ir::ir_utils::IRCopy(ir_sch.GetModule().GetExprs().at(0)); + return X86Expr; +} } // namespace pir } // namespace framework diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_impl.h b/paddle/cinn/hlir/framework/pir/op_lowering_impl.h index 838b70da20fa5..9edb88ec3e431 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_impl.h +++ b/paddle/cinn/hlir/framework/pir/op_lowering_impl.h @@ -57,10 +57,6 @@ struct GroupInfo { std::set shared_var_names; std::set direct_output_var_names; std::vector broadcast_output_names; - - std::unordered_map broadcast_info; - std::unordered_map - broadcast_to_elementwise; }; class OpLowererImpl : public OpLowererImplBase { @@ -296,12 +292,11 @@ class OpLowererImpl : public OpLowererImplBase { void BuildBroadcastInfo(const OpLoweringGroupPtr& group, std::shared_ptr group_info); - Target target_; - + ir::Expr LowerX86(const OpLoweringGroupPtr& group, + const 
std::vector<::pir::Operation*>& ops, + bool apply_op_schedule); PrettyNamer* name_gene_; - - std::unordered_set<::pir::Operation*> erase_reshape; }; } // namespace pir diff --git a/paddle/cinn/hlir/framework/pir/utils.h b/paddle/cinn/hlir/framework/pir/utils.h index c489e1847f26f..e3e4e8163cfb9 100644 --- a/paddle/cinn/hlir/framework/pir/utils.h +++ b/paddle/cinn/hlir/framework/pir/utils.h @@ -33,6 +33,7 @@ struct CINNKernelInfo { std::string fn_name; void* fn_ptr; void* infer_shape_fn_ptr; + void* CX86_fn_ptr; struct ArgDimIdx { int arg_idx; diff --git a/paddle/cinn/hlir/framework/pir_compiler.cc b/paddle/cinn/hlir/framework/pir_compiler.cc index 666ae3d340138..2b13f8a0a5d9c 100644 --- a/paddle/cinn/hlir/framework/pir_compiler.cc +++ b/paddle/cinn/hlir/framework/pir_compiler.cc @@ -77,6 +77,8 @@ std::vector PirCompiler::Build( auto worker_fn = [&](int index) { CompilationTask task(&group_compilation_contexts[index]); compilation_results[index] = task(); + // Triggering llvm compilation in thread + compilation_results[index]->GetKernelInfo(); }; utils::parallel_run(worker_fn, utils::SequenceDispatcher(0, task_size), diff --git a/paddle/cinn/hlir/pass/alterlayout.cc b/paddle/cinn/hlir/pass/alterlayout.cc index 74c8c0915e0af..a747c57dd77af 100644 --- a/paddle/cinn/hlir/pass/alterlayout.cc +++ b/paddle/cinn/hlir/pass/alterlayout.cc @@ -20,7 +20,7 @@ #include "paddle/cinn/hlir/pe/schedule.h" #include "paddle/cinn/ir/layout.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -119,10 +119,26 @@ std::vector UpdateInferInfos( CHECK(!infertypes.empty()) << node->op()->name << " finds no infertype"; CHECK(!inferlayouts.empty()) << node->op()->name << " finds no inferlayout"; auto outlinks = node->outlinks_in_order(); - CHECK_EQ(infershapes.size(), infertypes.size()); - CHECK_EQ(inferlayouts.size(), 2U); - CHECK_EQ(infertypes.size(), inferlayouts[0].size()); - CHECK_EQ(outlinks.size(), 
infershapes.size()); + PADDLE_ENFORCE_EQ( + infershapes.size(), + infertypes.size(), + phi::errors::InvalidArgument( + "The size of infershapes and infertypes should be equal")); + PADDLE_ENFORCE_EQ(inferlayouts.size(), + 2U, + phi::errors::InvalidArgument( + "The size of inferlayouts should be 2, but got %d", + inferlayouts.size())); + PADDLE_ENFORCE_EQ( + infertypes.size(), + inferlayouts[0].size(), + phi::errors::InvalidArgument( + "The size of infertypes and inferlayouts[0] should be equal")); + PADDLE_ENFORCE_EQ( + outlinks.size(), + infershapes.size(), + phi::errors::InvalidArgument( + "The size of outlinks and infershapes should be equal")); for (int i = 0; i < outlinks.size(); i++) { auto* sink = outlinks[i]->sink(); @@ -181,7 +197,11 @@ void AlterLayoutPass(Graph* graph) { node->attrs.attr_store.at("dilation")); } const auto& conv_inlinks = node->inlinks_in_order(); - CHECK_EQ(conv_inlinks.size(), 2U) << "conv2d should have 2 inputs"; + PADDLE_ENFORCE_EQ(conv_inlinks.size(), + 2U, + phi::errors::InvalidArgument( + "conv2d should have 2 inputs, but got %d", + conv_inlinks.size())); std::vector> inputs_shape; for (auto& link : conv_inlinks) { auto* source = link->source(); @@ -231,8 +251,11 @@ void AlterLayoutPass(Graph* graph) { input_nodes.push_back(source); } // get new layout: ic_bn, oc_bn - CHECK_EQ(input_nodes.size(), 2U) - << "conv2d should have 2 input nodes"; + PADDLE_ENFORCE_EQ(input_nodes.size(), + 2U, + phi::errors::InvalidArgument( + "conv2d should have 2 input nodes, but got %d", + input_nodes.size())); auto* input_node = input_nodes[0]; auto* weight_node = input_nodes[1]; CHECK(shape_dict.count(input_node->id())) @@ -347,8 +370,11 @@ void AlterLayoutPass(Graph* graph) { conv2d_NCHWc_inputtypes.push_back(trans_out_dtypes); conv2d_NCHWc_inputlayouts.push_back(dst_input_layout); } else { - CHECK_EQ(input_shape.size(), 5U) - << "conv2d_NCHWc op's input shape dim should be 5"; + PADDLE_ENFORCE_EQ( + input_shape.size(), + 5U, + 
phi::errors::InvalidArgument( + "conv2d_NCHWc op's input shape dim should be 5")); conv2d_NCHWc_inputshapes.push_back(input_shape); conv2d_NCHWc_inputtypes.push_back(input_type); CHECK(layout_dict.count(input_node->id())) @@ -395,8 +421,11 @@ void AlterLayoutPass(Graph* graph) { conv2d_NCHWc_inputtypes.push_back(trans_out_dtypes); conv2d_NCHWc_inputlayouts.push_back(dst_kernel_layout); } else { - CHECK_EQ(weight_shape.size(), 6U) - << weight_node->id() << " shape dim should be 6"; + PADDLE_ENFORCE_EQ( + weight_shape.size(), + 6U, + phi::errors::InvalidArgument( + "conv2d_NCHWc op's weight shape dim should be 6")); conv2d_NCHWc_inputshapes.push_back(weight_shape); conv2d_NCHWc_inputtypes.push_back(weight_type); CHECK(layout_dict.count(weight_node->id())) @@ -477,12 +506,29 @@ void AlterLayoutPass(Graph* graph) { input_shapes, input_layouts, node->attrs, graph->target_); // if input inferred layouts is different from original's, expand dims // or do transformation. - CHECK_EQ(inferlayouts.size(), 2U); + PADDLE_ENFORCE_EQ( + inferlayouts.size(), + 2U, + phi::errors::InvalidArgument( + "The size of inferlayouts should be 2, but got %d", + inferlayouts.size())); auto new_input_layouts = inferlayouts[1]; auto inlinks = node->inlinks_in_order(); - CHECK_EQ(input_layouts.size(), inlinks.size()); - CHECK_EQ(input_layouts.size(), new_input_layouts.size()); - CHECK_EQ(input_layouts.size(), input_shapes.size()); + PADDLE_ENFORCE_EQ( + input_layouts.size(), + inlinks.size(), + phi::errors::InvalidArgument( + "The size of input_layouts and inlinks should be equal")); + PADDLE_ENFORCE_EQ(input_layouts.size(), + new_input_layouts.size(), + phi::errors::InvalidArgument( + "The size of input_layouts and " + "new_input_layouts should be equal")); + PADDLE_ENFORCE_EQ( + input_layouts.size(), + input_shapes.size(), + phi::errors::InvalidArgument("The size of input_layouts and " + "input_shapes should be equal")); bool reset_axis = false; for (int i = 0; i < inlinks.size(); i++) { if 
(input_layouts[i] != new_input_layouts[i]) { diff --git a/paddle/cinn/hlir/pass/check_fusion_accuracy_pass.cc b/paddle/cinn/hlir/pass/check_fusion_accuracy_pass.cc index 0326a4a5fce33..c0bccf285c730 100644 --- a/paddle/cinn/hlir/pass/check_fusion_accuracy_pass.cc +++ b/paddle/cinn/hlir/pass/check_fusion_accuracy_pass.cc @@ -27,7 +27,7 @@ #include "paddle/cinn/hlir/framework/visualize_helper.h" #include "paddle/cinn/hlir/pass/fusion_helper_base.h" #include "paddle/cinn/runtime/custom_function.h" - +#include "paddle/common/enforce.h" namespace cinn::hlir::pass { using framework::Graph; @@ -529,8 +529,10 @@ std::vector CheckFusionAccuracyPass::TopologicalOrder( } } - CHECK_EQ(ordered_nodes.size(), nodes.size()) - << "There has circle in group! Please check."; + PADDLE_ENFORCE_EQ( + ordered_nodes.size(), + nodes.size(), + phi::errors::InvalidArgument("There has circle in group! Please check.")); return ordered_nodes; } diff --git a/paddle/cinn/hlir/pass/check_fusion_accuracy_pass_test.cc b/paddle/cinn/hlir/pass/check_fusion_accuracy_pass_test.cc index 10f5c83e6600d..447da47e147dc 100644 --- a/paddle/cinn/hlir/pass/check_fusion_accuracy_pass_test.cc +++ b/paddle/cinn/hlir/pass/check_fusion_accuracy_pass_test.cc @@ -19,7 +19,7 @@ #include "paddle/cinn/common/target.h" #include "paddle/cinn/frontend/decomposer/test_helper.h" - +#include "paddle/common/enforce.h" namespace cinn::frontend { using hlir::framework::Graph; @@ -96,7 +96,11 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D"}); } @@ -134,7 +138,11 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_1) { VLOG(1) << "After 
CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D"}); } @@ -175,7 +183,11 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_2) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D", "E", "F"}); } @@ -216,7 +228,11 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_3) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D", "E", "F"}); } @@ -257,7 +273,11 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_4) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D", "E", "F"}); } @@ -291,7 +311,11 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_5) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << 
graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B"}); } @@ -328,7 +352,11 @@ TEST(CheckFusionAccuracyPass, Broadcast_Test_0) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D"}); } @@ -365,7 +393,11 @@ TEST(CheckFusionAccuracyPass, Broadcast_Test_2) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D"}); } @@ -404,7 +436,11 @@ TEST(CheckFusionAccuracyPass, Broadcast_Test_4) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D", "E"}); } @@ -443,7 +479,11 @@ TEST(CheckFusionAccuracyPass, Broadcast_Test_5) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + 
PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D", "E"}); } @@ -479,7 +519,11 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_0) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B"}); } @@ -514,7 +558,11 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_1) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B"}); } @@ -552,7 +600,11 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_2) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C"}); } @@ -590,7 +642,11 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_3) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + 
"number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D"}); } @@ -629,7 +685,11 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_4) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B", "C", "D"}); } @@ -665,7 +725,11 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_5) { VLOG(1) << "After CheckFusionAccuracyPass:\n" << graph->DebugGroupedGraph(std::unordered_set{}); - CHECK_EQ(graph->fusion_groups.size(), group_size_after); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + group_size_after, + phi::errors::InvalidArgument( + "The number of fusion groups is not equal to the " + "number of groups after the pass.")); RunTest(target, graph, {"A", "B"}); } diff --git a/paddle/cinn/hlir/pass/constant_folding_pass_util.cc b/paddle/cinn/hlir/pass/constant_folding_pass_util.cc index 748948f2206fc..a6fb84f76b832 100644 --- a/paddle/cinn/hlir/pass/constant_folding_pass_util.cc +++ b/paddle/cinn/hlir/pass/constant_folding_pass_util.cc @@ -21,7 +21,7 @@ #include "paddle/cinn/hlir/op/op_util.h" #include "paddle/cinn/utils/functional.h" #include "paddle/cinn/utils/type_defs.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -238,7 +238,10 @@ void fold_expand_dims_fill_constant(const FusionHelperBase* helper, // [0, total_size-1]. check axes can't repeat. std::sort(axes.begin(), axes.end(), std::less()); for (int idx = 0; idx < axes_size - 1; ++idx) { - CHECK_NE(axes[idx], axes[idx + 1]); + PADDLE_ENFORCE_NE(axes[idx], + axes[idx + 1], + phi::errors::InvalidArgument( + "The axes of expand_dims should not repeat.")); } // insert 1 to new shape. 
std::vector n_shape(total_size, 1); diff --git a/paddle/cinn/hlir/pass/dce_pass.cc b/paddle/cinn/hlir/pass/dce_pass.cc index b17f8ee4de5d9..2a68e90bc342a 100644 --- a/paddle/cinn/hlir/pass/dce_pass.cc +++ b/paddle/cinn/hlir/pass/dce_pass.cc @@ -16,7 +16,7 @@ #include "paddle/cinn/common/type.h" #include "paddle/cinn/hlir/pass/op_fusion_pass_util.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -118,7 +118,10 @@ class DceHelper : public FusionHelperBase { }; void DCEPassInternal(Graph* graph) { - CHECK_GT(graph->outputs.size(), 0); + PADDLE_ENFORCE_GT(graph->outputs.size(), + 0, + phi::errors::InvalidArgument( + "The graph should have at least one output node.")); DceHelper dce_helper(graph); dce_helper(); } diff --git a/paddle/cinn/hlir/pass/dce_pass_test.cc b/paddle/cinn/hlir/pass/dce_pass_test.cc index bb9c5d7654851..1ebc0878ee2cb 100644 --- a/paddle/cinn/hlir/pass/dce_pass_test.cc +++ b/paddle/cinn/hlir/pass/dce_pass_test.cc @@ -15,7 +15,7 @@ #include #include "paddle/cinn/frontend/decomposer/test_helper.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace frontend { @@ -36,7 +36,10 @@ TEST(DCE, Test_0) { std::make_shared(program, fetch_ids, target); hlir::framework::ApplyPass(graph.get(), "DCE"); - CHECK_EQ(graph->nodes().size(), 4); + PADDLE_ENFORCE_EQ( + graph->nodes().size(), + 4, + phi::errors::InvalidArgument("The graph nodes's size should be 4.")); } TEST(DCE, Test_1) { @@ -59,7 +62,10 @@ TEST(DCE, Test_1) { auto graph = std::make_shared(program, fetch_ids, target); hlir::framework::ApplyPass(graph.get(), "DCE"); - CHECK_EQ(graph->nodes().size(), 8); + PADDLE_ENFORCE_EQ( + graph->nodes().size(), + 8, + phi::errors::InvalidArgument("The graph nodes's size should be 8.")); } } // namespace frontend diff --git a/paddle/cinn/hlir/pass/dense_merge_pass.cc b/paddle/cinn/hlir/pass/dense_merge_pass.cc index a726aa1a36c1a..1fc5e4a52b60d 100644 --- a/paddle/cinn/hlir/pass/dense_merge_pass.cc +++ 
b/paddle/cinn/hlir/pass/dense_merge_pass.cc @@ -15,7 +15,7 @@ #include "paddle/cinn/common/graph_utils.h" #include "paddle/cinn/common/type.h" #include "paddle/cinn/hlir/pass/fusion_helper_base.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -100,7 +100,13 @@ class DenseMergePassHelper : public FusionHelperBase { std::unordered_map> dense_op_map; for (auto dense_op : dense_ops) { const auto& in_links = dense_op->inlinks_in_order(); - CHECK_GT(in_links.size(), pos); + PADDLE_ENFORCE_GT(in_links.size(), + pos, + phi::errors::InvalidArgument( + "The input link size of dense op should be greater " + "than %d, but got %d.", + pos, + in_links.size())); auto sign = GenOpSign(in_links[pos]->source()->safe_as(), dense_op->attrs); if (dense_op_map.count(sign)) { @@ -131,7 +137,14 @@ class DenseMergePassHelper : public FusionHelperBase { const auto& in_links = op->inlinks_in_order(); node->UnLinkSingleTo(op); // link to new node - CHECK_GT(in_links.size(), pos); + PADDLE_ENFORCE_GT( + in_links.size(), + pos, + phi::errors::InvalidArgument("The input link size of dense " + "op should be greater than %d, " + "but got %d.", + pos, + in_links.size())); in_links[pos]->source()->LinkTo(node_tmp); // unlink old dense node in_links[pos]->source()->UnLinkSingleTo(op); diff --git a/paddle/cinn/hlir/pass/dot_merger.cc b/paddle/cinn/hlir/pass/dot_merger.cc index 941cf6b29b66c..6e4e4108ecd91 100644 --- a/paddle/cinn/hlir/pass/dot_merger.cc +++ b/paddle/cinn/hlir/pass/dot_merger.cc @@ -16,7 +16,7 @@ #include "paddle/cinn/hlir/framework/graph.h" #include "paddle/cinn/hlir/framework/pass.h" #include "paddle/cinn/hlir/pass/infershape.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -368,9 +368,12 @@ class DotMergerPass { input_operand(merge_nodes[i - 1], axis)->id()); auto shape_b = builder->shape_dict().at(input_operand(merge_nodes[i], axis)->id()); - CHECK_EQ(shape_a[1 - axis], shape_b[1 - axis]) - 
<< "The shape of matmul is error. " << shape_a.size() << ", " - << shape_b.size(); + PADDLE_ENFORCE_EQ( + shape_a[1 - axis], + shape_b[1 - axis], + phi::errors::InvalidArgument("The shape of matmul is error. %d, %d", + shape_a.size(), + shape_b.size())); concat_nodes.push_back(input_operand(merge_nodes[i], axis)); } auto* concat_out = builder->Concat(axis, concat_nodes); @@ -444,9 +447,12 @@ class DotMergerPass { auto shape_shared = builder->shape_dict().at(shared_input->id()); auto shape_a = builder->shape_dict().at(input_a->id()); auto shape_b = builder->shape_dict().at(input_b->id()); - CHECK_EQ(shape_a[1 - axis], shape_b[1 - axis]) - << "The shape of matmul is error. " << shape_a.size() << ", " - << shape_b.size(); + PADDLE_ENFORCE_EQ( + shape_a[1 - axis], + shape_b[1 - axis], + phi::errors::InvalidArgument("The shape of matmul is error. %d, %d", + shape_a.size(), + shape_b.size())); auto* concat_out = builder->Concat(axis, {input_a, input_b}); NodeData* matmul_out{}; if (!lhs) { diff --git a/paddle/cinn/hlir/pass/fusion_helper_base.h b/paddle/cinn/hlir/pass/fusion_helper_base.h index 3437b334fa5df..79580815d91bf 100644 --- a/paddle/cinn/hlir/pass/fusion_helper_base.h +++ b/paddle/cinn/hlir/pass/fusion_helper_base.h @@ -23,7 +23,7 @@ #include "paddle/cinn/hlir/framework/pass.h" #include "paddle/cinn/hlir/pass/use_pass.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -104,7 +104,10 @@ class FusionHelperBase { shape_t GetNodeInputShape(const Node* node) const { auto node_datas = GetProducerNodeData(node); - CHECK_GT(node_datas.size(), 0); + PADDLE_ENFORCE_GT( + node_datas.size(), + 0, + phi::errors::InvalidArgument("The input node should not be empty!")); CHECK(shape_dict_.count(node_datas[0]->id())) << "Can't find " << node_datas[0]->id() << " 's shape!"; return shape_dict_.at(node_datas[0]->id()); @@ -168,7 +171,10 @@ class FusionHelperBase { int GetSharedSize(const Node* node) 
const { auto producers = GetProducerNodeData(node); - CHECK_GT(producers.size(), 0); + PADDLE_ENFORCE_GT( + producers.size(), + 0, + phi::errors::InvalidArgument("The input node should not be empty!")); auto inshape = shape_dict_.at(producers[0]->id()); auto axes = absl::get>(node->attrs.attr_store.at("dim")); if (WithoutLastDimInReduce(inshape, axes)) { diff --git a/paddle/cinn/hlir/pass/fusion_merge_pass.cc b/paddle/cinn/hlir/pass/fusion_merge_pass.cc index fd023662f9050..0d93dd1593c4f 100644 --- a/paddle/cinn/hlir/pass/fusion_merge_pass.cc +++ b/paddle/cinn/hlir/pass/fusion_merge_pass.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/cinn/hlir/pass/fusion_merge_pass_util.h" - +#include "paddle/common/enforce.h" PD_DECLARE_bool(enhance_vertical_fusion_with_recompute); namespace cinn { @@ -705,7 +705,11 @@ class FusionMergePassHelper : public FusionHelperBase { } } - CHECK_GE(producer->consumer_groups().size(), candidates.size()); + PADDLE_ENFORCE_GE(producer->consumer_groups().size(), + candidates.size(), + phi::errors::InvalidArgument( + "The number of candidates should be less than or " + "equal to the number of consumer groups!")); if (producer->consumer_groups().size() == 0 && candidates.size() == 0 && output_nodes_set_.count(producer->CollectNodes()[0]) == 0) { producer->belong_groups.insert(*fusionable_consumers->begin()); @@ -959,8 +963,16 @@ class FusionMergePassHelper : public FusionHelperBase { CHECK(consumer->belong_groups.size()); consumers.insert(*consumer->belong_groups.begin()); } - CHECK_EQ(group->producer_groups().size(), producers.size()); - CHECK_EQ(group->consumer_groups().size(), consumers.size()); + PADDLE_ENFORCE_EQ(group->producer_groups().size(), + producers.size(), + phi::errors::InvalidArgument( + "The number of producers should be equal to the " + "number of producer groups!")); + PADDLE_ENFORCE_EQ(group->consumer_groups().size(), + consumers.size(), + phi::errors::InvalidArgument( + "The number of consumers 
should be equal to the " + "number of consumer groups!")); (*group->mut_producer_groups()) = producers; (*group->mut_consumer_groups()) = consumers; } diff --git a/paddle/cinn/hlir/pass/fusion_merge_pass_test.cc b/paddle/cinn/hlir/pass/fusion_merge_pass_test.cc old mode 100755 new mode 100644 index f6f9ecee97c43..14cc221edaaf0 --- a/paddle/cinn/hlir/pass/fusion_merge_pass_test.cc +++ b/paddle/cinn/hlir/pass/fusion_merge_pass_test.cc @@ -15,7 +15,7 @@ #include #include "paddle/cinn/frontend/decomposer/test_helper.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace frontend { @@ -39,9 +39,15 @@ TEST(FusionMergePass, ElementWise_Fusion_0) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 3); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 3, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 3.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } TEST(FusionMergePass, ElementWise_Fusion_1) { @@ -65,9 +71,15 @@ TEST(FusionMergePass, ElementWise_Fusion_1) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 4); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 4, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 4.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } TEST(FusionMergePass, ElementWise_Fusion_2) { @@ -94,9 +106,15 @@ TEST(FusionMergePass, ElementWise_Fusion_2) { auto graph = std::make_shared(program, target); 
hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 5); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 5, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 5.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } TEST(FusionMergePass, ElementWise_Fusion_3) { @@ -123,9 +141,15 @@ TEST(FusionMergePass, ElementWise_Fusion_3) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 5); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 5, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 5.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } TEST(FusionMergePass, ElementWise_Fusion_4) { @@ -152,9 +176,15 @@ TEST(FusionMergePass, ElementWise_Fusion_4) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 5); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 5, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 5.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } TEST(FusionMergePass, ElementWise_Fusion_5) { @@ -174,9 +204,15 @@ TEST(FusionMergePass, ElementWise_Fusion_5) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - 
CHECK_EQ(graph->fusion_groups.size(), 2); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 2, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 2.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } TEST(FusionMergePass, Broadcast_Test_0) { @@ -199,9 +235,15 @@ TEST(FusionMergePass, Broadcast_Test_0) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } TEST(FusionMergePass, Broadcast_Test_1) { @@ -224,9 +266,15 @@ TEST(FusionMergePass, Broadcast_Test_1) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 3); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 3, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 3.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } TEST(FusionMergePass, Broadcast_Test_2) { @@ -249,9 +297,15 @@ TEST(FusionMergePass, Broadcast_Test_2) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 3); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 3, + 
phi::errors::InvalidArgument( + "The graph fusion groups's size should be 3.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 2); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 2, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 2.")); } TEST(FusionMergePass, Broadcast_Test_3) { @@ -274,9 +328,15 @@ TEST(FusionMergePass, Broadcast_Test_3) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 3); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 3, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 3.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 2); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 2, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 2.")); } TEST(FusionMergePass, Broadcast_Test_4) { @@ -301,9 +361,15 @@ TEST(FusionMergePass, Broadcast_Test_4) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 4); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 4, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 4.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 2); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 2, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 2.")); } TEST(FusionMergePass, Broadcast_Test_5) { @@ -328,9 +394,15 @@ TEST(FusionMergePass, Broadcast_Test_5) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 4); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 4, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 4.")); 
hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 3); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 3, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 3.")); } TEST(FusionMergePass, Reduce_Test_0) { @@ -352,7 +424,10 @@ TEST(FusionMergePass, Reduce_Test_0) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 4); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 4, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 4.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); // CHECK_EQ(graph->fusion_groups.size(), 2); } @@ -375,9 +450,15 @@ TEST(FusionMergePass, Reduce_Test_1) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 3); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 3, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 3.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 2); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 2, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 2.")); } TEST(FusionMergePass, Reduce_Test_2) { @@ -401,9 +482,15 @@ TEST(FusionMergePass, Reduce_Test_2) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 3); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 3, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 3.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 2); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 2, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 2.")); } TEST(FusionMergePass, Reduce_Test_3) { @@ 
-427,7 +514,10 @@ TEST(FusionMergePass, Reduce_Test_3) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 4); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 4, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 4.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); // CHECK_EQ(graph->fusion_groups.size(), 3); } @@ -454,7 +544,10 @@ TEST(FusionMergePass, Reduce_Test_4) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 5); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 5, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 5.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); // CHECK_EQ(graph->fusion_groups.size(), 3); } @@ -478,9 +571,15 @@ TEST(FusionMergePass, Reduce_Test_5) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 3); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 3, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 3.")); hlir::framework::ApplyPass(graph.get(), "FusionMergePass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ(graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument( + "The graph fusion groups's size should be 1.")); } } // namespace frontend diff --git a/paddle/cinn/hlir/pass/general_fusion_merge_pass.cc b/paddle/cinn/hlir/pass/general_fusion_merge_pass.cc index b9d553019a459..b27565194f293 100644 --- a/paddle/cinn/hlir/pass/general_fusion_merge_pass.cc +++ b/paddle/cinn/hlir/pass/general_fusion_merge_pass.cc @@ -25,7 +25,7 @@ #include "paddle/cinn/hlir/pass/general_fusion_merge_pass/lightware_fuse_pass.h" #include "paddle/cinn/hlir/pass/general_fusion_merge_pass/lightware_fuse_pass_ctx.h" #include 
"paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h" - +#include "paddle/common/enforce.h" PD_DECLARE_bool(enhance_vertical_fusion_with_recompute); namespace cinn { @@ -840,7 +840,11 @@ class GeneralFusionMergePassHelper : public FusionHelperBase { } } - CHECK_GE(producer->consumer_groups().size(), candidates.size()); + PADDLE_ENFORCE_GE( + producer->consumer_groups().size(), + candidates.size(), + phi::errors::Fatal("The number of candidates should be less than or " + "equal to the number of consumers.")); if (producer->consumer_groups().size() == 0 && candidates.size() == 0 && output_nodes_set_.count(producer->CollectNodes()[0]) == 0) { producer->belong_groups.insert(*fusionable_consumers->begin()); @@ -1035,8 +1039,14 @@ class GeneralFusionMergePassHelper : public FusionHelperBase { CHECK(consumer->belong_groups.size()); consumers.insert(*consumer->belong_groups.begin()); } - CHECK_EQ(group->producer_groups().size(), producers.size()); - CHECK_EQ(group->consumer_groups().size(), consumers.size()); + PADDLE_ENFORCE_EQ( + group->producer_groups().size(), + producers.size(), + phi::errors::InvalidArgument("Producer size is not equal!")); + PADDLE_ENFORCE_EQ( + group->consumer_groups().size(), + consumers.size(), + phi::errors::InvalidArgument("Consumer size is not equal!")); (*group->mut_producer_groups()) = producers; (*group->mut_consumer_groups()) = consumers; } diff --git a/paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h b/paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h index 2195d4a4f947b..a8ccbcef27a16 100644 --- a/paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h +++ b/paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h @@ -16,7 +16,7 @@ #include "paddle/cinn/api/op_group.h" #include "paddle/cinn/hlir/pass/fusion_merge_pass_util.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -135,7 +135,10 @@ inline bool WithoutLastDimInReduce(const api::Shape& inshape, static int 
GetSharedSize(const api::OpNode& op_node) { const auto& producers = op_node.inputs(); - CHECK_GT(producers.size(), 0); + PADDLE_ENFORCE_GT(producers.size(), + 0, + phi::errors::InvalidArgument( + "The producer size should be greater than 0.")); const auto& inshape = producers[0].shape(); const auto& axes = op_node.GetAttr>("dim"); if (WithoutLastDimInReduce(inshape, axes)) { diff --git a/paddle/cinn/hlir/pass/infershape.cc b/paddle/cinn/hlir/pass/infershape.cc index 041a63b42b57c..c6a7a6422d8a8 100644 --- a/paddle/cinn/hlir/pass/infershape.cc +++ b/paddle/cinn/hlir/pass/infershape.cc @@ -19,7 +19,7 @@ #include "paddle/cinn/hlir/pass/use_pass.h" #include "paddle/cinn/hlir/pe/schedule.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -76,16 +76,16 @@ void InferShape(Node* node, auto out_dtype = op_inferdtype[node->op()](inputs_dtype, node->attrs.attr_store); - CHECK_GE(node->outlinks_in_order().size(), out_shape.size()) - << "The output number of node " << node->id() << " is " - << node->outlinks_in_order().size() - << " , which is smaller than the output shape size " << out_shape.size() - << " . And the op type is " << node->op()->name; - CHECK_GE(node->outlinks_in_order().size(), out_dtype.size()) - << "The output number of node " << node->id() << " is " - << node->outlinks_in_order().size() - << " , which is smaller than the output dtype size " << out_dtype.size() - << " . 
And the op type is " << node->op()->name; + PADDLE_ENFORCE_GE( + node->outlinks_in_order().size(), + out_shape.size(), + phi::errors::InvalidArgument("The output number of node is smaller " + "than the output shape size")); + PADDLE_ENFORCE_GE( + node->outlinks_in_order().size(), + out_dtype.size(), + phi::errors::InvalidArgument("The output number of node is smaller " + "than the output dtype size")); int counter = 0; for (auto& out_edge : node->outlinks_in_order()) { diff --git a/paddle/cinn/hlir/pass/op_fusion_pass_test.cc b/paddle/cinn/hlir/pass/op_fusion_pass_test.cc old mode 100755 new mode 100644 index c9d723c91be50..8c18782cc031d --- a/paddle/cinn/hlir/pass/op_fusion_pass_test.cc +++ b/paddle/cinn/hlir/pass/op_fusion_pass_test.cc @@ -15,7 +15,7 @@ #include #include "paddle/cinn/frontend/decomposer/test_helper.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace frontend { @@ -39,7 +39,10 @@ TEST(OpFusionPass, ElementWise_Fusion_0) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } TEST(OpFusionPass, ElementWise_Fusion_1) { @@ -63,7 +66,10 @@ TEST(OpFusionPass, ElementWise_Fusion_1) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } TEST(OpFusionPass, Broadcast_Test_0) { @@ -86,7 +92,10 @@ TEST(OpFusionPass, Broadcast_Test_0) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } 
TEST(OpFusionPass, Broadcast_Test_1) { @@ -111,7 +120,10 @@ TEST(OpFusionPass, Broadcast_Test_1) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } TEST(OpFusionPass, Broadcast_Test_2) { @@ -131,7 +143,10 @@ TEST(OpFusionPass, Broadcast_Test_2) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } TEST(OpFusionPass, Reduce_Test_0) { @@ -155,7 +170,10 @@ TEST(OpFusionPass, Reduce_Test_0) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 2); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 2, + phi::errors::InvalidArgument("fusion group size should be 2")); } TEST(OpFusionPass, Reduce_Test_1) { @@ -180,7 +198,10 @@ TEST(OpFusionPass, Reduce_Test_1) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } TEST(OpFusionPass, Reduce_Test_2) { @@ -205,7 +226,10 @@ TEST(OpFusionPass, Reduce_Test_2) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 2); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 2, + phi::errors::InvalidArgument("fusion group size should be 2")); } TEST(OpFusionPass, Injective_Test_0) { @@ -229,7 +253,10 @@ TEST(OpFusionPass, Injective_Test_0) { auto graph = std::make_shared(program, target); 
hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } TEST(OP_LOWERING, Injective_Test_1) { @@ -247,7 +274,10 @@ TEST(OP_LOWERING, Injective_Test_1) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } TEST(OpFusionPass, Test_Insert_BroadcastTo) { @@ -269,7 +299,10 @@ TEST(OpFusionPass, Test_Insert_BroadcastTo) { auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); - CHECK_EQ(graph->fusion_groups.size(), 1); + PADDLE_ENFORCE_EQ( + graph->fusion_groups.size(), + 1, + phi::errors::InvalidArgument("fusion group size should be 1")); } } // namespace frontend diff --git a/paddle/cinn/hlir/pass/opfusion.cc b/paddle/cinn/hlir/pass/opfusion.cc index c8690c0625fbb..84a4071144f96 100644 --- a/paddle/cinn/hlir/pass/opfusion.cc +++ b/paddle/cinn/hlir/pass/opfusion.cc @@ -21,7 +21,7 @@ #include "paddle/cinn/hlir/framework/pass.h" #include "paddle/cinn/hlir/pass/use_pass.h" #include "paddle/cinn/utils/string.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass { @@ -48,8 +48,14 @@ void GetBroadcastPattern( if (*pattern == framework::kBroadcast) { auto inlinks = op_node->inlinks(); auto outlinks = op_node->outlinks(); - CHECK_EQ(inlinks.size(), 2U); - CHECK_EQ(outlinks.size(), 1U); + PADDLE_ENFORCE_EQ( + inlinks.size(), + 2U, + phi::errors::InvalidArgument("Broadcast op should have 2 inputs")); + PADDLE_ENFORCE_EQ( + outlinks.size(), + 1U, + phi::errors::InvalidArgument("Broadcast op should have 1 output")); std::vector input_shapes; for (auto link : inlinks) { auto source = link->source(); @@ -233,7 
+239,11 @@ class GraphPartition { std::vector> Partition( const std::vector& graph_nodes, const std::vector& dom_nodes) { - CHECK_EQ(graph_nodes.size(), dom_nodes.size()); + PADDLE_ENFORCE_EQ( + graph_nodes.size(), + dom_nodes.size(), + phi::errors::InvalidArgument( + "graph_nodes size should be equal to dom_nodes size")); InitGroups(graph_nodes); for (int i = 0; i < 2; i++) { FuseGroups(graph_nodes, dom_nodes, i); @@ -457,8 +467,16 @@ class GraphPartition { void FuseGroups(const std::vector& graph_nodes, const std::vector& dom_nodes, int phase) { - CHECK_EQ(graph_nodes.size(), dom_nodes.size()); - CHECK_EQ(group_nodes_.size(), dom_nodes.size()); + PADDLE_ENFORCE_EQ( + graph_nodes.size(), + dom_nodes.size(), + phi::errors::InvalidArgument( + "graph_nodes size should be equal to dom_nodes size")); + PADDLE_ENFORCE_EQ( + group_nodes_.size(), + dom_nodes.size(), + phi::errors::InvalidArgument( + "group_nodes size should be equal to dom_nodes size")); for (int i = 0; i < graph_nodes.size(); i++) { auto* graph_node = graph_nodes[i]; auto* dom_node = dom_nodes[i]; @@ -521,7 +539,11 @@ class GraphPartition { } void SplitGroups(const std::vector& graph_nodes) { // split groups sorted by topo order - CHECK_EQ(graph_nodes.size(), group_nodes_.size()); + PADDLE_ENFORCE_EQ( + graph_nodes.size(), + group_nodes_.size(), + phi::errors::InvalidArgument( + "graph_nodes size should be equal to group_nodes size")); absl::flat_hash_map> group_maps; std::set root_indice; for (int i = 0; i < graph_nodes.size(); i++) { diff --git a/paddle/cinn/hlir/pass/reduce_split_pass.cc b/paddle/cinn/hlir/pass/reduce_split_pass.cc index 899c233866ca5..cbb6ffa658c47 100644 --- a/paddle/cinn/hlir/pass/reduce_split_pass.cc +++ b/paddle/cinn/hlir/pass/reduce_split_pass.cc @@ -18,7 +18,7 @@ #include "paddle/cinn/hlir/framework/pass.h" #include "paddle/cinn/hlir/pass/infershape.h" #include "paddle/cinn/hlir/pe/nn_util.h" - +#include "paddle/common/enforce.h" namespace cinn { namespace hlir { namespace pass 
{ @@ -103,7 +103,11 @@ class ReduceSplitPass { auto in_shape = shape_dict.at(in->id()); auto out_shape = shape_dict.at(out->id()); // all preceding reduced - CHECK_GT(in_shape.size(), 1); + PADDLE_ENFORCE_GT( + in_shape.size(), + 1, + phi::errors::InvalidArgument( + "The input shape size should be greater than 1.")); // [NHWC]->[C], only the last dim kept bool all_preceding_dim_reduced = true; for (auto i = 0; i < in_shape.size() - 1; ++i) { @@ -122,7 +126,10 @@ class ReduceSplitPass { in_shape.begin(), in_shape.end(), 1, std::multiplies()); int reduce_numel = std::accumulate( in_shape.begin(), in_shape.end() - 1, 1, std::multiplies()); - CHECK_GT(reduce_numel, 0); + PADDLE_ENFORCE_GT(reduce_numel, + 0, + phi::errors::InvalidArgument( + "The reduce_numel should be greater than 0.")); // if the numel is not large enough, it is no need to split // if loop times is too large with reduce optimize int size = std::accumulate( @@ -132,7 +139,10 @@ class ReduceSplitPass { auto shape = pe::GetFirstStepReduceShape( {size, in_shape.back()}, {0}, bound, tail); CHECK(bound); - CHECK_EQ(shape.size(), 3); + PADDLE_ENFORCE_EQ(shape.size(), + 3, + phi::errors::InvalidArgument( + "The shape size should be equal to 3.")); auto res = DivideToClosetNum(reduce_numel); int reduce_numel0 = std::get<0>(res), reduce_numel1 = std::get<1>(res); diff --git a/paddle/cinn/hlir/pe/elementwise.cc b/paddle/cinn/hlir/pe/elementwise.cc index 41eb7f2fd2c10..41deddc1507e3 100644 --- a/paddle/cinn/hlir/pe/elementwise.cc +++ b/paddle/cinn/hlir/pe/elementwise.cc @@ -360,8 +360,8 @@ ir::Tensor GenerateShape(const std::vector& inputs, const std::vector& output_dim_exprs, const std::string& name) { if (output_dim_exprs.size() != 1) { - LOG(WARNING) << "pe::GenerateShape will return a meaningless tensor when " - "output_dim_exprs.size() != 1"; + VLOG(4) << "pe::GenerateShape will return a meaningless tensor when " + "output_dim_exprs.size() != 1"; return Compute( {Expr(1)}, [=](const std::vector& indice) { 
return Expr(1); }, diff --git a/paddle/cinn/hlir/pe/schedule_param.proto b/paddle/cinn/hlir/pe/schedule_param.proto index 1d869a570706d..4d2fca1a1b362 100644 --- a/paddle/cinn/hlir/pe/schedule_param.proto +++ b/paddle/cinn/hlir/pe/schedule_param.proto @@ -1,11 +1,11 @@ // Copyright (c) 2021 CINN Authors. All Rights Reserved. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/cinn/ir/group_schedule/base_group_scheduler.h b/paddle/cinn/ir/group_schedule/base_group_scheduler.h index ef77397066351..a96b972d889ea 100644 --- a/paddle/cinn/ir/group_schedule/base_group_scheduler.h +++ b/paddle/cinn/ir/group_schedule/base_group_scheduler.h @@ -13,6 +13,7 @@ // limitations under the License. 
#pragma once +#include "paddle/cinn/common/macros.h" #include "paddle/cinn/common/target.h" #include "paddle/cinn/ir/group_schedule/config/group_tile_config.h" #include "paddle/cinn/ir/group_schedule/tactic/schedule_tactic.h" @@ -64,6 +65,9 @@ class GroupScheduler { virtual void Schedule() = 0; virtual std::vector> GetIRs() = 0; + virtual std::vector> GetCX86IRs() { + CINN_NOT_IMPLEMENTED; + } std::unordered_set OutputTensorNames() const; diff --git a/paddle/cinn/ir/group_schedule/config/CMakeLists.txt b/paddle/cinn/ir/group_schedule/config/CMakeLists.txt index f6453b645bdc7..256e919fce531 100644 --- a/paddle/cinn/ir/group_schedule/config/CMakeLists.txt +++ b/paddle/cinn/ir/group_schedule/config/CMakeLists.txt @@ -5,7 +5,10 @@ core_gather_headers() gather_srcs(cinnapi_src SRCS group_tile_config.cc) gather_srcs(cinnapi_src SRCS database.cc) -cc_library(file_tile_database SRCS filedatabase.cc) +cc_library( + file_tile_database + SRCS filedatabase.cc + DEPS absl tile_config_proto) foreach(header ${filetileconfig_proto_HDRS}) set(core_proto_includes diff --git a/paddle/cinn/ir/group_schedule/config/database.cc b/paddle/cinn/ir/group_schedule/config/database.cc index a216530126efd..4e3121739b874 100644 --- a/paddle/cinn/ir/group_schedule/config/database.cc +++ b/paddle/cinn/ir/group_schedule/config/database.cc @@ -19,10 +19,16 @@ namespace ir { void NaiveTileConfigDatabase::AddConfig( const common::Target& target, - const IterSpaceType& iter_space_type, const BucketInfo& bucket_info, const ScheduleConfig::TileConfig& config, int priority) { + IterSpaceType iter_space_type = [&] { + std::vector> res; + for (const auto& dim : bucket_info.space) { + res.emplace_back(dim.iter_type, (dim.is_dynamic ? 
"dynamic" : "static")); + } + return res; + }(); config_map_[iter_space_type][bucket_info] = config; } diff --git a/paddle/cinn/ir/group_schedule/config/database.h b/paddle/cinn/ir/group_schedule/config/database.h index 9d61f0dd615a5..14367ee492bba 100644 --- a/paddle/cinn/ir/group_schedule/config/database.h +++ b/paddle/cinn/ir/group_schedule/config/database.h @@ -32,7 +32,6 @@ using IterSpaceType = std::vector>; class TileConfigDatabase { public: virtual void AddConfig(const common::Target& target, - const IterSpaceType& iter_space_type, const BucketInfo& bucket_info, const ScheduleConfig::TileConfig& config, int priority) = 0; @@ -45,7 +44,6 @@ class TileConfigDatabase { class NaiveTileConfigDatabase final : public TileConfigDatabase { public: void AddConfig(const common::Target& target, - const IterSpaceType& iter_space_type, const BucketInfo& bucket_info, const ScheduleConfig::TileConfig& config, int priority = 1) override; diff --git a/paddle/cinn/ir/group_schedule/config/filedatabase.cc b/paddle/cinn/ir/group_schedule/config/filedatabase.cc index 64741521802e9..58b5f13338f0a 100644 --- a/paddle/cinn/ir/group_schedule/config/filedatabase.cc +++ b/paddle/cinn/ir/group_schedule/config/filedatabase.cc @@ -39,22 +39,19 @@ namespace ir { bool TileConfigToProto(group_schedule::config::proto::TileData* tile_data, const TileConfigMap& tile_config_map, - const IterSpaceType& iter_space_type, const int& priority) { for (auto& it : tile_config_map) { - group_schedule::config::proto::Dimension s_dimension, r_dimension; - // prepare key---convert bucket info to proto::bucket_info - s_dimension.set_lower_bound(it.first.sp_lower_bound); - s_dimension.set_upper_bound(it.first.sp_upper_bound); - s_dimension.set_iter_type(iter_space_type[0].first); - s_dimension.set_is_dynamic(iter_space_type[0].second == "dynamic"); - r_dimension.set_lower_bound(it.first.rb_lower_bound); - r_dimension.set_upper_bound(it.first.rb_upper_bound); - 
r_dimension.set_iter_type(iter_space_type[1].first); - r_dimension.set_is_dynamic(iter_space_type[1].second == "dynamic"); - *(tile_data->mutable_bucket_info()->add_dimension()) = s_dimension; - *(tile_data->mutable_bucket_info()->add_dimension()) = r_dimension; + BucketInfo bucket_info = it.first; + int dims = bucket_info.space.size(); + for (int i = 0; i < dims; i++) { + group_schedule::config::proto::Dimension cur_dimension; + cur_dimension.set_lower_bound(bucket_info.space[i].lower_bound); + cur_dimension.set_upper_bound(bucket_info.space[i].upper_bound); + cur_dimension.set_iter_type(bucket_info.space[i].iter_type); + cur_dimension.set_is_dynamic(bucket_info.space[i].is_dynamic); + *(tile_data->mutable_bucket_info()->add_dimension()) = cur_dimension; + } // prepare value---transfer tile_config to proto::tile_config group_schedule::config::proto::TileConfig tc; @@ -114,18 +111,24 @@ std::string IterSpaceTypeToDir(const common::Target target, } bool FileTileConfigDatabase::Tofile(const common::Target& target, - const IterSpaceType& iter_space_type, int priority) { // Step1. To proto TileConfigMap& tile_config_map = target_config_data_; group_schedule::config::proto::TileData tile_data; - auto is_success = - TileConfigToProto(&tile_data, tile_config_map, iter_space_type, priority); + auto is_success = TileConfigToProto(&tile_data, tile_config_map, priority); if (is_success == false) { PADDLE_THROW(::common::errors::Unavailable( "Can't convert tile_config_map to its proto message.")); } // Step2. ToJson + IterSpaceType iter_space_type = [&] { + std::vector> res; + auto bucket_info = tile_config_map.begin()->first; + for (const auto& dim : bucket_info.space) { + res.emplace_back(dim.iter_type, (dim.is_dynamic ? 
"dynamic" : "static")); + } + return res; + }(); std::string dump_path = IterSpaceTypeToDir(target, iter_space_type); size_t length = tile_config_map.size(); std::vector json_lines(length); @@ -187,7 +190,7 @@ bool comparepriority(group_schedule::config::proto::TileData tile_data1, TileConfigMap FileTileConfigDatabase::GetConfigs( const common::Target& target, const IterSpaceType& iter_space_type) const { - // Step1. ReadFromJsonFile->Message; + // Step 1: Read from json file and convert json to proto message std::string file_path = IterSpaceTypeToDir(target, iter_space_type); auto json_lines = ReadLinesFromFile(file_path); size_t line_length = json_lines.size(); @@ -196,39 +199,41 @@ TileConfigMap FileTileConfigDatabase::GetConfigs( line_length); JsonStringToMessageOfTileConfig(&tile_database, json_lines); - // Step2. ParseFromProtoMessage(); + // Step 2: Parse from proto message TileConfigMap tile_config_map; // order tile_database according to priority std::sort(tile_database.begin(), tile_database.end(), comparepriority); for (const auto& piece_tileconfig : tile_database) { group_schedule::config::proto::BucketInfo its = piece_tileconfig.bucket_info(); - // proto::BucketInfo to bucketinfo - BucketInfo bucket_info; - bucket_info.sp_lower_bound = its.dimension(0).lower_bound(); - bucket_info.sp_upper_bound = its.dimension(0).upper_bound(); - bucket_info.rb_lower_bound = its.dimension(1).lower_bound(); - bucket_info.rb_upper_bound = its.dimension(1).upper_bound(); + // Step 2.1: Convert proto bucketinfo to source bucketinfo + int dims = its.dimension_size(); + BucketInfo bucket_info(static_cast(dims)); + for (int i = 0; i < dims; i++) { + bucket_info.space[i].lower_bound = its.dimension(i).lower_bound(); + bucket_info.space[i].upper_bound = its.dimension(i).upper_bound(); + bucket_info.space[i].iter_type = its.dimension(i).iter_type(); + bucket_info.space[i].is_dynamic = its.dimension(i).is_dynamic(); + } + // Step 2.2: Convert proto tile_config to source 
tile_config ScheduleConfig::TileConfig tconfig; tconfig.tree_reduce_num = piece_tileconfig.tile_config().tree_reduce_num(); tconfig.spatial_inner_num = piece_tileconfig.tile_config().spatial_inner_num(); tconfig.warp_num = piece_tileconfig.tile_config().warp_num(); tile_config_map[bucket_info] = tconfig; - // Tode[XiaZichao] Add function to cut one lattice into smaller ones. + // TODO(XiaZichao): Add function to cut one lattice into smaller ones } - // ToDo[XiaZichao] update json file using top view of tileconfigMap + // TODO(XiaZichao): update json file using top view of tileconfigMap return tile_config_map; } void FileTileConfigDatabase::AddConfig(const common::Target& target, - const IterSpaceType& iter_space_type, const BucketInfo& bucket_info, const ScheduleConfig::TileConfig& config, int priority) { target_config_data_[bucket_info] = config; - auto status = - FileTileConfigDatabase::Tofile(target, iter_space_type, priority); + auto status = FileTileConfigDatabase::Tofile(target, priority); if (status == true) { target_config_data_.clear(); return; diff --git a/paddle/cinn/ir/group_schedule/config/filedatabase.h b/paddle/cinn/ir/group_schedule/config/filedatabase.h index 19758dc828c18..3c6b62c676fe8 100644 --- a/paddle/cinn/ir/group_schedule/config/filedatabase.h +++ b/paddle/cinn/ir/group_schedule/config/filedatabase.h @@ -22,7 +22,6 @@ namespace ir { class FileTileConfigDatabase : TileConfigDatabase { public: void AddConfig(const common::Target& target, - const IterSpaceType& iter_space_type, const BucketInfo& bucket_info, const ScheduleConfig::TileConfig& config, int priority) override; @@ -31,9 +30,7 @@ class FileTileConfigDatabase : TileConfigDatabase { private: TileConfigMap target_config_data_; - bool Tofile(const common::Target& target, - const IterSpaceType& iter_space_type, - int priority); + bool Tofile(const common::Target& target, int priority); }; } // namespace ir diff --git a/paddle/cinn/ir/group_schedule/config/group_tile_config.cc 
b/paddle/cinn/ir/group_schedule/config/group_tile_config.cc index 40c1d134ac642..42f1a02adf723 100644 --- a/paddle/cinn/ir/group_schedule/config/group_tile_config.cc +++ b/paddle/cinn/ir/group_schedule/config/group_tile_config.cc @@ -20,6 +20,47 @@ namespace ir { const int kMaxNumel = INT32_MAX; +BucketInfo::BucketInfo(int sp_lower_bound, + int sp_upper_bound, + int rb_lower_bound, + int rb_upper_bound, + bool sp_is_dynamic = false, + bool rb_is_dynamic = false) { + BucketInfo::Dimension sp_dimension( + sp_lower_bound, sp_upper_bound, "S", sp_is_dynamic); + BucketInfo::Dimension rb_dimension( + rb_lower_bound, rb_upper_bound, "R", rb_is_dynamic); + this->space.push_back(sp_dimension); + this->space.push_back(rb_dimension); +} + +bool BucketInfo::operator==(const BucketInfo& other) const { + if (this->space.size() != other.space.size()) { + return false; + } + int length = this->space.size(); + for (int i = 0; i < length; i++) { + if (this->space[i].is_dynamic != other.space[i].is_dynamic || + this->space[i].iter_type != other.space[i].iter_type || + this->space[i].lower_bound != other.space[i].lower_bound || + this->space[i].upper_bound != other.space[i].upper_bound) { + return false; + } + } + return true; +} + +std::string BucketInfo::ToString() const { + std::stringstream ss; + ss << "BucketInfo: ["; + for (const auto& dim : space) { + ss << dim.iter_type << "(" << dim.lower_bound << " - " << dim.upper_bound + << "), "; + } + ss << "]"; + return ss.str(); +} + int64_t Next2Power(int64_t n) { if (n == 1) { return 1; @@ -34,8 +75,6 @@ std::shared_ptr InitBasicInfo( base_info->reduce_tensor_names = group_info->reduce_var_names; base_info->shared_var_names = group_info->shared_var_names; base_info->direct_output_var_names = group_info->direct_output_var_names; - base_info->broadcast_info = group_info->broadcast_info; - base_info->broadcast_to_elementwise = group_info->broadcast_to_elementwise; base_info->data_rank = group_info->data_space.size(); 
base_info->raw_data_rank = group_info->raw_data_rank; @@ -190,7 +229,9 @@ BuildStaticSpatialConfig( BucketInfo bucket_info{/* sp_lower_bound = */ 1, /* sp_upper_bound = */ 1, /* rb_lower_bound = */ 1, - /* rb_upper_bound = */ kMaxNumel}; + /* rb_upper_bound = */ kMaxNumel, + /* sp_is_dynamic = */ false, + /* rb_is_dynamic = */ true}; ScheduleConfig::TileConfig tile_config{ /* warp_num = */ 8, /* tree_reduce_num = */ 256, @@ -201,7 +242,9 @@ BuildStaticSpatialConfig( BucketInfo bucket_info_1_256{/* sp_lower_bound = */ 1, /* sp_upper_bound = */ kMaxNumel, /* rb_lower_bound = */ 1, - /* rb_upper_bound = */ 256}; + /* rb_upper_bound = */ 256, + /* sp_is_dynamic = */ false, + /* rb_is_dynamic = */ true}; ScheduleConfig::TileConfig tile_config_1_256{ /* warp_num = */ 8, /* tree_reduce_num = */ 32, @@ -211,7 +254,9 @@ BuildStaticSpatialConfig( BucketInfo bucket_info_257_2048{/* sp_lower_bound = */ 1, /* sp_upper_bound = */ kMaxNumel, /* rb_lower_bound = */ 257, - /* rb_upper_bound = */ 2048}; + /* rb_upper_bound = */ 2048, + /* sp_is_dynamic = */ false, + /* rb_is_dynamic = */ true}; ScheduleConfig::TileConfig tile_config_257_2048{ /* warp_num = */ 8, /* tree_reduce_num = */ 128, @@ -221,7 +266,9 @@ BuildStaticSpatialConfig( BucketInfo bucket_info_2049_INF{/* sp_lower_bound = */ 1, /* sp_upper_bound = */ kMaxNumel, /* rb_lower_bound = */ 2049, - /* rb_upper_bound = */ kMaxNumel}; + /* rb_upper_bound = */ kMaxNumel, + /* sp_is_dynamic = */ false, + /* rb_is_dynamic = */ true}; ScheduleConfig::TileConfig tile_config_2049_INF{ /* warp_num = */ 8, /* tree_reduce_num = */ 256, @@ -242,7 +289,9 @@ BuildStaticReduceConfig( BucketInfo bucket_info__1_1023{/* sp_lower_bound = */ 1, /* sp_upper_bound = */ 1023, /* rb_lower_bound = */ 1, - /* rb_upper_bound = */ 1}; + /* rb_upper_bound = */ 1, + /* sp_is_dynamic = */ true, + /* rb_is_dynamic = */ false}; ScheduleConfig::TileConfig tile_config__1_1023{ /* warp_num = */ -1, /* tree_reduce_num = */ 1, @@ -251,7 +300,9 @@ 
BuildStaticReduceConfig( BucketInfo bucket_info__1024_1M{/* sp_lower_bound = */ 1024, /* sp_upper_bound = */ 1024 * 1024 - 1, /* rb_lower_bound = */ 1, - /* rb_upper_bound = */ 1}; + /* rb_upper_bound = */ 1, + /* sp_is_dynamic = */ true, + /* rb_is_dynamic = */ false}; ScheduleConfig::TileConfig tile_config__1024_1M{ /* warp_num = */ 32, /* tree_reduce_num = */ 1, @@ -260,7 +311,9 @@ BuildStaticReduceConfig( BucketInfo bucket_info__1M_INF{/* sp_lower_bound = */ 1024 * 1024, /* sp_upper_bound = */ kMaxNumel, /* rb_lower_bound = */ 1, - /* rb_upper_bound = */ 1}; + /* rb_upper_bound = */ 1, + /* sp_is_dynamic = */ true, + /* rb_is_dynamic = */ false}; ScheduleConfig::TileConfig tile_config__1M_INF{ /* warp_num = */ 32, /* tree_reduce_num = */ 1, @@ -273,7 +326,9 @@ BuildStaticReduceConfig( BucketInfo bucket_info{/* sp_lower_bound = */ 1, /* sp_upper_bound = */ kMaxNumel, /* rb_lower_bound = */ 2, - /* rb_upper_bound = */ 256}; + /* rb_upper_bound = */ 256, + /* sp_is_dynamic = */ true, + /* rb_is_dynamic = */ false}; ScheduleConfig::TileConfig tile_config{ /* warp_num = */ 8, /* tree_reduce_num = */ 32, @@ -290,7 +345,9 @@ BuildStaticReduceConfig( BucketInfo bucket_info{/* sp_lower_bound = */ 1, /* sp_upper_bound = */ kMaxNumel, /* rb_lower_bound = */ 257, - /* rb_upper_bound = */ 2048}; + /* rb_upper_bound = */ 2048, + /* sp_is_dynamic = */ true, + /* rb_is_dynamic = */ false}; ScheduleConfig::TileConfig tile_config{ /* warp_num = */ warp_num, /* tree_reduce_num = */ tree_reduce_num, @@ -304,7 +361,9 @@ BuildStaticReduceConfig( BucketInfo bucket_info{/* sp_lower_bound = */ 1, /* sp_upper_bound = */ kMaxNumel, /* rb_lower_bound = */ 2049, - /* rb_upper_bound = */ kMaxNumel}; + /* rb_upper_bound = */ kMaxNumel, + /* sp_is_dynamic = */ true, + /* rb_is_dynamic = */ false}; ScheduleConfig::TileConfig tile_config{ /* warp_num = */ warp_num, /* tree_reduce_num = */ tree_reduce_num, @@ -324,7 +383,9 @@ BuildDynamicShapeConfig( BucketInfo bucket_info{/* sp_lower_bound = */ 
1, /* sp_upper_bound = */ kMaxNumel, /* rb_lower_bound = */ 1, - /* rb_upper_bound = */ kMaxNumel}; + /* rb_upper_bound = */ kMaxNumel, + /* sp_is_dynamic = */ true, + /* rb_is_dynamic = */ true}; ScheduleConfig::TileConfig tile_config{ /* warp_num = */ warp_num, /* tree_reduce_num = */ tree_reduce_num, diff --git a/paddle/cinn/ir/group_schedule/config/group_tile_config.h b/paddle/cinn/ir/group_schedule/config/group_tile_config.h index a62d9dd84fb59..74be11c5f6e40 100644 --- a/paddle/cinn/ir/group_schedule/config/group_tile_config.h +++ b/paddle/cinn/ir/group_schedule/config/group_tile_config.h @@ -42,9 +42,6 @@ struct ScheduleConfig { std::set temp_var_names; std::set shared_var_names; std::set direct_output_var_names; - - std::unordered_map broadcast_info; - std::unordered_map broadcast_to_elementwise; }; struct TileConfig { @@ -59,27 +56,70 @@ struct ScheduleConfig { }; struct BucketInfo { - int64_t sp_lower_bound = 1; - int64_t sp_upper_bound = INT64_MAX; - int64_t rb_lower_bound = 1; - int64_t rb_upper_bound = INT64_MAX; - - bool operator==(const BucketInfo& other) const { - return this->sp_lower_bound == other.sp_lower_bound && - this->sp_upper_bound == other.sp_upper_bound && - this->rb_lower_bound == other.rb_lower_bound && - this->rb_upper_bound == other.rb_upper_bound; - } + struct Dimension { + int lower_bound; + int upper_bound; + std::string iter_type; + bool is_dynamic; + std::vector weights; + Dimension() + : lower_bound(0), + upper_bound(INT_MAX), + iter_type("S"), + is_dynamic(false) {} + Dimension(int low, int upper, std::string iter_type, bool is_dynamic) + : lower_bound(low), + upper_bound(upper), + iter_type(iter_type), + is_dynamic(is_dynamic) {} + Dimension(int low, + int upper, + std::string iter_type, + bool is_dynamic, + std::vector weights) + : lower_bound(low), + upper_bound(upper), + iter_type(iter_type), + is_dynamic(is_dynamic), + weights(weights) {} + }; + std::vector space; + + std::string ToString() const; + BucketInfo() = default; 
+ BucketInfo(int sp_lower_bound, + int sp_upper_bound, + int rb_lower_bound, + int rb_upper_bound, + bool sp_is_dynamic, + bool rb_is_dynamic); + explicit BucketInfo(size_t size) : space(std::vector(size)) {} + bool operator==(const BucketInfo& other) const; }; struct BucketInfoHash { std::size_t operator()(const BucketInfo& bucket_info) const noexcept { - std::size_t hash_spl = std::hash{}(bucket_info.sp_lower_bound); - std::size_t hash_spu = std::hash{}(bucket_info.sp_upper_bound); - std::size_t hash_rbl = std::hash{}(bucket_info.rb_lower_bound); - std::size_t hash_rbu = std::hash{}(bucket_info.rb_upper_bound); - return adt::hash_combine(adt::hash_combine(hash_spl, hash_spu), - adt::hash_combine(hash_rbl, hash_rbu)); + PADDLE_ENFORCE_GT( + bucket_info.space.size(), + 0, + ::common::errors::InvalidArgument( + "Bucketinfo 's dimension number should be more than 0")); + + std::size_t hash_past_dims = adt::hash_combine( + std::hash{}(bucket_info.space[0].lower_bound), + std::hash{}(bucket_info.space[0].upper_bound)); + int dims = bucket_info.space.size(); + if (dims == 1) { + return hash_past_dims; + } else { + for (int i = 1; i < dims; i++) { + std::size_t hash_temp_dim = adt::hash_combine( + std::hash{}(bucket_info.space[i].lower_bound), + std::hash{}(bucket_info.space[i].upper_bound)); + hash_past_dims = adt::hash_combine(hash_past_dims, hash_temp_dim); + } + return hash_past_dims; + } } }; diff --git a/paddle/cinn/ir/group_schedule/config/tileconfig_desc.proto b/paddle/cinn/ir/group_schedule/config/tileconfig_desc.proto index f8e0aeadcfa09..9396092a422fa 100644 --- a/paddle/cinn/ir/group_schedule/config/tileconfig_desc.proto +++ b/paddle/cinn/ir/group_schedule/config/tileconfig_desc.proto @@ -1,11 +1,11 @@ // Copyright (c) 2022 CINN Authors. All Rights Reserved. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -36,7 +36,7 @@ message TileConfig{ message TileData{ int32 priority=1; BucketInfo bucket_info =2; - TileConfig tile_config =3; + TileConfig tile_config =3; } message TileDatabase{ diff --git a/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.cc b/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.cc index 52a08c7a22900..c42ced360d86e 100644 --- a/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.cc +++ b/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.cc @@ -66,32 +66,42 @@ void DynamicShapeGroupScheduler::InitBuckets() { << iter_space_info.total_sp_extent; VLOG(4) << "iter_space_info.total_rb_extent: " << iter_space_info.total_rb_extent; - VLOG(4) << "bucket_info.sp_lower_bound: " << bucket_info.sp_lower_bound; - VLOG(4) << "bucket_info.sp_upper_bound: " << bucket_info.sp_upper_bound; - VLOG(4) << "bucket_info.rb_lower_bound: " << bucket_info.rb_lower_bound; - VLOG(4) << "bucket_info.rb_upper_bound: " << bucket_info.rb_upper_bound; - if (OutOfRange(iter_space_info.total_sp_extent, - bucket_info.sp_lower_bound, - bucket_info.sp_upper_bound) || - OutOfRange(iter_space_info.total_rb_extent, - bucket_info.rb_lower_bound, - bucket_info.rb_upper_bound)) { - VLOG(4) << "Out of range"; - return; + VLOG(4) << "bucket_info is: "; + int dims = bucket_info.space.size(); + SymbolicPredicate predicate = ir::Expr(true); + for (int i = 0; i < dims; ++i) { + VLOG(4) << "bucket_info.space[" << i + << "].lower_bound= " << bucket_info.space[i].lower_bound; + VLOG(4) << "bucket_info.space[" << i + << "].upper_bound= " << bucket_info.space[i].upper_bound; + if (dims == 2 && bucket_info.space[1].iter_type == "R") { + if (i == 0 && 
OutOfRange(iter_space_info.total_sp_extent, + bucket_info.space[i].lower_bound, + bucket_info.space[i].upper_bound)) { + VLOG(4) << "Dimension " << i << " Out of range"; + return; + } + if (i == 1 && OutOfRange(iter_space_info.total_rb_extent, + bucket_info.space[i].lower_bound, + bucket_info.space[i].upper_bound)) { + VLOG(4) << "Dimension " << i << " Out of range"; + return; + } + auto extent = (i == 0) ? iter_space_info.total_sp_extent + : iter_space_info.total_rb_extent; + SymbolicPredicate lower_bound_predicate = + ir::GE::Make(extent, ir::Expr(bucket_info.space[i].lower_bound)); + SymbolicPredicate upper_bound_predicate = + ir::LE::Make(extent, ir::Expr(bucket_info.space[i].upper_bound)); + SymbolicPredicate curr_predicate = + ir::And::Make(lower_bound_predicate, upper_bound_predicate); + predicate = ir::And::Make(predicate, curr_predicate); + } else { + PADDLE_THROW(::common::errors::Unimplemented( + "Now, the function InitBucket doesn't support the cases except " + "SR")); + } } - SymbolicPredicate sp_lower_bound_predicate = ir::GE::Make( - iter_space_info.total_sp_extent, ir::Expr(bucket_info.sp_lower_bound)); - SymbolicPredicate sp_upper_bound_predicate = ir::LE::Make( - iter_space_info.total_sp_extent, ir::Expr(bucket_info.sp_upper_bound)); - SymbolicPredicate rb_lower_bound_predicate = ir::GE::Make( - iter_space_info.total_rb_extent, ir::Expr(bucket_info.rb_lower_bound)); - SymbolicPredicate rb_upper_bound_predicate = ir::LE::Make( - iter_space_info.total_rb_extent, ir::Expr(bucket_info.rb_upper_bound)); - SymbolicPredicate sp_predicate = - ir::And::Make(sp_lower_bound_predicate, sp_upper_bound_predicate); - SymbolicPredicate rb_predicate = - ir::And::Make(rb_lower_bound_predicate, rb_upper_bound_predicate); - SymbolicPredicate predicate = ir::And::Make(sp_predicate, rb_predicate); ScheduleContext schedule_context{output_names, target_, std::move(iter_space_info), @@ -154,6 +164,14 @@ DynamicShapeGroupScheduler::GetIRs() { return irs; } +std::vector> 
+DynamicShapeGroupScheduler::GetCX86IRs() { + std::vector> irs(1); + irs[0].first = ir::EQ::Make(ir::Expr(1), ir::Expr(1)); + irs[0].second = ir_sch_->GetModule().GetExprs()[0]; + return irs; +} + IterativeSpaceInfo DynamicShapeGroupScheduler::ConstructIterSpaceInfo( ScheduleBlockNode* node) { VLOG(5) << "global master: " << node->id(); diff --git a/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.h b/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.h index 0e5205a419973..547d68b5a67a9 100644 --- a/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.h +++ b/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.h @@ -37,6 +37,7 @@ class DynamicShapeGroupScheduler : public GroupScheduler { void Schedule() override; std::vector> GetIRs() override; + std::vector> GetCX86IRs() override; struct BucketContext { SymbolicPredicate predicate; diff --git a/paddle/cinn/ir/group_schedule/search/config_searcher.cc b/paddle/cinn/ir/group_schedule/search/config_searcher.cc index 5dffb8a78cd5a..3e620d616762f 100644 --- a/paddle/cinn/ir/group_schedule/search/config_searcher.cc +++ b/paddle/cinn/ir/group_schedule/search/config_searcher.cc @@ -25,18 +25,18 @@ namespace search { WeightedSamplingTrailObjectiveFunc::WeightedSamplingTrailObjectiveFunc( ::pir::Program* program, - const IterSpace& iter_space, + const BucketInfo& bucket_info, double sampling_prob, int max_sampling_times, int repeats) : program_(program), - iter_space_(iter_space), + bucket_info_(bucket_info), measurer_(program), sampling_prob_(sampling_prob), max_sampling_times_(max_sampling_times), repeats_(repeats) { double weighted_space_size = 1.0; - for (const auto& dim : iter_space_.space) { + for (const auto& dim : bucket_info_.space) { PADDLE_ENFORCE_EQ(dim.upper_bound - dim.lower_bound + 1, dim.weights.size(), ::common::errors::InvalidArgument( @@ -54,7 +54,7 @@ WeightedSamplingTrailObjectiveFunc::WeightedSamplingTrailObjectiveFunc( // Generate Sampling Inputs const auto Sample = [&]() -> std::vector 
{ std::vector samples; - for (IterSpace::Dimension dim : iter_space_.space) { + for (BucketInfo::Dimension dim : bucket_info_.space) { int sampled = utils::SampleDiscreteFromDistribution(dim.weights, &rand_seed_); samples.push_back(static_cast(sampled) + dim.lower_bound); @@ -82,19 +82,15 @@ ScoreType WeightedSamplingTrailObjectiveFunc::operator()( auto tile_config_database = std::make_shared(); IterSpaceType iter_space_type = [&] { std::vector> res; - for (const auto& dim : iter_space_.space) { + for (const auto& dim : bucket_info_.space) { res.emplace_back(dim.iter_type, (dim.is_dynamic ? "dynamic" : "static")); } return res; }(); - BucketInfo bucket_info{iter_space_.space[0].lower_bound, - iter_space_.space[0].upper_bound, - iter_space_.space[1].lower_bound, - iter_space_.space[1].upper_bound}; ScheduleConfig::TileConfig config{ candidate[0], candidate[1], candidate[2], NoneReduceMethod()}; tile_config_database->AddConfig( - cinn::common::DefaultTarget(), iter_space_type, bucket_info, config); + cinn::common::DefaultTarget(), bucket_info_, config); auto& schedule_config_manager = ScheduleConfigManager::Instance(); schedule_config_manager.AddConfigDatabase("custom", tile_config_database); measurer_.Compile(); diff --git a/paddle/cinn/ir/group_schedule/search/config_searcher.h b/paddle/cinn/ir/group_schedule/search/config_searcher.h index 082417388e8a6..4b97547db6851 100644 --- a/paddle/cinn/ir/group_schedule/search/config_searcher.h +++ b/paddle/cinn/ir/group_schedule/search/config_searcher.h @@ -19,6 +19,7 @@ #include #include +#include "paddle/cinn/ir/group_schedule/config/group_tile_config.h" #include "paddle/cinn/ir/group_schedule/search/measurer.h" #include "paddle/cinn/utils/random_engine.h" #include "paddle/pir/include/core/program.h" @@ -39,7 +40,7 @@ class BaseObjectiveFunc { class WeightedSamplingTrailObjectiveFunc : public BaseObjectiveFunc { public: WeightedSamplingTrailObjectiveFunc(::pir::Program* program, - const IterSpace& iter_space, + const 
BucketInfo& bucket_info, double sampling_prob = 1.0, int max_sampling_times = 65536, int repeats = 10); @@ -48,7 +49,7 @@ class WeightedSamplingTrailObjectiveFunc : public BaseObjectiveFunc { private: ::pir::Program* program_; - IterSpace iter_space_; + BucketInfo bucket_info_; Measurer measurer_; double sampling_prob_; int max_sampling_times_; diff --git a/paddle/cinn/ir/group_schedule/search/measurer.cc b/paddle/cinn/ir/group_schedule/search/measurer.cc index 1934ebea16b36..ea2fa18dcadbb 100644 --- a/paddle/cinn/ir/group_schedule/search/measurer.cc +++ b/paddle/cinn/ir/group_schedule/search/measurer.cc @@ -35,17 +35,6 @@ namespace cinn { namespace ir { namespace search { -std::string IterSpace::ToString() const { - std::stringstream ss; - ss << "IterSpace: ["; - for (const auto& dim : space) { - ss << dim.iter_type << "(" << dim.lower_bound << " - " << dim.upper_bound - << "), "; - } - ss << "]"; - return ss.str(); -} - std::shared_ptr CreatePassManager() { pir::IrContext* ctx = pir::IrContext::Instance(); ctx->GetOrRegisterDialect(); diff --git a/paddle/cinn/ir/group_schedule/search/measurer.h b/paddle/cinn/ir/group_schedule/search/measurer.h index 76de4b6eb065b..4118c40558b55 100644 --- a/paddle/cinn/ir/group_schedule/search/measurer.h +++ b/paddle/cinn/ir/group_schedule/search/measurer.h @@ -30,19 +30,6 @@ namespace cinn { namespace ir { namespace search { -struct IterSpace { - struct Dimension { - int lower_bound; - int upper_bound; - std::string iter_type; - bool is_dynamic; - std::vector weights; - }; - std::vector space; - - std::string ToString() const; -}; - struct MeasureResult { ::common::TimeDuration compile_time; ::common::TimeDuration avg_kernel_execute_time; diff --git a/paddle/cinn/ir/group_schedule/tactic/tile_first_general_tactic.cc b/paddle/cinn/ir/group_schedule/tactic/tile_first_general_tactic.cc index a807699f330d2..c1860723cf0b1 100644 --- a/paddle/cinn/ir/group_schedule/tactic/tile_first_general_tactic.cc +++ 
b/paddle/cinn/ir/group_schedule/tactic/tile_first_general_tactic.cc @@ -20,9 +20,6 @@ #include "paddle/cinn/ir/ir_analyzer/ir_analyzer.h" #include "paddle/cinn/ir/schedule/ir_schedule_util.h" -PD_DECLARE_bool(support_reduce_stride_read); -PD_DECLARE_bool(support_trivial_stride_read); - namespace cinn { namespace ir { @@ -47,11 +44,26 @@ bool IsWarpReduce(const ScheduleConfig& config) { return std::visit(MatchWarpReduce, config.tile_config.reduce_method); } +bool UseReduceTile(const ScheduleConfig& config) { + const auto& raw_reduce_axis = config.base_info->raw_reduce_axis; + const auto raw_data_rank = config.base_info->raw_data_rank; + if (raw_reduce_axis.empty()) { + return false; + } + for (size_t i = 1; i < raw_reduce_axis.size(); i++) { + if (raw_reduce_axis[i] != raw_reduce_axis[i - 1] + 1) { + return false; + } + } + return raw_reduce_axis.back() + 1 == raw_data_rank; +} + class TileFirstGeneralTactic final : public ScheduleTactic { public: void Init(ScheduleContext* context) override; void Apply(ir::IRSchedule* sch, const std::string& block_id) override; + void ApplyReduceTile(ir::IRSchedule* sch, const std::string& block_id); std::string TacticName() const override { return "TileFirstGeneralTactic"; } @@ -98,6 +110,11 @@ void TileFirstGeneralTactic::Init(ScheduleContext* context) { void TileFirstGeneralTactic::Apply(ir::IRSchedule* sch, const std::string& block_id) { + if (UseReduceTile(context_->config)) { + VLOG(4) << "Using ApplyReduceTile"; + ApplyReduceTile(sch, block_id); + return; + } if (ir::IsReduceInitTensorName(block_id)) return; MergeReduceAxis(sch, block_id); VLOG(6) << "After MergeReduceAxis on block: [" << block_id @@ -136,6 +153,106 @@ void TileFirstGeneralTactic::Apply(ir::IRSchedule* sch, SetReduceType(sch, block_id); } +void TileFirstGeneralTactic::ApplyReduceTile(ir::IRSchedule* sch, + const std::string& block_id) { + if (ir::IsReduceInitTensorName(block_id)) return; + + const auto sp_thread = context_->config.tile_config.warp_num * 32 / 
+ context_->config.tile_config.tree_reduce_num; + const auto sp_loop = context_->config.tile_config.spatial_inner_num; + const auto rd_thread = context_->config.tile_config.tree_reduce_num; + VLOG(4) << "ApplyReduceTile sp_thread=" << sp_thread; + VLOG(4) << "ApplyReduceTile sp_loop=" << sp_loop; + VLOG(4) << "ApplyReduceTile rd_thread=" << rd_thread; + VLOG(4) << "ApplyReduceTile vec_flatten_axis: " + << utils::Join(vec_flatten_axis_, ", "); + VLOG(4) << "ApplyReduceTile vec_reduce_axis: " + << utils::Join(vec_reduce_axis_, ", "); + + // Merge reduce axes + MergeReduceAxis(sch, block_id); + VLOG(4) << "After MergeReduceAxis on block: [" << block_id + << "], loop nest:\n" + << sch->GetModule().GetExprs().front(); + + // Merge spatial axes + MergeFlattenAxis(sch, block_id); + VLOG(4) << "After MergeFlattenAxis on block: [" << block_id + << "], loop nest:\n" + << sch->GetModule().GetExprs().front(); + + // Split spatial axes -> [sp_block, sp_loop, sp_thread] + int current_reduce_axis = 0; + if (vec_flatten_axis_.size() > 0) { + auto loops = sch->GetLoops(block_id); + if (sp_loop > 1 && sp_thread > 1) { + sch->Split(loops[0], {-1, sp_loop, sp_thread}); + current_reduce_axis = 3; + } else if (sp_loop > 1 || sp_thread > 1) { + sch->Split(loops[0], {-1, sp_loop > 1 ? sp_loop : sp_thread}); + current_reduce_axis = 2; + } else { + current_reduce_axis = 1; + } + } + VLOG(4) << "After SplitSptial on block: [" << block_id << "], loop nest:\n" + << sch->GetModule().GetExprs().front(); + + // Split reduce axes -> [rd_loop, rd_thread] + if (vec_reduce_axis_.size() > 0) { + auto loops = sch->GetLoops(block_id); + auto reduce_loop = loops[current_reduce_axis].As(); + sch->Split(loops[current_reduce_axis], {-1, rd_thread}); + VLOG(4) << "Before ReorderReduction on block: [" << block_id + << "], loop nest:\n" + << sch->GetModule().GetExprs().front(); + + // TODO(lshpku): the Reorder is unneeded if the later FactorizeReduction + // supports rf_axis=1. 
+ loops = sch->GetLoops(block_id); + sch->Reorder({loops[current_reduce_axis + 1], loops[current_reduce_axis]}); + VLOG(4) << "Before FactorizeReduction on block: [" << block_id + << "], loop nest:\n" + << sch->GetModule().GetExprs().front(); + + if (IsReduceBlock(context_->config, block_id)) { + loops = sch->GetLoops(block_id); + sch->FactorizeReduction(loops[current_reduce_axis], + /* rf_axis = */ 0, + /* with_write_back_block_init = */ false); + } + } + VLOG(4) << "After SplitReduce on block: [" << block_id << "], loop nest:\n" + << sch->GetModule().GetExprs().front(); + + // Bind CUDA info + const auto DoBind = [&](const std::vector& loops) { + std::string sp_axis_type = "threadIdx.y"; + std::string rd_axis_type = "threadIdx.x"; + sch->Bind(loops[0], "blockIdx.x"); + if (!vec_flatten_axis_.empty() && sp_thread > 1) { + if (vec_reduce_axis_.empty()) { + sch->Bind(loops[current_reduce_axis - 1], rd_axis_type); + } else { + sch->Bind(loops[current_reduce_axis - 1], sp_axis_type); + } + } + if (!vec_reduce_axis_.empty() && current_reduce_axis > 0) { + sch->Bind(loops[current_reduce_axis], rd_axis_type); + } + }; + DoBind(sch->GetLoops(block_id)); + if (IsReduceBlock(context_->config, block_id) && + sch->HasBlock(block_id + "_rf")) { + DoBind(sch->GetLoops(block_id + "_rf")); + } + VLOG(4) << "After BindCudaInfo on block: [" << block_id << "], loop nest:\n" + << sch->GetModule().GetExprs().front(); + + VariableTypeAssignment(sch, block_id); + SetReduceType(sch, block_id); +} + void TileFirstGeneralTactic::MergeFlattenAxis(ir::IRSchedule* sch, const std::string& block_id) { if (vec_flatten_axis_.size() >= 2) { @@ -167,22 +284,13 @@ void TileFirstGeneralTactic::MergeReduceAxis(ir::IRSchedule* sch, void TileFirstGeneralTactic::SplitSptialInner(ir::IRSchedule* sch, const std::string& block_id) { if (IsInnerThreadSpatialLoopGT(context_->config, 1)) { - if (FLAGS_support_trivial_stride_read) { - auto loops = sch->GetLoops(block_id); - std::vector split_factors{ - 
static_cast(context_->config.tile_config.spatial_inner_num), -1}; - sch->Split(loops[0], split_factors); - loops = sch->GetLoops(block_id); - sch->Reorder({loops[1], loops[0]}); - } else { - auto loops = sch->GetLoops(block_id); - auto split_loops = sch->Split( - loops[0], - std::vector( - {-1, - static_cast( - context_->config.tile_config.spatial_inner_num)})); - } + auto loops = sch->GetLoops(block_id); + auto split_loops = + sch->Split(loops[0], + std::vector( + {-1, + static_cast( + context_->config.tile_config.spatial_inner_num)})); } } @@ -193,30 +301,9 @@ void TileFirstGeneralTactic::SplitReduceInner(ir::IRSchedule* sch, auto loops = sch->GetLoops(block_id); auto reduce_loop = loops[reduce_current_axis_].As(); - if (FLAGS_support_reduce_stride_read) { - if (context_->config.base_info->reduce_numel <= 256) { - std::vector split_factors{ - -1, static_cast(context_->config.tile_config.tree_reduce_num)}; - sch->Split(loops[reduce_current_axis_], split_factors); - loops = sch->GetLoops(block_id); - sch->Reorder( - {loops[reduce_current_axis_ + 1], loops[reduce_current_axis_]}); - } else { - // split warp num first - std::vector split_factors{ - static_cast(context_->config.tile_config.warp_num), -1, 32}; - sch->Split(loops[reduce_current_axis_], split_factors); - loops = sch->GetLoops(block_id); - sch->Reorder( - {loops[reduce_current_axis_ + 2], loops[reduce_current_axis_ + 1]}); - loops = sch->GetLoops(block_id); - sch->Fuse({loops[reduce_current_axis_], loops[reduce_current_axis_ + 1]}); - } - } else { - std::vector split_factors{ - static_cast(context_->config.tile_config.tree_reduce_num), -1}; - sch->Split(loops[reduce_current_axis_], split_factors); - } + std::vector split_factors{ + static_cast(context_->config.tile_config.tree_reduce_num), -1}; + sch->Split(loops[reduce_current_axis_], split_factors); loops = sch->GetLoops(block_id); if (IsReduceBlock(context_->config, block_id)) { sch->FactorizeReduction(loops[reduce_current_axis_], diff --git 
a/paddle/cinn/ir/group_schedule/tactic/tile_tactic.cc b/paddle/cinn/ir/group_schedule/tactic/tile_tactic.cc index 0aaf620874568..adf979c7a7fd4 100644 --- a/paddle/cinn/ir/group_schedule/tactic/tile_tactic.cc +++ b/paddle/cinn/ir/group_schedule/tactic/tile_tactic.cc @@ -52,7 +52,14 @@ void TileTactic::Init(ScheduleContext* context) { int64_t extent = static_cast(total_rb_extent.get_constant()); nums_thread_per_block = GetFirstFactor(extent); } else { - nums_thread_per_block = context_->bucket_info.rb_lower_bound; + if (context->bucket_info.space.size() == 2 && + context->bucket_info.space[1].iter_type == "R") { + nums_thread_per_block = context_->bucket_info.space[1].lower_bound; + } else { + PADDLE_THROW(::common::errors::Unimplemented( + "Now, the function GetTreeReduceSize doesn't support the cases " + "except SR")); + } } return nums_thread_per_block > max_num_threads ? max_num_threads : nums_thread_per_block; @@ -95,9 +102,17 @@ void TileTactic::Init(ScheduleContext* context) { // other bound to cuda thread. 
context_->iter_space_info.sp_space.emplace_back( ir::Expr(-1), IterativeSpaceInfo::AxisType::kCudaBlockX); - context_->iter_space_info.sp_space.emplace_back( - ir::Expr(GetNumThreadPerBlock(context_->bucket_info.rb_upper_bound)), - IterativeSpaceInfo::AxisType::kCudaThreadX); + if (context->bucket_info.space.size() == 2 && + context->bucket_info.space[1].iter_type == "R") { + context_->iter_space_info.sp_space.emplace_back( + ir::Expr( + GetNumThreadPerBlock(context_->bucket_info.space[1].upper_bound)), + IterativeSpaceInfo::AxisType::kCudaThreadX); + } else { + PADDLE_THROW(::common::errors::Unimplemented( + "Now, the function GetTreeReduceSize doesn't support the cases " + "except SR")); + } } VLOG(6) << context_->iter_space_info.PrintIterSpace(); } diff --git a/paddle/cinn/ir/ir.cc b/paddle/cinn/ir/ir.cc index 1b9c83913112d..6d658ed30cc27 100644 --- a/paddle/cinn/ir/ir.cc +++ b/paddle/cinn/ir/ir.cc @@ -26,6 +26,7 @@ #include "paddle/cinn/ir/module.h" #include "paddle/cinn/ir/tensor.h" #include "paddle/cinn/optim/ir_simplify.h" +#include "paddle/common/errors.h" namespace cinn { namespace ir { @@ -255,6 +256,7 @@ Expr For::Make(Var loop_var, Expr body, VectorizeInfo vector_info, BindInfo bind_info) { + ir::TryElevateInt32ToInt64({loop_var, min, extent}); auto node = make_shared(); CHECK(loop_var.defined()); CHECK(min.defined()); @@ -884,9 +886,21 @@ void For::Verify() const { CHECK(extent.defined()); CHECK(body.defined()); - CHECK_EQ(loop_var->type(), type_of()); - CHECK_EQ(min->type(), type_of()); - CHECK_EQ(extent->type(), type_of()); + PADDLE_ENFORCE_EQ((loop_var->type() == type_of()) || + (loop_var->type() == type_of()), + true, + ::common::errors::InvalidArgument( + "loop var's type must be int32 or int64")); + PADDLE_ENFORCE_EQ((min->type() == type_of()) || + (min->type() == type_of()), + true, + ::common::errors::InvalidArgument( + "loop min's type must be int32 or int64")); + PADDLE_ENFORCE_EQ((extent->type() == type_of()) || + (extent->type() == 
type_of()), + true, + ::common::errors::InvalidArgument( + "loop extent's type must be int32 or int64")); } void PolyFor::Verify() const { diff --git a/paddle/cinn/ir/ir_base.h b/paddle/cinn/ir/ir_base.h index eeba03a0978ea..84e14cc839c15 100644 --- a/paddle/cinn/ir/ir_base.h +++ b/paddle/cinn/ir/ir_base.h @@ -402,6 +402,11 @@ struct UnaryOpNode : public ExprNode { return v().type(); } + void replace(Expr old_op, Expr new_op) { + if (v() == old_op) { + v() = new_op; + } + } Expr& v() { return operands().front(); } const Expr& v() const { return operands().front(); } diff --git a/paddle/cinn/ir/schedule/impl/base.cc b/paddle/cinn/ir/schedule/impl/base.cc index 24583a67374e7..e68a5396578b0 100644 --- a/paddle/cinn/ir/schedule/impl/base.cc +++ b/paddle/cinn/ir/schedule/impl/base.cc @@ -92,7 +92,7 @@ void DyScheduleImpl::MergeExprs() { } } for (auto& block : merged_block) { - VLOG(3) << "in merged_block, it has " << block; + VLOG(3) << "in merged_block, it has \n" << block; } auto merged_expr = ir::Block::Make(merged_block); exprs[0] diff --git a/paddle/cinn/ir/schedule/schedule_desc.proto b/paddle/cinn/ir/schedule/schedule_desc.proto index 829478cf22dd4..ed6d8bef92dbb 100644 --- a/paddle/cinn/ir/schedule/schedule_desc.proto +++ b/paddle/cinn/ir/schedule/schedule_desc.proto @@ -1,11 +1,11 @@ // Copyright (c) 2022 CINN Authors. All Rights Reserved. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/paddle/cinn/ir/test/tensor_test.cc b/paddle/cinn/ir/test/tensor_test.cc index 4bf64f309735e..143b5fcd4d18a 100644 --- a/paddle/cinn/ir/test/tensor_test.cc +++ b/paddle/cinn/ir/test/tensor_test.cc @@ -59,7 +59,7 @@ function func_C (_A, _B, _D) { serial for (j, 0, 20) { - D[i, j] = (1.00000000f + ((2.00000000f * A[i, j]) + (2.00000000f * B[i, j]))) + D[i, j] = (((A[i, j] + B[i, j]) * 2.00000000f) + 1.00000000f) } } } @@ -117,7 +117,7 @@ void fn(void* _args, int32_t num_args) for (int32_t i = 0; i < 10; i += 1) { for (int32_t j = 0; j < 10; j += 1) { for (int32_t k = 0; k < 100; k += 1) { - B[((1000 * i) + ((100 * j) + k))] = (2.00000000f * A_reshape[((1000 * i) + ((100 * j) + k))]); + B[((1000 * i) + ((100 * j) + k))] = (A_reshape[((1000 * i) + ((100 * j) + k))] * 2.00000000f); }; }; }; @@ -175,7 +175,7 @@ void fn(void* _args, int32_t num_args) for (int32_t i = 0; i < 10; i += 1) { for (int32_t j = 0; j < 10; j += 1) { for (int32_t k = 0; k < 100; k += 1) { - B[((1000 * i) + ((100 * j) + k))] = (2.00000000f * A_copied_reshape[((1000 * i) + ((100 * j) + k))]); + B[((1000 * i) + ((100 * j) + k))] = (A_copied_reshape[((1000 * i) + ((100 * j) + k))] * 2.00000000f); }; }; }; diff --git a/paddle/cinn/lang/lower_test.cc b/paddle/cinn/lang/lower_test.cc index 25b0bb20f1956..abb9f96b6dd72 100644 --- a/paddle/cinn/lang/lower_test.cc +++ b/paddle/cinn/lang/lower_test.cc @@ -53,7 +53,7 @@ TEST(lower, basic) { { serial for (j, 0, 15) { - B[i, j] = (1.00000000f + A[i, j]) + B[i, j] = (A[i, j] + 1.00000000f) } } } diff --git a/paddle/cinn/optim/cache_read_write_replace_test.cc b/paddle/cinn/optim/cache_read_write_replace_test.cc index 86206d8515287..2769b7913eb00 100755 --- a/paddle/cinn/optim/cache_read_write_replace_test.cc +++ b/paddle/cinn/optim/cache_read_write_replace_test.cc @@ -105,7 +105,7 @@ function fn (_A, _B, _C1_write_cache) { serial for (j, 0, 100) { - C1_write_cache[i, j] = (3.00000000f + A[i, j]) + C1_write_cache[i, j] = (((A[i, j] + 1.00000000f) + 
1.00000000f) + 1.00000000f) } } serial for (i, 0, 100) diff --git a/paddle/cinn/optim/eliminate_common_factor_of_local_index.cc b/paddle/cinn/optim/eliminate_common_factor_of_local_index.cc index 85f8153bb65d4..362e6bff8a113 100644 --- a/paddle/cinn/optim/eliminate_common_factor_of_local_index.cc +++ b/paddle/cinn/optim/eliminate_common_factor_of_local_index.cc @@ -357,10 +357,10 @@ void EliminateCommonFactorHelper(ir::Expr* expr) { } void EliminateCommonFactorOfLocalIndex(ir::Expr* expr) { - VLOG(2) << "Before EliminateCommonFactorOfLocalIndex, Expr = \n" << *expr; + VLOG(4) << "Before EliminateCommonFactorOfLocalIndex, Expr = \n" << *expr; EliminateCommonFactorHelper(expr); EliminateCommonFactorHelper(expr); - VLOG(2) << "After EliminateCommonFactorOfLocalIndex, Expr = \n" << *expr; + VLOG(4) << "After EliminateCommonFactorOfLocalIndex, Expr = \n" << *expr; } } // namespace optim diff --git a/paddle/cinn/optim/ir_simplify_test.cc b/paddle/cinn/optim/ir_simplify_test.cc index fd2b5be74d062..561b60119f078 100755 --- a/paddle/cinn/optim/ir_simplify_test.cc +++ b/paddle/cinn/optim/ir_simplify_test.cc @@ -41,7 +41,7 @@ TEST(IrSimplify, basic) { // get (((C[(i * 20)] + 0) + 100) + 24.5) Simplify(&B); LOG(INFO) << "simplified: " << B; - auto out = "(124.500000f + C[i, 0])"; + auto out = "(((C[i, 0] + 0.00000000f) + 100.000000f) + 24.5000000f)"; EXPECT_EQ(out, utils::GetStreamCnt(B)); } @@ -69,7 +69,7 @@ TEST(IrSimplify, basic) { { serial for (j, 0, 20) { - B[i, j] = (125.000000f + (X[i, j] + y[i, 0])) + B[i, j] = ((((((X[i, j] + (y[i, 0] * 1.00000000f)) + (0.00000000f * X[i, j])) + 25.0000000f) + 100.000000f) - 0.00000000f) + 0.00000000f) } } } @@ -104,7 +104,7 @@ TEST(IrSimplify, basic) { { serial for (j, 0, 20) { - B[i, j] = ((y[i, 0] / 3.00000000f) + (125.000000f + X[(1000 * i), 0])) + B[i, j] = ((((((X[(1000 * i), 0] + (y[i, 0] / 3.00000000f)) + (0.00000000f * X[i, j])) + 25.0000000f) + 100.000000f) - 0.00000000f) + 0.00000000f) } } } diff --git 
a/paddle/cinn/optim/optimize_test.cc b/paddle/cinn/optim/optimize_test.cc index bd1515fd7924c..db667a61cd86f 100755 --- a/paddle/cinn/optim/optimize_test.cc +++ b/paddle/cinn/optim/optimize_test.cc @@ -41,11 +41,11 @@ TEST(Optimize, Unroll) { { serial for (j_outer, 0, 4) { - C[i, (5 * j_outer)] = (1.00000000f + A[i, (5 * j_outer)]) - C[i, (1 + (5 * j_outer))] = (1.00000000f + A[i, (1 + (5 * j_outer))]) - C[i, (2 + (5 * j_outer))] = (1.00000000f + A[i, (2 + (5 * j_outer))]) - C[i, (3 + (5 * j_outer))] = (1.00000000f + A[i, (3 + (5 * j_outer))]) - C[i, (4 + (5 * j_outer))] = (1.00000000f + A[i, (4 + (5 * j_outer))]) + C[i, (5 * j_outer)] = (A[i, (5 * j_outer)] + 1.00000000f) + C[i, (1 + (5 * j_outer))] = (A[i, (1 + (5 * j_outer))] + 1.00000000f) + C[i, (2 + (5 * j_outer))] = (A[i, (2 + (5 * j_outer))] + 1.00000000f) + C[i, (3 + (5 * j_outer))] = (A[i, (3 + (5 * j_outer))] + 1.00000000f) + C[i, (4 + (5 * j_outer))] = (A[i, (4 + (5 * j_outer))] + 1.00000000f) } } } diff --git a/paddle/cinn/optim/resize_buffer.cc b/paddle/cinn/optim/resize_buffer.cc index 2ec4e172b3fc7..7faba111c2521 100644 --- a/paddle/cinn/optim/resize_buffer.cc +++ b/paddle/cinn/optim/resize_buffer.cc @@ -249,6 +249,7 @@ class ResizeBufferFromAnalyzedRange : public ir::IRMutator<> { ir::Store* store = expr->As(); ir::Tensor tensor = store->tensor.as_tensor_ref(); ResizeTensor(&tensor); + ReplaceTensorIndices(store); ir::IRMutator<>::Visit(op, expr); } @@ -264,11 +265,8 @@ class ResizeBufferFromAnalyzedRange : public ir::IRMutator<> { return; } - const std::string& buffer_name = load->tensor.as_tensor_ref()->buffer->name; - if (buffer_name_to_shape_.count(buffer_name) > 0) { - load->tensor.as_tensor_ref()->shape = - buffer_name_to_shape_.at(buffer_name); - } + ir::Tensor tensor = load->tensor.as_tensor_ref(); + ResizeTensor(&tensor); // For the moment, align the load tensor indices with the tensor shape using // the trick method. 
A better way would be to modify the FlattenLoop @@ -277,6 +275,7 @@ class ResizeBufferFromAnalyzedRange : public ir::IRMutator<> { for (int i = 0; i < cnt; i++) { load->indices.erase(load->indices.begin()); } + ReplaceTensorIndices(load); ir::IRMutator<>::Visit(op, expr); } @@ -304,6 +303,35 @@ class ResizeBufferFromAnalyzedRange : public ir::IRMutator<> { } } + template + void ReplaceTensorIndices(T* op) { + ir::Tensor tensor = op->tensor.as_tensor_ref(); + ir::Buffer buffer = tensor->buffer; + if (!buffer.defined()) return; + if (buffer->memory_type != ir::MemoryType::GPULocal) return; + + VLOG(4) << "replacing index of tensor: " << tensor->name; + ir::Expr index_expr = op->index(); + std::unordered_map var_name_to_expr; + ir::ir_utils::CollectIRNodes(index_expr, [&](const ir::Expr* x) { + const ir::_Var_* var = x->as_var(); + if (var) { + var_name_to_expr[var->name] = var->Copy(); + } + return false; + }); + if (var_name_to_expr.size() != 1) { + return; + } + + ir::Expr single_var = var_name_to_expr.begin()->second; + VLOG(4) << "found single var: " << single_var; + for (size_t i = 0; i + 1 < op->indices.size(); i++) { + op->indices[i] = ir::Expr(0); + } + op->indices.back() = single_var; + } + private: const std::unordered_map>& buffer_name_to_shape_; diff --git a/paddle/cinn/optim/transform_gpu_forloop.cc b/paddle/cinn/optim/transform_gpu_forloop.cc index 4e5d5f4c5ae8e..5d4629436d7e6 100644 --- a/paddle/cinn/optim/transform_gpu_forloop.cc +++ b/paddle/cinn/optim/transform_gpu_forloop.cc @@ -426,7 +426,7 @@ class ReplaceVarToZero : public ir::IRMutator<> { }; void OptimizeExprGPU(Expr *expr) { - VLOG(2) << "Before Optimize Expr:\n" << *expr; + VLOG(4) << "Before Optimize Expr:\n" << *expr; // copy var nodes to prevent one modification leading to multiple changes RestructureVarNodes restructure_var_nodes; @@ -458,7 +458,7 @@ void OptimizeExprGPU(Expr *expr) { ReplaceVarToZero replace_var_to_zero; replace_var_to_zero(expr); - VLOG(2) << "After Optimize Expr: \n" 
<< *expr; + VLOG(4) << "After Optimize Expr: \n" << *expr; } } // namespace optim diff --git a/paddle/cinn/poly/schedule_test.cc b/paddle/cinn/poly/schedule_test.cc index af63ee34f4334..23710c93d4256 100755 --- a/paddle/cinn/poly/schedule_test.cc +++ b/paddle/cinn/poly/schedule_test.cc @@ -48,7 +48,7 @@ TEST(CreateStages, compute_at) { { serial for (j, 0, 100) { - B[i, j] = (1.00000000f + A[i, j]) + B[i, j] = (A[i, j] + 1.00000000f) serial for (k, 0, 100) { C[i, j, k] = (B[i, j] * B[j, k]) @@ -99,21 +99,21 @@ TEST(CreateStages, buffer_bind_to_multiple_tensors_schedule) { { serial for (j, 0, 100) { - B[i, j] = (1.00000000f + A[i, j]) + B[i, j] = (A[i, j] + 1.00000000f) } } serial for (i, 0, 100) { serial for (j, 0, 100) { - C[i, j] = (1.00000000f + A[i, j]) + C[i, j] = (A[i, j] + 1.00000000f) } } serial for (i, 0, 100) { serial for (j, 0, 100) { - D[i, j] = (1.00000000f + A[i, j]) + D[i, j] = (A[i, j] + 1.00000000f) } } } diff --git a/paddle/cinn/poly/stage_test.cc b/paddle/cinn/poly/stage_test.cc index e8cbf9dd8ff87..2c01b9b9de617 100644 --- a/paddle/cinn/poly/stage_test.cc +++ b/paddle/cinn/poly/stage_test.cc @@ -207,7 +207,7 @@ function fn (_A, _A1, _B) } serial for (j, 0, 32) { - B[((16 * i_outer) + i_inner), j] = (A1[((16 * i_outer) + i_inner), j] + (A1[(1 + ((16 * i_outer) + i_inner)), j] + A1[(2 + ((16 * i_outer) + i_inner)), j])) + B[((16 * i_outer) + i_inner), j] = ((A1[((16 * i_outer) + i_inner), j] + A1[(1 + ((16 * i_outer) + i_inner)), j]) + A1[(2 + ((16 * i_outer) + i_inner)), j]) } } } @@ -431,7 +431,7 @@ function fn (_A, _C) { serial for (j, 0, 200) { - C[i, j] = (6.00000000f + (2.00000000f * A[i, j])) + C[i, j] = ((((A[i, j] + 1.00000000f) + 1.00000000f) + 1.00000000f) * 2.00000000f) } } } @@ -475,21 +475,21 @@ function fn (_A, _C, _C1, _C2) { serial for (j, 0, 200) { - C2[i, j] = (6.00000000f + (2.00000000f * A[i, j])) + C2[i, j] = ((((A[i, j] + 1.00000000f) + 1.00000000f) + 1.00000000f) * 2.00000000f) } } serial for (i, 0, 100) { serial for (j, 0, 
200) { - C1[i, j] = (4.00000000f + (2.00000000f * A[i, j])) + C1[i, j] = (((A[i, j] + 1.00000000f) + 1.00000000f) * 2.00000000f) } } serial for (i, 0, 100) { serial for (j, 0, 200) { - C[i, j] = (2.00000000f + (2.00000000f * A[i, j])) + C[i, j] = ((A[i, j] + 1.00000000f) * 2.00000000f) } } } diff --git a/paddle/cinn/pybind/backends.cc b/paddle/cinn/pybind/backends.cc index 4e589380223df..a0f51bc88aad8 100644 --- a/paddle/cinn/pybind/backends.cc +++ b/paddle/cinn/pybind/backends.cc @@ -61,7 +61,10 @@ void BindExecutionEngine(py::module *m) { &ExecutionEngine::Create)), py::arg("options") = ExecutionOptions()) .def("lookup", lookup) - .def("link", &ExecutionEngine::Link); + .def("link", + &ExecutionEngine::Link, + py::arg("module"), + py::arg("add_module") = true); { auto lookup = [](Compiler &self, absl::string_view name) { diff --git a/paddle/cinn/runtime/flags.cc b/paddle/cinn/runtime/flags.cc index a0250b0174c52..a76bb16706c52 100644 --- a/paddle/cinn/runtime/flags.cc +++ b/paddle/cinn/runtime/flags.cc @@ -88,14 +88,6 @@ PD_DEFINE_bool(group_schedule_tiling_first, BoolFromEnv("FLAGS_group_schedule_tiling_first", false), "Whether to enable new group scheduler tiling first strategy."); -PD_DEFINE_bool(support_reduce_stride_read, - BoolFromEnv("FLAGS_support_reduce_stride_read", false), - "Whether to enable stride read in reduced dim."); - -PD_DEFINE_bool(support_trivial_stride_read, - BoolFromEnv("FLAGS_support_trivial_stride_read", false), - "Whether to enable stride read in trivial dim."); - PD_DEFINE_bool(cinn_use_common_subexpression_elimination, BoolFromEnv("FLAGS_cinn_use_common_subexpression_elimination", false), diff --git a/paddle/common/flags.cc b/paddle/common/flags.cc index 9e4da57143980..1b888aeed0d66 100644 --- a/paddle/common/flags.cc +++ b/paddle/common/flags.cc @@ -1453,6 +1453,10 @@ PHI_DEFINE_EXPORTED_bool(logging_trunc_pir_py_code, "whether truncate the logging files under directory " "FLAGS_logging_pir_py_code_dir"); 
+PHI_DEFINE_EXPORTED_bool(logging_pir_py_code_dump_symbolic_dims, + false, + "whether dump symbolic dims into pir py code."); + /** * Using PIR API in Python * Name: enable_pir_api @@ -1612,6 +1616,11 @@ PHI_DEFINE_EXPORTED_bool(pir_apply_shape_optimization_pass, "Whether to apply shape_optimization pass " "to infer symbolic shape"); +PHI_DEFINE_EXPORTED_int64( + pir_broadcast_tree_limit, + 32, + "Maximum number of broadcast nodes allowed in a tree"); + PHI_DEFINE_EXPORTED_string( nvidia_package_dir, // NOLINT "", diff --git a/paddle/common/flags_native.cc b/paddle/common/flags_native.cc index 5801b32667d6f..12af71499dec2 100644 --- a/paddle/common/flags_native.cc +++ b/paddle/common/flags_native.cc @@ -25,8 +25,7 @@ #include #include -namespace paddle { -namespace flags { +namespace paddle::flags { std::stringstream& ErrorStream() { static std::stringstream err_ss; @@ -554,5 +553,4 @@ INSTANTIATE_GET_FROM_ENV(std::string); #undef INSTANTIATE_GET_FROM_ENV -} // namespace flags -} // namespace paddle +} // namespace paddle::flags diff --git a/paddle/fluid/distributed/collective/common.cc b/paddle/fluid/distributed/collective/common.cc index e60ecf9b8dcb5..159e9bd2dfdfb 100644 --- a/paddle/fluid/distributed/collective/common.cc +++ b/paddle/fluid/distributed/collective/common.cc @@ -14,8 +14,7 @@ #include "paddle/fluid/distributed/collective/common.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { std::vector GetPlaceList(const std::vector& tensors) { std::vector places; @@ -65,5 +64,4 @@ bool CheckTensorsInXPUPlace(const std::vector& tensors) { }); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/collective/gloo_send_recv.cc b/paddle/fluid/distributed/collective/gloo_send_recv.cc index 970cb6ec93dc2..c7d4b67d6dd7a 100644 --- a/paddle/fluid/distributed/collective/gloo_send_recv.cc +++ b/paddle/fluid/distributed/collective/gloo_send_recv.cc @@ -20,8 +20,7 @@ 
#include "gloo/types.h" #include "paddle/fluid/distributed/collective/gloo_send_recv.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { void send_recv(SendRecvOptions* opts) { const auto& context = opts->context; @@ -38,5 +37,4 @@ void send_recv(SendRecvOptions* opts) { } } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/collective/process_group.cc b/paddle/fluid/distributed/collective/process_group.cc index f151c041c7412..4edbe8ca0e2f6 100644 --- a/paddle/fluid/distributed/collective/process_group.cc +++ b/paddle/fluid/distributed/collective/process_group.cc @@ -14,8 +14,7 @@ #include "paddle/fluid/distributed/collective/process_group.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { bool ProcessGroup::Task::IsCompleted() { std::lock_guard lock(mutex_); @@ -53,5 +52,4 @@ void ProcessGroupIdMap::DestroyProcessGroup() { id_map.clear(); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/collective/process_group_gloo.cc b/paddle/fluid/distributed/collective/process_group_gloo.cc index 283409329ea93..2b6724d75fe90 100644 --- a/paddle/fluid/distributed/collective/process_group_gloo.cc +++ b/paddle/fluid/distributed/collective/process_group_gloo.cc @@ -32,8 +32,7 @@ #include "paddle/phi/api/lib/data_transform.h" #include "paddle/phi/core/distributed/comm_context_manager.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { #ifdef _WIN32 #define GENERATE_FUNC(type, func, ...) 
\ @@ -727,5 +726,4 @@ phi::distributed::GlooCommContext* ProcessGroupGloo::GetCommContext() { return comm_context; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/collective/process_group_nccl.cc b/paddle/fluid/distributed/collective/process_group_nccl.cc index f98cc9ac63bf0..7e6aaba62b92c 100644 --- a/paddle/fluid/distributed/collective/process_group_nccl.cc +++ b/paddle/fluid/distributed/collective/process_group_nccl.cc @@ -42,8 +42,7 @@ COMMON_DECLARE_bool(enable_async_trace); constexpr bool FLAGS_enable_nccl_dynamic_check = false; constexpr int64_t kWaitBlockTImeout = 10; -namespace paddle { -namespace distributed { +namespace paddle::distributed { using phi::distributed::CheckSizeOnEachRank; using phi::distributed::IsP2POP; @@ -1045,5 +1044,4 @@ phi::distributed::NCCLCommContext* ProcessGroupNCCL::GetCommContext( return comm_context; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/common/afs_warpper.cc b/paddle/fluid/distributed/common/afs_warpper.cc index af9dcd285500b..a733b8d985a14 100644 --- a/paddle/fluid/distributed/common/afs_warpper.cc +++ b/paddle/fluid/distributed/common/afs_warpper.cc @@ -16,8 +16,7 @@ #include "paddle/fluid/framework/io/fs.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { // AfsClient impl int AfsClient::initialize(const FsClientParameter& fs_client_param) { // temporarily implemented with hdfs-client @@ -96,5 +95,4 @@ std::vector AfsClient::list(const std::string& path) { bool AfsClient::exist(const std::string& dir) { return paddle::framework::fs_exists(dir); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/fleet_executor/amplifier_interceptor.cc b/paddle/fluid/distributed/fleet_executor/amplifier_interceptor.cc index a166ff0b6dfa2..68cf8cd13d255 100644 --- 
a/paddle/fluid/distributed/fleet_executor/amplifier_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/amplifier_interceptor.cc @@ -17,8 +17,7 @@ #include "paddle/fluid/distributed/fleet_executor/task_node.h" #include "paddle/fluid/framework/operator.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { AmplifierInterceptor::AmplifierInterceptor(int64_t interceptor_id, TaskNode* node) @@ -56,5 +55,4 @@ void AmplifierInterceptor::ReplyCompletedToUpStream() { REGISTER_INTERCEPTOR(Amplifier, AmplifierInterceptor); -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/fleet_executor/message_bus.cc b/paddle/fluid/distributed/fleet_executor/message_bus.cc index c90d1503947bf..d65145f556053 100644 --- a/paddle/fluid/distributed/fleet_executor/message_bus.cc +++ b/paddle/fluid/distributed/fleet_executor/message_bus.cc @@ -23,8 +23,7 @@ #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { void MessageBus::Init( int64_t rank, @@ -250,5 +249,4 @@ bool MessageBus::SendInterRank(int64_t dst_rank, #endif -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/fleet_executor/runtime_graph.cc b/paddle/fluid/distributed/fleet_executor/runtime_graph.cc index a5f90062dcfd9..d2d46f31d2765 100644 --- a/paddle/fluid/distributed/fleet_executor/runtime_graph.cc +++ b/paddle/fluid/distributed/fleet_executor/runtime_graph.cc @@ -16,8 +16,7 @@ #include "paddle/fluid/distributed/fleet_executor/task_node.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { std::string RuntimeGraph::DebugString() const { std::ostringstream os; @@ -29,5 +28,4 @@ std::string RuntimeGraph::DebugString() const { return os.str(); } -} // namespace distributed -} // namespace 
paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/fleet_executor/task_loop.cc b/paddle/fluid/distributed/fleet_executor/task_loop.cc index 270bce7786038..44e853a0d9684 100644 --- a/paddle/fluid/distributed/fleet_executor/task_loop.cc +++ b/paddle/fluid/distributed/fleet_executor/task_loop.cc @@ -17,8 +17,7 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { thread_local TaskLoop* TaskLoop::thread_local_loop_ = nullptr; @@ -81,5 +80,4 @@ void TaskLoop::AbortNotInLoopThread() { std::this_thread::get_id())); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/fleet_executor/task_loop_thread.cc b/paddle/fluid/distributed/fleet_executor/task_loop_thread.cc index 848096eb4442f..3ec9c50c05d98 100644 --- a/paddle/fluid/distributed/fleet_executor/task_loop_thread.cc +++ b/paddle/fluid/distributed/fleet_executor/task_loop_thread.cc @@ -18,8 +18,7 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { TaskLoopThread::TaskLoopThread() : start_(false), loop_(nullptr) {} @@ -56,5 +55,4 @@ void TaskLoopThread::Loop() { loop_ = nullptr; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.cc b/paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.cc index 0a4e704590f0b..b0e0c498f63be 100644 --- a/paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.cc +++ b/paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.cc @@ -19,8 +19,7 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { 
TaskLoopThreadPool::TaskLoopThreadPool() : TaskLoopThreadPool(1) {} @@ -73,5 +72,4 @@ std::vector TaskLoopThreadPool::GetAllLoops() { return loops_; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps.proto b/paddle/fluid/distributed/ps.proto index 27a93a9787ff5..e7e708a2ee4f9 100644 --- a/paddle/fluid/distributed/ps.proto +++ b/paddle/fluid/distributed/ps.proto @@ -1,11 +1,11 @@ // Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/fluid/distributed/ps/service/brpc_utils.cc b/paddle/fluid/distributed/ps/service/brpc_utils.cc index b79dfaab3f200..22ce67c12a132 100644 --- a/paddle/fluid/distributed/ps/service/brpc_utils.cc +++ b/paddle/fluid/distributed/ps/service/brpc_utils.cc @@ -20,18 +20,15 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Variable; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace distributed { +namespace paddle::distributed { framework::proto::VarType::Type VarMessageToVarType( VariableMessage::Type type) { @@ -343,5 +340,4 @@ std::string GetIntTypeEndpoint(const std::string& ip, const uint32_t& port) { return int_ip_port; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/service/coordinator_client.cc b/paddle/fluid/distributed/ps/service/coordinator_client.cc index bf8233ec975fd..4f7381e6e8655 100644 --- a/paddle/fluid/distributed/ps/service/coordinator_client.cc +++ b/paddle/fluid/distributed/ps/service/coordinator_client.cc @@ -25,8 +25,7 @@ static const int MIN_PORT = 8500; static const int MAX_PORT = 65535; -namespace paddle { -namespace distributed { +namespace paddle::distributed { PD_DEFINE_uint64(total_fl_client_size, 100, "supported total fl client size"); PD_DEFINE_uint32(coordinator_wait_all_clients_max_time, 60, "uint32: s"); @@ -201,5 +200,4 @@ void CoordinatorClient::SendFLStrategy(const uint32_t& client_id) { return; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/service/graph_brpc_server.cc b/paddle/fluid/distributed/ps/service/graph_brpc_server.cc index 9133b406424e4..99eccec948397 100644 --- a/paddle/fluid/distributed/ps/service/graph_brpc_server.cc +++ b/paddle/fluid/distributed/ps/service/graph_brpc_server.cc @@ -24,8 +24,7 @@ #include "paddle/fluid/distributed/ps/service/brpc_ps_server.h" #include "paddle/fluid/framework/archive.h" #include "paddle/fluid/platform/profiler.h" -namespace paddle { -namespace 
distributed { +namespace paddle::distributed { #define CHECK_TABLE_EXIST(table, request, response) \ if (table == NULL) { \ @@ -704,5 +703,4 @@ int32_t GraphBrpcService::graph_set_node_feat(Table *table, return 0; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/service/ps_local_client.cc b/paddle/fluid/distributed/ps/service/ps_local_client.cc index f98103fe28968..d864ab95724ca 100644 --- a/paddle/fluid/distributed/ps/service/ps_local_client.cc +++ b/paddle/fluid/distributed/ps/service/ps_local_client.cc @@ -15,8 +15,7 @@ #include "paddle/fluid/distributed/ps/service/ps_local_client.h" #include "paddle/fluid/distributed/ps/table/table.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { int32_t PsLocalClient::Initialize() { const auto& downpour_param = _config.server_param().downpour_server_param(); TableManager::Instance().Initialize(); @@ -329,5 +328,4 @@ ::std::future PsLocalClient::SetDayId(size_t table_id, int day_id) { return done(); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc index ff4035a39d30f..342e113288a06 100644 --- a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc +++ b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc @@ -21,8 +21,7 @@ #include "paddle/fluid/distributed/ps/table/table.h" #include "paddle/fluid/framework/archive.h" #include "paddle/fluid/platform/profiler/event_tracing.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { std::vector GraphPyService::split(const std::string& str, const char pattern) { std::vector res; @@ -507,5 +506,4 @@ void GraphPyClient::StopServer() { if (status.get() == 0) stoped_ = true; } void GraphPyClient::FinalizeWorker() { 
this->worker_ptr->FinalizeWorker(); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/service/ps_service/service.cc b/paddle/fluid/distributed/ps/service/ps_service/service.cc index b4402bea10ed4..124ae0d8b0837 100644 --- a/paddle/fluid/distributed/ps/service/ps_service/service.cc +++ b/paddle/fluid/distributed/ps/service/ps_service/service.cc @@ -25,8 +25,7 @@ using namespace std; // NOLINT -namespace paddle { -namespace distributed { +namespace paddle::distributed { ::paddle::distributed::PSParameter load_from_prototxt( const std::string& filename) { @@ -134,5 +133,4 @@ int PSCore::StopServer() { return 0; } ::paddle::distributed::PSParameter* PSCore::GetParam() { return &_ps_param; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/service/server.cc b/paddle/fluid/distributed/ps/service/server.cc index 10951ba5dc428..2937d691c980b 100755 --- a/paddle/fluid/distributed/ps/service/server.cc +++ b/paddle/fluid/distributed/ps/service/server.cc @@ -20,8 +20,7 @@ #include "paddle/fluid/distributed/ps/service/ps_local_server.h" #include "paddle/fluid/distributed/ps/table/table.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { REGISTER_PSCORE_CLASS(PSServer, BrpcPsServer); REGISTER_PSCORE_CLASS(PSServer, PsLocalServer); @@ -107,5 +106,4 @@ int32_t PSServer::Configure( return Initialize(); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/barrier_table.cc b/paddle/fluid/distributed/ps/table/barrier_table.cc index f665a024a78af..b00a283c6b754 100644 --- a/paddle/fluid/distributed/ps/table/barrier_table.cc +++ b/paddle/fluid/distributed/ps/table/barrier_table.cc @@ -14,8 +14,7 @@ #include "paddle/fluid/distributed/ps/table/common_table.h" -namespace paddle { -namespace distributed { +namespace 
paddle::distributed { int32_t BarrierTable::Initialize() { auto trainers = _config.common().trainer_num(); @@ -74,5 +73,4 @@ int32_t BarrierTable::SetTableMap( return 0; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/ctr_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_accessor.cc index 70954f0b7ad96..b5f185ed1f00e 100644 --- a/paddle/fluid/distributed/ps/table/ctr_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_accessor.cc @@ -18,8 +18,7 @@ #include "paddle/common/flags.h" #include "paddle/utils/string/string_helper.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { int CtrCommonAccessor::Initialize() { auto name = _config.embed_sgd_param().name(); @@ -341,5 +340,4 @@ int CtrCommonAccessor::ParseFromString(const std::string& str, float* value) { return ret; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/graph/graph_edge.cc b/paddle/fluid/distributed/ps/table/graph/graph_edge.cc index 038dcf950ab50..dbd778cdcf055 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_edge.cc +++ b/paddle/fluid/distributed/ps/table/graph/graph_edge.cc @@ -14,8 +14,7 @@ #include "paddle/fluid/distributed/ps/table/graph/graph_edge.h" #include -namespace paddle { -namespace distributed { +namespace paddle::distributed { void GraphEdgeBlob::add_edge(int64_t id, float weight = 1) { id_arr.push_back(id); @@ -27,5 +26,4 @@ void WeightedGraphEdgeBlob::add_edge(int64_t id, float weight = 1) { weight_arr.push_back((half)weight); #endif } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/graph/graph_node.cc b/paddle/fluid/distributed/ps/table/graph/graph_node.cc index 31c098c49fba2..fa8fa61a23eab 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_node.cc +++ 
b/paddle/fluid/distributed/ps/table/graph/graph_node.cc @@ -15,8 +15,7 @@ #include "paddle/fluid/distributed/ps/table/graph/graph_node.h" #include -namespace paddle { -namespace distributed { +namespace paddle::distributed { GraphNode::~GraphNode() { if (sampler != nullptr) { @@ -122,5 +121,4 @@ void FeatureNode::recover_from_buffer(char* buffer) { feature.push_back(str); // NOLINT } } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc b/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc index 86871154ca23f..a27b82c812a55 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc +++ b/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc @@ -19,8 +19,7 @@ #include #include "paddle/phi/core/generator.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { void RandomSampler::build(GraphEdgeBlob *edges) { this->edges = edges; } @@ -30,6 +29,7 @@ std::vector RandomSampler::sample_k( if (k >= n) { k = n; std::vector sample_result; + sample_result.reserve(k); for (int i = 0; i < k; i++) { sample_result.push_back(i); } @@ -116,6 +116,7 @@ std::vector WeightedSampler::sample_k( if (k >= count) { k = count; std::vector sample_result; + sample_result.reserve(k); for (int i = 0; i < k; i++) { sample_result.push_back(i); } @@ -164,5 +165,4 @@ int WeightedSampler::sample( subtract_count_map[this]++; return return_idx; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/memory_dense_table.cc b/paddle/fluid/distributed/ps/table/memory_dense_table.cc index 2e68bdce1931f..9f6abd17ef2bf 100644 --- a/paddle/fluid/distributed/ps/table/memory_dense_table.cc +++ b/paddle/fluid/distributed/ps/table/memory_dense_table.cc @@ -16,8 +16,7 @@ #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace 
distributed { +namespace paddle::distributed { int FLAGS_pslib_table_save_max_retry_dense = 3; @@ -416,5 +415,4 @@ int32_t MemoryDenseTable::Save(const std::string &path, return feasign_size; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.cc b/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.cc index 4fd627bbf807c..4614978d41e51 100644 --- a/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.cc +++ b/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.cc @@ -14,8 +14,7 @@ #include "paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { int32_t MemorySparseGeoTable::Pull(TableContext& context) { CHECK(context.value_type == Sparse); @@ -242,5 +241,4 @@ int32_t MemorySparseGeoTable::_PushSparse(const uint64_t* keys, return 0; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/sparse_accessor.cc b/paddle/fluid/distributed/ps/table/sparse_accessor.cc index 5689ccfe7a594..91e83015b6631 100644 --- a/paddle/fluid/distributed/ps/table/sparse_accessor.cc +++ b/paddle/fluid/distributed/ps/table/sparse_accessor.cc @@ -18,8 +18,7 @@ #include "paddle/common/flags.h" #include "paddle/utils/string/string_helper.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { int SparseAccessor::Initialize() { auto name = _config.embed_sgd_param().name(); @@ -304,5 +303,4 @@ int SparseAccessor::ParseFromString(const std::string& str, float* value) { return ret; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc b/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc index d9b490a80bba6..fd6744df6edb7 100644 --- a/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc 
+++ b/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc @@ -20,8 +20,7 @@ PD_DEFINE_bool(enable_show_scale_gradient, true, "enable show scale gradient"); -namespace paddle { -namespace distributed { +namespace paddle::distributed { void SparseNaiveSGDRule::LoadConfig(const SparseCommonSGDRuleParameter ¶m, size_t emb_dim) { @@ -395,5 +394,4 @@ void SparseAdaGradV2SGDRule::InitValueWork(float *value, sgd[G2SumIndex()] = 0; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/rpc/python_rpc_handler.cc b/paddle/fluid/distributed/rpc/python_rpc_handler.cc index 13322114def64..1daf9ffc1dace 100644 --- a/paddle/fluid/distributed/rpc/python_rpc_handler.cc +++ b/paddle/fluid/distributed/rpc/python_rpc_handler.cc @@ -14,8 +14,7 @@ #include "paddle/fluid/distributed/rpc/python_rpc_handler.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { constexpr auto kInternalModule = "paddle.distributed.rpc.internal"; py::object getFunction(const py::object& module, const char* name) { @@ -63,5 +62,4 @@ std::shared_ptr PythonRpcHandler::GetInstance() { return python_rpc_handler_; } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/rpc/rpc.proto b/paddle/fluid/distributed/rpc/rpc.proto index 2da9e37ae88d9..d9bd22aa974fc 100644 --- a/paddle/fluid/distributed/rpc/rpc.proto +++ b/paddle/fluid/distributed/rpc/rpc.proto @@ -1,11 +1,11 @@ // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/fluid/distributed/test/barrier_table_test.cc b/paddle/fluid/distributed/test/barrier_table_test.cc index 31f0f0844345c..bb2885480d72a 100644 --- a/paddle/fluid/distributed/test/barrier_table_test.cc +++ b/paddle/fluid/distributed/test/barrier_table_test.cc @@ -22,8 +22,7 @@ limitations under the License. */ #include "paddle/fluid/distributed/ps/table/table.h" #include "paddle/fluid/distributed/the_one_ps.pb.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { TEST(BarrierTable, Barrier) { int trainers = 2; @@ -63,5 +62,4 @@ TEST(BarrierTable, Barrier) { ASSERT_EQ(ret, 0); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/test/brpc_utils_test.cc b/paddle/fluid/distributed/test/brpc_utils_test.cc index 3c98dd7667ade..ac691307ae1e2 100644 --- a/paddle/fluid/distributed/test/brpc_utils_test.cc +++ b/paddle/fluid/distributed/test/brpc_utils_test.cc @@ -19,11 +19,9 @@ limitations under the License. 
*/ #include "gtest/gtest.h" #include "paddle/phi/kernels/funcs/math_function.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Variable; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/distributed/test/ctr_accessor_test.cc b/paddle/fluid/distributed/test/ctr_accessor_test.cc index 36ba1426fe3d4..9aa4f9fb56050 100644 --- a/paddle/fluid/distributed/test/ctr_accessor_test.cc +++ b/paddle/fluid/distributed/test/ctr_accessor_test.cc @@ -22,8 +22,7 @@ limitations under the License. */ #include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h" #include "paddle/fluid/distributed/the_one_ps.pb.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdaGradSGDRule); REGISTER_PSCORE_CLASS(SparseValueSGDRule, StdAdaGradSGDRule); REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdamSGDRule); @@ -315,5 +314,4 @@ TEST(downpour_feature_value_accessor_test, test_string_related) { ASSERT_FLOAT_EQ(value[i], 0); } } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/test/ctr_dymf_accessor_test.cc b/paddle/fluid/distributed/test/ctr_dymf_accessor_test.cc index 429248a6eb4eb..48724b9336804 100644 --- a/paddle/fluid/distributed/test/ctr_dymf_accessor_test.cc +++ b/paddle/fluid/distributed/test/ctr_dymf_accessor_test.cc @@ -22,8 +22,7 @@ limitations under the License. 
*/ #include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h" #include "paddle/fluid/distributed/the_one_ps.pb.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdaGradSGDRule); REGISTER_PSCORE_CLASS(SparseValueSGDRule, StdAdaGradSGDRule); REGISTER_PSCORE_CLASS(SparseValueSGDRule, SparseAdamSGDRule); @@ -171,5 +170,4 @@ TEST(downpour_feature_value_accessor_test, test_string_related) { ASSERT_NE(acc->ParseFromString(str, value), 0); // make sure init_zero=true } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/test/feature_value_test.cc b/paddle/fluid/distributed/test/feature_value_test.cc index 6e848c3e2f4e4..1852293177641 100644 --- a/paddle/fluid/distributed/test/feature_value_test.cc +++ b/paddle/fluid/distributed/test/feature_value_test.cc @@ -18,8 +18,7 @@ limitations under the License. */ #include "gtest/gtest.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { TEST(BENCHMARK, LargeScaleKV) { typedef SparseTableShard shard_type; @@ -46,5 +45,4 @@ TEST(BENCHMARK, LargeScaleKV) { ASSERT_FLOAT_EQ(value_data[3], 0.3); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/distributed/test/table_test.cc b/paddle/fluid/distributed/test/table_test.cc index 8908891d9f14f..8e1161a4944b0 100644 --- a/paddle/fluid/distributed/test/table_test.cc +++ b/paddle/fluid/distributed/test/table_test.cc @@ -17,8 +17,7 @@ limitations under the License. 
*/ #include "paddle/fluid/distributed/the_one_ps.pb.h" // #include "paddle/fluid/distributed/ps/table/sparse_geo_table.h" -namespace paddle { -namespace distributed { +namespace paddle::distributed { TEST(Table, Initialize) { TableParameter table_config; @@ -29,5 +28,4 @@ TEST(Table, Initialize) { auto ret = table->Initialize(table_config, fs_config); ASSERT_EQ(ret, -1); } -} // namespace distributed -} // namespace paddle +} // namespace paddle::distributed diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt index 5667a86876e19..041ea3ec3a286 100755 --- a/paddle/fluid/eager/CMakeLists.txt +++ b/paddle/fluid/eager/CMakeLists.txt @@ -13,6 +13,10 @@ set(eager_deps grad_tensor_holder custom_operator_node) +if(WITH_GPU OR WITH_ROCM) + set(eager_deps ${eager_deps} phi_kernel_gpu) +endif() + if(NOT (NOT WITH_PYTHON AND ON_INFER)) set(eager_deps ${eager_deps} accumulation_node prim_utils) endif() diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h index 853a0c445797c..247651ae149f5 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -18,6 +18,7 @@ #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/tensor_wrapper.h" #include "paddle/fluid/framework/executor_cache.h" +#include "paddle/fluid/framework/feed_hook.h" #include "paddle/fluid/framework/new_executor/interpretercore.h" #include "paddle/fluid/framework/tensor_ref_array.h" #include "paddle/fluid/framework/variable_helper.h" @@ -583,6 +584,7 @@ inline void PirRunProgramAPI( //} } + paddle::framework::RunFeedHooks(*forward_program, *global_inner_scope); // interpretercore run if (!forward_program->block()->empty()) { paddle::platform::RecordEvent record_event( @@ -869,7 +871,6 @@ inline void RunProgramGradAPI( auto *backward_global_block = PADDLE_GET_CONST( paddle::framework::BlockDesc *, attrs.at("backward_global_block")); auto 
*backward_program = backward_global_block->Program(); - details::Trans2ContiguousTensorsInplace(out_grad); auto out_grad_names = details::GetTensorsName(out_grad); @@ -1155,6 +1156,7 @@ inline void PirRunProgramGradAPI( } } + paddle::framework::RunFeedHooks(*backward_program, *global_inner_scope); if (!backward_program->block()->empty()) { paddle::platform::RecordEvent record_event( "interpreter_core_run", diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 041339fe597c3..c8f3dc0d673f1 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -515,6 +515,12 @@ cc_library( feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) + +cc_library( + feed_hook + SRCS feed_hook.cc + DEPS lod_tensor scope glog pir) + cc_library( variable_helper SRCS variable_helper.cc @@ -529,6 +535,7 @@ set(NAIVE_EXECUTOR_DEPS glog lod_rank_table feed_fetch_method + feed_hook graph_to_program_pass standalone_executor variable_helper) @@ -598,6 +605,7 @@ if(WITH_DISTRIBUTE) lodtensor_printer lod_rank_table feed_fetch_method + feed_hook collective_helper ${GLOB_DISTRIBUTE_DEPS} graph_to_program_pass @@ -628,7 +636,7 @@ if(WITH_DISTRIBUTE) # pull_dense_worker.cc section_worker.cc heter_section_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry # device_context scope framework_proto data_feed_proto heter_service_proto trainer_desc_proto glog # index_sampler index_wrapper sampler index_dataset_proto - # lod_rank_table framework_io fleet_wrapper heter_wrapper box_wrapper metrics lodtensor_printer feed_fetch_method + # lod_rank_table framework_io fleet_wrapper heter_wrapper box_wrapper metrics lodtensor_printer feed_fetch_method feed_hook # graph_to_program_pass variable_helper timer monitor # heter_service_proto fleet heter_server brpc fleet_executor # graph_gpu_wrapper) @@ -677,6 +685,7 @@ if(WITH_DISTRIBUTE) metrics lodtensor_printer feed_fetch_method + feed_hook 
graph_to_program_pass variable_helper timer @@ -750,6 +759,7 @@ if(WITH_DISTRIBUTE) metrics lodtensor_printer feed_fetch_method + feed_hook graph_to_program_pass variable_helper timer @@ -808,6 +818,7 @@ elseif(WITH_PSLIB) box_wrapper lodtensor_printer feed_fetch_method + feed_hook graph_to_program_pass variable_helper timer @@ -854,6 +865,7 @@ else() box_wrapper lodtensor_printer feed_fetch_method + feed_hook graph_to_program_pass variable_helper timer diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index c01d845b6e145..595841d11170a 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -31,8 +31,7 @@ limitations under the License. */ USE_INT_STAT(STAT_total_feasign_num_in_mem); COMMON_DECLARE_bool(enable_ins_parser_file); -namespace paddle { -namespace framework { +namespace paddle::framework { DLManager& global_dlmanager_pool() { static DLManager manager; @@ -3267,5 +3266,4 @@ void MiniBatchGpuPack::transfer_to_gpu() { } #endif -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc index 7dbd1c8484926..b4e1dcca1cf0b 100644 --- a/paddle/fluid/framework/data_type.cc +++ b/paddle/fluid/framework/data_type.cc @@ -24,8 +24,7 @@ using float16 = paddle::platform::float16; using bfloat16 = paddle::platform::bfloat16; using pstring = phi::dtype::pstring; -namespace paddle { -namespace framework { +namespace paddle::framework { struct DataTypeMap { std::unordered_map cpp_to_proto_; @@ -163,5 +162,4 @@ proto::VarType::Type PromoteTypesIfComplexExists( return promote_types_table[type_an][type_bn]; } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 4c78b12fd4ac4..5e4edb1ca2870 100644 --- 
a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -24,9 +24,7 @@ COMMON_DECLARE_bool(sync_nccl_allreduce); #endif -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) AllReduceOpHandle::AllReduceOpHandle(ir::Node *node, @@ -335,6 +333,4 @@ void AllReduceOpHandle::SyncNCCLAllReduce() { #endif std::string AllReduceOpHandle::Name() const { return "all_reduce"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index 7f4a37a18cbb1..896b70b8b9156 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -20,9 +20,7 @@ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler/event_tracing.h" -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { void BroadcastOpHandle::RunImpl() { platform::RecordEvent record_event( @@ -266,6 +264,4 @@ void BroadcastOpHandle::InitOutputValue( } std::string BroadcastOpHandle::Name() const { return "broadcast"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index 79578e5653a22..e0a03099a881d 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -28,9 +28,7 @@ COMMON_DECLARE_bool(use_mkldnn); PD_DECLARE_bool(use_cinn); #endif -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { static inline bool SeqOnlyAllReduceOps(const BuildStrategy 
&strategy) { // Should fix the allreduce op order if scheduling @@ -503,9 +501,7 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph, return graph; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details USE_PASS(sync_batch_norm_pass); USE_PASS(fuse_relu_depthwise_conv_pass); diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc index fdc12b697ac02..19f7ef1114b6f 100644 --- a/paddle/fluid/framework/details/computation_op_handle.cc +++ b/paddle/fluid/framework/details/computation_op_handle.cc @@ -18,9 +18,7 @@ COMMON_DECLARE_bool(allreduce_record_one_event); -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { struct VarHandleBase; ComputationOpHandle::ComputationOpHandle(ir::Node *node, @@ -55,6 +53,4 @@ bool ComputationOpHandle::NeedWait(VarHandleBase *in_var) { } std::string ComputationOpHandle::Name() const { return op_->Type(); } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/eager_deletion_op_handle.cc b/paddle/fluid/framework/details/eager_deletion_op_handle.cc index 4dbff851f00e2..b8db1e321257b 100644 --- a/paddle/fluid/framework/details/eager_deletion_op_handle.cc +++ b/paddle/fluid/framework/details/eager_deletion_op_handle.cc @@ -21,15 +21,11 @@ #endif #include -namespace paddle { -namespace framework { +namespace paddle::framework { class Variable; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { EagerDeletionOpHandle::EagerDeletionOpHandle( ir::Node *node, @@ -213,6 +209,4 @@ std::vector EagerDeletionOpHandle::VarsToDelete() const { return var_names; } -} // namespace details -} // namespace framework -} // namespace paddle 
+} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index ab45fc7d061db..f947794ccdd05 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -26,9 +26,7 @@ #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/platform/profiler/event_tracing.h" -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( const ExecutionStrategy &strategy, @@ -390,6 +388,4 @@ bool FastThreadedSSAGraphExecutor::RunOpSync(OpHandleBase *op) { } } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.cc b/paddle/fluid/framework/details/fetch_async_op_handle.cc index ee78d36671107..e09b6ec2a5719 100644 --- a/paddle/fluid/framework/details/fetch_async_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_async_op_handle.cc @@ -20,9 +20,7 @@ #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler/event_tracing.h" -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { FetchAsyncOpHandle::FetchAsyncOpHandle(ir::Node *node, FetchResultType *data, @@ -306,6 +304,4 @@ bool FetchAsyncOpHandle::IsMultiDeviceTransfer() { return true; } std::string FetchAsyncOpHandle::Name() const { return "FetchAsync"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 2ca24a6bbbb88..39a91d0e8e645 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ 
b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/platform/profiler/event_tracing.h" -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { FetchOpHandle::FetchOpHandle(ir::Node *node, FetchResultType *data, @@ -182,6 +180,4 @@ bool FetchOpHandle::IsMultiDeviceTransfer() { return true; } std::string FetchOpHandle::Name() const { return "Fetch"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc index 738a74d1d846f..c8117653d12bf 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc @@ -19,14 +19,12 @@ #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler/event_tracing.h" -#include "paddle/phi/backends/device_memory_aligment.h" +#include "paddle/phi/backends/device_memory_alignment.h" PD_DEFINE_bool(skip_fused_all_reduce_check, false, ""); // NOLINT COMMON_DECLARE_bool(allreduce_record_one_event); -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { typedef std::vector< std::vector>> @@ -407,6 +405,4 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel( } std::string FusedAllReduceOpHandle::Name() const { return "fused_all_reduce"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc index c446d3502e4e4..01c833474bc1a 100644 --- a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc @@ 
-17,9 +17,7 @@ #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/platform/profiler/event_tracing.h" -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { void FusedBroadcastOpHandle::RunImpl() { platform::RecordEvent record_event( @@ -58,6 +56,4 @@ void FusedBroadcastOpHandle::RunImpl() { std::string FusedBroadcastOpHandle::Name() const { return "fused_broadcast"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc b/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc index 3e58662be1b82..49a8b3904374d 100644 --- a/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc @@ -20,9 +20,7 @@ COMMON_DECLARE_bool(sync_nccl_allreduce); #endif -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) GradMergeAllReduceOpHandle::GradMergeAllReduceOpHandle( @@ -136,6 +134,4 @@ std::string FusedGradMergeAllReduceOpHandle::Name() const { return "fused_grad_merge_all_reduce"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc index b917c161193fb..45660331c1202 100644 --- a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc @@ -20,9 +20,7 @@ #include "paddle/fluid/framework/ir/graph_helper.h" -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { static std::vector> SeparateMultiDevicesGraph( ir::Graph *graph, size_t place_num) { 
@@ -332,6 +330,4 @@ FetchResultType ParallelSSAGraphExecutor::Run( } } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc index fe43126ca8abe..05e1693eb650e 100644 --- a/paddle/fluid/framework/details/reduce_op_handle.cc +++ b/paddle/fluid/framework/details/reduce_op_handle.cc @@ -26,9 +26,7 @@ PADDLE_DEFINE_EXPORTED_bool( false, "Whether to make the result of computation deterministic in CPU side."); -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { std::once_flag CollectiveContext::init_flag_; std::unique_ptr CollectiveContext::context_; @@ -318,6 +316,4 @@ std::vector ReduceOpHandle::GetInputValues( } std::string ReduceOpHandle::Name() const { return "reduce"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index 8b486be9cc686..2cdfcf5687f93 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -23,9 +23,7 @@ namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { ScaleLossGradOpHandle::ScaleLossGradOpHandle(ir::Node *node, size_t num_dev, Scope *scope, @@ -126,6 +124,4 @@ void ScaleLossGradOpHandle::RunOnVar(Variable *var, bool record_event) { } std::string ScaleLossGradOpHandle::Name() const { return "ScaleLossGrad"; } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/scope_buffered_monitor.cc 
b/paddle/fluid/framework/details/scope_buffered_monitor.cc index 14e109bb5381b..e3b3446209584 100644 --- a/paddle/fluid/framework/details/scope_buffered_monitor.cc +++ b/paddle/fluid/framework/details/scope_buffered_monitor.cc @@ -17,17 +17,13 @@ #include "paddle/common/flags.h" #include "paddle/fluid/platform/profiler/event_tracing.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Variable; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework COMMON_DECLARE_double(local_exe_sub_scope_limit); -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { static constexpr double kMB = 1.0 / (1024.0 * 1024.0); @@ -208,6 +204,4 @@ void ScopeBufferedMonitor::ClearHistoryLocalExecScopes() { history_local_exec_scopes_.clear(); } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc index 02a68fb697efb..fe516be34c93d 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc @@ -18,17 +18,11 @@ #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class MemOptVarInfo; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { class ComputationOpHandle; @@ -103,6 +97,4 @@ void ShareTensorBufferOpHandle::InitCUDA() { void ShareTensorBufferOpHandle::RunImpl() { functor_(local_exec_scopes_[0]); } -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details diff --git 
a/paddle/fluid/framework/device_worker_factory.cc b/paddle/fluid/framework/device_worker_factory.cc index 5c920fa3e318f..64180ec9a9b98 100644 --- a/paddle/fluid/framework/device_worker_factory.cc +++ b/paddle/fluid/framework/device_worker_factory.cc @@ -19,8 +19,7 @@ limitations under the License. */ #include #include -namespace paddle { -namespace framework { +namespace paddle::framework { class DeviceWorker; @@ -86,5 +85,4 @@ REGISTER_DEVICE_WORKER_CLASS(PSGPUWorker); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) REGISTER_DEVICE_WORKER_CLASS(SectionWorker); #endif -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index bd4530f906fac..1f369b869b105 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -33,8 +33,7 @@ limitations under the License. */ PD_DECLARE_bool(benchmark); COMMON_DECLARE_bool(use_mkldnn); -namespace paddle { -namespace framework { +namespace paddle::framework { namespace { // block id starts from 0. This id is used to represent the codeblock // wrapping the first block 0. 
@@ -609,5 +608,4 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) { << "'MKLDNN' is not supported, Please re-compile with WITH_ONEDNN option"; #endif } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/executor_cache.cc b/paddle/fluid/framework/executor_cache.cc index 9045ca0f6a17d..97bcf41845039 100644 --- a/paddle/fluid/framework/executor_cache.cc +++ b/paddle/fluid/framework/executor_cache.cc @@ -32,16 +32,11 @@ DECLARE_FILE_SYMBOLS(print_statistics); COMMON_DECLARE_bool(pir_apply_inplace_pass); COMMON_DECLARE_bool(print_ir); -namespace paddle { -namespace framework { +namespace paddle::framework { class ProgramDesc; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { - -namespace details { +namespace paddle::framework::details { static ExecutionStrategy GetExecutionStrategy(const platform::Place &place) { framework::ExecutionStrategy execution_strategy; @@ -208,7 +203,8 @@ std::set ParseSafeEagerDeletionSkipVarsSet( VLOG(1) << "Found skip_eager_delete_vars: " << skip_eager_delete_vars.size(); return skip_eager_delete_vars; } -} // namespace details +} // namespace paddle::framework::details +namespace paddle::framework { // C++11 removes the need for manual locking. Concurrent execution shall wait if // a static local variable is already being initialized. 
@@ -588,5 +584,4 @@ std::unique_ptr<::pir::Program> ConstructBackwardIrProgram( return res; } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index 46543e7cba9bd..3e7f8d77bf93d 100644 --- a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -25,8 +25,7 @@ namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework { +namespace paddle::framework { class Variable; @@ -130,5 +129,4 @@ phi::DenseTensor& GetVariableTensor(const Scope& scope, return *var->GetMutable(); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/feed_hook.cc b/paddle/fluid/framework/feed_hook.cc new file mode 100644 index 0000000000000..180f51d7fcaf3 --- /dev/null +++ b/paddle/fluid/framework/feed_hook.cc @@ -0,0 +1,139 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/framework/feed_hook.h" +#include +#include +#include +#include +#include "paddle/common/flags.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/pir/include/core/program.h" + +COMMON_DECLARE_string(logging_pir_py_code_dir); +COMMON_DECLARE_bool(logging_trunc_pir_py_code); + +namespace paddle::framework { + +namespace { + +std::optional GetLoggingFilePath() { + if (FLAGS_logging_pir_py_code_dir.empty()) return std::nullopt; + const std::string file_path = + FLAGS_logging_pir_py_code_dir + "/programs_example_input_tensor_meta.py"; + return file_path; +} + +void TryTruncateLoggingFile() { + if (!FLAGS_logging_trunc_pir_py_code) return; + std::optional file_path = GetLoggingFilePath(); + if (!file_path.has_value()) return; + static std::once_flag once_flag; + std::call_once(once_flag, [&] { + std::ofstream ofs; + ofs.open(file_path.value().c_str(), std::ios::out | std::ios::trunc); + ofs.close(); + }); +} + +template +void VisitFeedName(const pir::Program& program, + const DoEachFeadNameT& DoEachFeadName) { + auto module_op = program.module_op(); + const auto& block = module_op.block(); + const auto& IsDataOp = [](const pir::Operation& op) -> bool { + return op.isa(); + }; + const auto& GetDataOpName = [](const pir::Operation& op) -> std::string { + return op.attributes().at("name").dyn_cast().AsString(); + }; + for (const auto& op : block) { + if (IsDataOp(op)) { + DoEachFeadName(GetDataOpName(op)); + } + } + for (const auto& [name, _] : block.kwargs()) { + DoEachFeadName(name); + } +} + +std::string GetLoggingShapeOrDataForName(int64_t program_id, + const std::string& name, + const phi::DenseTensor& tensor) { + int64_t random_id = [&] { + std::random_device rd{}; + std::mt19937_64 gen(rd()); + std::uniform_int_distribution dis( + 0, std::numeric_limits::max()); + return dis(gen); + }(); + std::ostringstream ss; + ss << "class PirProgram_example_input_tensor_meta_" << 
random_id << ":"; + ss << "\n\tprogram_id = " << program_id; + ss << "\n\tinput_name = " << std::quoted(name); + ss << "\n\tshape = ["; + int i = 0; + for (int dim : ::common::vectorize(tensor.dims())) { + if (i++ > 0) { + ss << ", "; + } + ss << dim; + } + ss << "]"; + ss << "\n\n"; + return ss.str(); +} + +void AppendToLoggingFile(const std::string& logging_str) { + std::optional file_path = GetLoggingFilePath(); + if (!file_path.has_value()) return; + std::ofstream ofs; + ofs.open(file_path.value().c_str(), std::ios::out | std::ios::app); + if (!ofs.is_open()) return; + ofs << logging_str << std::endl; + ofs.close(); +} + +void AppendLoggingShapeOrDataForName(int64_t uid, + const std::string& name, + const phi::DenseTensor& tensor) { + static std::mutex mutex; + std::unique_lock lock(mutex); + using Name2OnceFlag = std::unordered_map; + static std::unordered_map once_flags; + std::call_once(once_flags[uid][name], [&] { + AppendToLoggingFile(GetLoggingShapeOrDataForName(uid, name, tensor)); + }); +} + +void SaveLoggingShapeOrData(const pir::Program& program, const Scope& scope) { + if (FLAGS_logging_pir_py_code_dir.empty()) return; + TryTruncateLoggingFile(); + VisitFeedName(program, [&](const std::string& name) { + Variable* variable = scope.FindVar(name); + if (variable == nullptr) return; + if (!variable->IsType()) return; + const phi::DenseTensor& tensor = variable->Get(); + AppendLoggingShapeOrDataForName(program.id(), name, tensor); + }); +} + +} // namespace + +void RunFeedHooks(const pir::Program& program, const Scope& scope) { + SaveLoggingShapeOrData(program, scope); +} + +} // namespace paddle::framework diff --git a/paddle/fluid/pybind/parallel_executor.h b/paddle/fluid/framework/feed_hook.h similarity index 70% rename from paddle/fluid/pybind/parallel_executor.h rename to paddle/fluid/framework/feed_hook.h index 3c3acace033a7..3a8584e3899b6 100644 --- a/paddle/fluid/pybind/parallel_executor.h +++ b/paddle/fluid/framework/feed_hook.h @@ -1,4 +1,4 @@ 
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,12 +14,16 @@ #pragma once -#include "pybind11/pybind11.h" +namespace pir { -namespace paddle { -namespace pybind { +class Program; -void BindParallelExecutor(pybind11::module& m); // NOLINT +} -} // namespace pybind -} // namespace paddle +namespace paddle::framework { + +class Scope; + +void RunFeedHooks(const pir::Program& program, const Scope& scope); + +} // namespace paddle::framework diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc index 3583e352956b5..c4b457a20a0ed 100644 --- a/paddle/fluid/framework/garbage_collector.cc +++ b/paddle/fluid/framework/garbage_collector.cc @@ -24,8 +24,7 @@ COMMON_DECLARE_double(eager_delete_tensor_gb); COMMON_DECLARE_double(memory_fraction_of_eager_deletion); COMMON_DECLARE_bool(fast_eager_deletion_mode); -namespace paddle { -namespace framework { +namespace paddle::framework { GarbageCollector::GarbageCollector(const platform::Place &place, size_t max_memory_size) @@ -249,5 +248,4 @@ std::unique_ptr CreateGarbageCollector( return std::unique_ptr(gc.release()); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/io/crypto/cipher.cc b/paddle/fluid/framework/io/crypto/cipher.cc index 134931778dd51..e81e03ae17138 100644 --- a/paddle/fluid/framework/io/crypto/cipher.cc +++ b/paddle/fluid/framework/io/crypto/cipher.cc @@ -18,8 +18,7 @@ #include "paddle/fluid/framework/io/crypto/cipher_utils.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { +namespace paddle::framework { std::shared_ptr CipherFactory::CreateCipher( const std::string& config_file) { @@ -57,5 +56,4 @@ std::shared_ptr 
CipherFactory::CreateCipher( return nullptr; } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/io/save_load_tensor.cc b/paddle/fluid/framework/io/save_load_tensor.cc index b8a52e9c44fbf..9b5beb5ce9c45 100644 --- a/paddle/fluid/framework/io/save_load_tensor.cc +++ b/paddle/fluid/framework/io/save_load_tensor.cc @@ -19,8 +19,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/phi/common/port.h" -namespace paddle { -namespace framework { +namespace paddle::framework { void SaveTensor(const phi::DenseTensor& x, const std::string& file_path, @@ -54,5 +53,4 @@ void LoadTensor(const std::string& file_path, phi::DenseTensor* out) { framework::DeserializeFromStream(fin, out); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/io/save_paddle2cinn_varmap.cc b/paddle/fluid/framework/io/save_paddle2cinn_varmap.cc index f4debede0a616..ab922a9b400c6 100644 --- a/paddle/fluid/framework/io/save_paddle2cinn_varmap.cc +++ b/paddle/fluid/framework/io/save_paddle2cinn_varmap.cc @@ -16,8 +16,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/port.h" #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace framework { +namespace paddle::framework { void save_paddle2cinn_varmap( std::unordered_map paddle2cinn_var_map, @@ -45,5 +44,4 @@ void save_paddle2cinn_varmap( outfile.close(); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index b3ff3ac35d96d..a5f1d3bea2e7d 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -334,6 +334,8 @@ if(WITH_XPU) DEPS ${XPU_PASS_DEPS}) pass_library(weight_only_linear_xpu_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) + pass_library(block_multihead_attention_xpu_pass inference DIR xpu DEPS + ${XPU_PASS_DEPS}) endif() cc_library( diff --git a/paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass.cc b/paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass.cc index 18c7dcc196b5a..376d8f88c015f 100644 --- a/paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass.cc +++ b/paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass.cc @@ -20,9 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { AdaptivePool2dConvertGlobalPass::AdaptivePool2dConvertGlobalPass() { // NOLINT AddOpCompat(OpCompat("pool2d")) @@ -99,9 +97,7 @@ void AdaptivePool2dConvertGlobalPass::ApplyImpl(ir::Graph* graph) const { AddStatis(num); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(adaptive_pool2d_convert_global_pass, paddle::framework::ir::AdaptivePool2dConvertGlobalPass); diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc index 8a2541de5aae4..f54a86ab97bd5 100644 --- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc +++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc @@ -26,9 +26,7 @@ #include "paddle/phi/backends/device_manager.h" #endif -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { namespace { @@ -1023,9 +1021,7 @@ void AutoMixedPrecisionPass::InsertCastOp() const { VLOG(4) << "insert number of cast op: " << cache.size(); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(auto_mixed_precision_pass, paddle::framework::ir::AutoMixedPrecisionPass); diff --git a/paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc b/paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc index 966f4ea14967d..1f70e732a7fe2 100644 --- a/paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc +++ b/paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc @@ -20,12 +20,10 @@ #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/platform/flags.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class ProgramDesc; class 
VarDesc; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework PADDLE_DEFINE_EXPORTED_double( fuse_parameter_memory_size, @@ -46,9 +44,7 @@ PADDLE_DEFINE_EXPORTED_int32( "-1, it means that there are only one group. The default value is 3, it is " "an experimental value."); -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { // unit of the FLAGS_fuse_parameter_memory_size. static constexpr double kMB = 1048576.0; @@ -600,9 +596,7 @@ class CoalesceGradTensorPass : public ir::Pass { op_desc->SetAttr("persist_output", persistable); } }; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(coalesce_grad_tensor_pass, paddle::framework::ir::CoalesceGradTensorPass) diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc index cd823afa96dd4..403aa38102945 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc @@ -20,9 +20,7 @@ #include "paddle/fluid/platform/device/gpu/gpu_info.h" #endif -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); #define GET_NODES \ @@ -236,9 +234,7 @@ void ConvElementwiseAdd2ActFusePass::ApplyImpl(ir::Graph* graph) const { AddStatis(found_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(conv_elementwise_add2_act_fuse_pass, paddle::framework::ir::ConvElementwiseAdd2ActFusePass); diff --git a/paddle/fluid/framework/ir/delete_assign_op_pass_test.cc b/paddle/fluid/framework/ir/delete_assign_op_pass_test.cc index 92477747fe2be..a1148e1caa7ce 100644 --- a/paddle/fluid/framework/ir/delete_assign_op_pass_test.cc +++ 
b/paddle/fluid/framework/ir/delete_assign_op_pass_test.cc @@ -16,9 +16,7 @@ #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { TEST(delete_assign_op_pass, basic) { ProgramDesc program; @@ -43,8 +41,6 @@ TEST(delete_assign_op_pass, basic) { assign_num)); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(delete_assign_op_pass); diff --git a/paddle/fluid/framework/ir/delete_cast_op_pass.cc b/paddle/fluid/framework/ir/delete_cast_op_pass.cc index 59fd42241e0d4..c96603f03ad30 100644 --- a/paddle/fluid/framework/ir/delete_cast_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_cast_op_pass.cc @@ -26,16 +26,11 @@ namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { struct CastWritePattern : public PatternBase { CastWritePattern(PDPattern* pattern, const std::string& name_scope); @@ -75,7 +70,8 @@ CastWritePattern::CastWritePattern(PDPattern* pattern, cast0->LinksFrom({cast0_in}).LinksTo({cast0_out}); write_to_array->LinksFrom({cast0_out}).LinksTo({write_to_array_out}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { static std::vector FindOpNodeWithInputName( ir::Graph* graph, const std::string& input_name) { @@ -218,7 +214,8 @@ int DeleteCastOpPass::ApplyCastWriteReadPass(ir::Graph* graph) const { return found_subgraph_count; } -namespace patterns { +} // namespace paddle::framework::ir +namespace paddle::framework::ir::patterns { struct CastLodResetWritePattern : public PatternBase { 
CastLodResetWritePattern(PDPattern* pattern, const std::string& name_scope); @@ -267,7 +264,8 @@ CastLodResetWritePattern::CastLodResetWritePattern( lod_reset->LinksFrom({cast0_out}).LinksTo({lod_reset_out}); write_to_array->LinksFrom({lod_reset_out}).LinksTo({write_to_array_out}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int DeleteCastOpPass::ApplyCastLodResetWriteReadPass(ir::Graph* graph) const { if (graph->SubGraphsSize() != 2) { @@ -418,7 +416,8 @@ int DeleteCastOpPass::ApplyCastLodResetWriteReadPass(ir::Graph* graph) const { return found_subgraph_count; } -namespace patterns { +} // namespace paddle::framework::ir +namespace paddle::framework::ir::patterns { struct CastIndexSamplePattern : public PatternBase { CastIndexSamplePattern(PDPattern* pattern, const std::string& name_scope); @@ -475,7 +474,8 @@ CastIndexSamplePattern::CastIndexSamplePattern(PDPattern* pattern, index_sample->LinksFrom({cast0_out}).LinksTo({index_sample_out}); cast1->LinksFrom({index_sample_out}).LinksTo({cast1_out}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int DeleteCastOpPass::ApplyCastIndexSamplePass(ir::Graph* graph) const { GraphPatternDetector gpd; @@ -509,7 +509,8 @@ int DeleteCastOpPass::ApplyCastIndexSamplePass(ir::Graph* graph) const { return found_subgraph_count; } -namespace patterns { +} // namespace paddle::framework::ir +namespace paddle::framework::ir::patterns { struct CastScatterPattern : public PatternBase { CastScatterPattern(PDPattern* pattern, const std::string& name_scope); @@ -587,7 +588,8 @@ CastScatterPattern::CastScatterPattern(PDPattern* pattern, scatter->LinksFrom({cast0_out, cast1_out}).LinksTo({scatter_out}); cast2->LinksFrom({scatter_out}).LinksTo({cast2_out}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int 
DeleteCastOpPass::ApplyCastScatterPass(ir::Graph* graph) const { GraphPatternDetector gpd; @@ -625,7 +627,8 @@ int DeleteCastOpPass::ApplyCastScatterPass(ir::Graph* graph) const { return found_subgraph_count; } -namespace patterns { +} // namespace paddle::framework::ir +namespace paddle::framework::ir::patterns { struct CastLookupTablePattern : public PatternBase { CastLookupTablePattern(PDPattern* pattern, const std::string& name_scope); @@ -666,7 +669,8 @@ CastLookupTablePattern::CastLookupTablePattern(PDPattern* pattern, lookup_table->LinksFrom({lookup_table_w}).LinksTo({lookup_table_out}); cast->LinksFrom({lookup_table_out}).LinksTo({cast_out}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int DeleteCastOpPass::ApplyCastLookupTablePass(ir::Graph* graph) const { GraphPatternDetector gpd; @@ -712,7 +716,8 @@ int DeleteCastOpPass::ApplyCastLookupTablePass(ir::Graph* graph) const { return found_subgraph_count; } -namespace patterns { +} // namespace paddle::framework::ir +namespace paddle::framework::ir::patterns { struct CastPattern : public PatternBase { CastPattern(PDPattern* pattern, const std::string& name_scope); @@ -741,7 +746,8 @@ CastPattern::CastPattern(PDPattern* pattern, const std::string& name_scope) cast->LinksFrom({cast_in}).LinksTo({cast_out}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int DeleteCastOpPass::ApplyCastPass(ir::Graph* graph) const { GraphPatternDetector gpd; @@ -826,9 +832,7 @@ void DeleteCastOpPass::ApplyImpl(ir::Graph* graph) const { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(delete_cast_op_pass, paddle::framework::ir::DeleteCastOpPass); diff --git a/paddle/fluid/framework/ir/delete_cast_op_pass_test.cc b/paddle/fluid/framework/ir/delete_cast_op_pass_test.cc index 17f0c642a60d1..c5480db1ca466 100644 --- 
a/paddle/fluid/framework/ir/delete_cast_op_pass_test.cc +++ b/paddle/fluid/framework/ir/delete_cast_op_pass_test.cc @@ -16,9 +16,7 @@ #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void AddVarToScope(Scope* param_scope, const std::string& name, @@ -315,8 +313,6 @@ TEST(ApplyCastPass, basic) { cast_num_in_graph)); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(delete_cast_op_pass); diff --git a/paddle/fluid/framework/ir/delete_dropout_op_pass_test.cc b/paddle/fluid/framework/ir/delete_dropout_op_pass_test.cc index d8cc2210645ea..7a748a8ab8013 100644 --- a/paddle/fluid/framework/ir/delete_dropout_op_pass_test.cc +++ b/paddle/fluid/framework/ir/delete_dropout_op_pass_test.cc @@ -17,9 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/delete_dropout_op_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { TEST(DeleteDropoutOpsPass, dropout) { for (std::string dropout_implementation : @@ -89,8 +87,6 @@ TEST(DeleteDropoutOpsPass, dropout) { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(delete_dropout_op_x_pass); diff --git a/paddle/fluid/framework/ir/delete_op_device_pass.cc b/paddle/fluid/framework/ir/delete_op_device_pass.cc index cc5523abd8e62..625a0c9023028 100644 --- a/paddle/fluid/framework/ir/delete_op_device_pass.cc +++ b/paddle/fluid/framework/ir/delete_op_device_pass.cc @@ -19,15 +19,11 @@ namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace 
framework { -namespace ir { +namespace paddle::framework::ir { // "op_device" attr is only used in model training. "op_device" attr will change // place of op kernel, so we use "delete_op_device_pass" to remove it. @@ -50,8 +46,6 @@ void DeleteOpDevicePass::ApplyImpl(ir::Graph* graph) const { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(delete_op_device_pass, paddle::framework::ir::DeleteOpDevicePass); diff --git a/paddle/fluid/framework/ir/delete_op_device_pass_test.cc b/paddle/fluid/framework/ir/delete_op_device_pass_test.cc index 2b0ac27782b60..0e4f39495a338 100644 --- a/paddle/fluid/framework/ir/delete_op_device_pass_test.cc +++ b/paddle/fluid/framework/ir/delete_op_device_pass_test.cc @@ -16,9 +16,7 @@ #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { TEST(delete_op_device_pass, relu) { ProgramDesc program; @@ -44,8 +42,6 @@ TEST(delete_op_device_pass, relu) { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(delete_op_device_pass); diff --git a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc index 2a7071d54843d..e93f1a3c9950c 100644 --- a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc @@ -20,9 +20,7 @@ namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); #define GET_NODES \ @@ -107,9 +105,7 @@ void DeleteQuantDequantOpPass::ApplyImpl(ir::Graph* graph) const { AddStatis(found_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // 
namespace paddle::framework::ir REGISTER_PASS(delete_quant_dequant_op_pass, paddle::framework::ir::DeleteQuantDequantOpPass); diff --git a/paddle/fluid/framework/ir/dense_fc_to_sparse_pass_tester.cc b/paddle/fluid/framework/ir/dense_fc_to_sparse_pass_tester.cc index 7fb315de928a6..9a5fc3ddc997c 100644 --- a/paddle/fluid/framework/ir/dense_fc_to_sparse_pass_tester.cc +++ b/paddle/fluid/framework/ir/dense_fc_to_sparse_pass_tester.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/ir/fc_fuse_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void AddVarToScope(Scope* param_scope, const std::string& name, @@ -105,9 +103,7 @@ TEST(FCFusePass, basic) { num_sparse_fc_nodes_after)); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(fc_fuse_pass); USE_PASS(dense_fc_to_sparse_pass); diff --git a/paddle/fluid/framework/ir/dense_multihead_matmul_to_sparse_pass.cc b/paddle/fluid/framework/ir/dense_multihead_matmul_to_sparse_pass.cc index ef123166a9fca..e9d9f5c9d8d6a 100644 --- a/paddle/fluid/framework/ir/dense_multihead_matmul_to_sparse_pass.cc +++ b/paddle/fluid/framework/ir/dense_multihead_matmul_to_sparse_pass.cc @@ -18,10 +18,7 @@ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { PDNode *patterns::DenseMultiheadMatmul::operator()() { auto *multihead_matmul = pattern->NewNode(multihead_matmul_repr()) ->assert_is_op("multihead_matmul"); @@ -61,7 +58,8 @@ PDNode *patterns::DenseMultiheadMatmul::operator()() { return multihead_matmul_out; } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { 
DenseMultiheadMatmulToSparsePass::DenseMultiheadMatmulToSparsePass() { AddOpCompat(OpCompat("multihead_matmul")) .AddInput("Input") @@ -170,9 +168,7 @@ void DenseMultiheadMatmulToSparsePass::ApplyImpl(Graph *graph) const { AddStatis(found_multihead_matmul_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(dense_multihead_matmul_to_sparse_pass, paddle::framework::ir::DenseMultiheadMatmulToSparsePass); diff --git a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc index 68ec0492a42da..958ea7c272432 100644 --- a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc @@ -18,18 +18,11 @@ #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { static PDNode* create_emb_vars(PDPattern* pattern, const std::string& name, @@ -139,7 +132,8 @@ void SkipLayerNorm::operator()() { .LinksTo({layer_norm_out, layer_norm_mean_var, layer_norm_variance_var}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int EmbeddingEltwiseLayerNormFusePass::BuildFusion( Graph* graph, const std::string& name_scope @@ -474,9 +468,7 @@ void EmbeddingEltwiseLayerNormFusePass::ApplyImpl(Graph* graph) const { AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(embedding_eltwise_layernorm_fuse_pass, paddle::framework::ir::EmbeddingEltwiseLayerNormFusePass); diff --git 
a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass_tester.cc index 05d43788fb20d..a9b406ed5d7ac 100644 --- a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass_tester.cc @@ -17,9 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { TEST(FCElementwiseLayerNormFusePass, basic) { // inputs operator output @@ -72,8 +70,6 @@ TEST(FCElementwiseLayerNormFusePass, basic) { num_fused_nodes_after)); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(fc_elementwise_layernorm_fuse_pass); diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc index a037793e59190..e4c5b13d90a23 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -19,9 +19,7 @@ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { FCFusePass::FCFusePass() { AddOpCompat(OpCompat("mul")) @@ -301,9 +299,7 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const { return found_fc_count; } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(fc_fuse_pass, paddle::framework::ir::FCFusePass) .RequirePassAttr("use_gpu"); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc index 876e949fdc3d3..ef1044a60ab98 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc +++ 
b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc @@ -14,11 +14,7 @@ #include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h" -namespace paddle { -namespace framework { -namespace ir { - -namespace fc_lstm_test { +namespace paddle::framework::ir::fc_lstm_test { TEST(FcLstmFusePass, basic) { std::unique_ptr graph = PrepareGraph(); @@ -50,9 +46,6 @@ TEST(FcLstmFusePass, basic) { "The number of fusion_gru nodes does " "not meet expectations after fuse")); } -} // namespace fc_lstm_test -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir::fc_lstm_test USE_PASS(fc_lstm_fuse_pass); diff --git a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc index 3c550ca84042d..60d83f0b5edfb 100644 --- a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc +++ b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc @@ -193,6 +193,8 @@ ir::Graph *FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad( desc.SetOutput(GradVarName("IntermediateOut"), std::vector({d_itermediate_out_n})); + desc.SetAttr("axis", -1); + desc.SetAttr("scale", 0.0f); desc.SetAttr("save_intermediate_out", false); desc.SetAttr("functor_list", std::vector( @@ -273,6 +275,8 @@ ir::Graph *FuseElewiseAddActPass::FuseActElewiseAddInplaceGrad( desc.SetOutput(GradVarName("IntermediateOut"), std::vector({d_intermediate_var_n})); + desc.SetAttr("axis", -1); + desc.SetAttr("scale", 0.0f); desc.SetAttr("save_intermediate_out", false); desc.SetAttr("functor_list", std::vector({ele_add_grad_op->Op()->Type(), @@ -315,6 +319,8 @@ Node *FuseElewiseAddActPass::CreateFuseElewiseAddActNode( desc.SetOutput("Out", std::vector({act_out_n})); desc.SetOutput("IntermediateOut", std::vector({ele_out_n})); desc.SetType("fused_elemwise_add_activation"); + desc.SetAttr("axis", -1); + desc.SetAttr("scale", 0.0f); desc.SetAttr("save_intermediate_out", true); desc.SetAttr( "functor_list", diff --git 
a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc index 15c5b0b379b13..9ba4b6d9d816d 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc @@ -22,9 +22,7 @@ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; @@ -342,8 +340,6 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { return scale_node; } }; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(fuse_adam_op_pass, paddle::framework::ir::FuseAdamOpPass); diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc index d24322ede7e75..523b2a2e5eaf2 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc @@ -21,9 +21,7 @@ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; @@ -113,8 +111,6 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass { } }; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(fuse_momentum_op_pass, paddle::framework::ir::FuseMomentumOpPass); diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc index 4a9e316f30b2b..52dee2bef4e64 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc +++ 
b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/phi/core/kernel_factory.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { ir::Graph &result = *graph; @@ -688,6 +686,4 @@ void FuseOptimizerOpPass::InsertInputAndOutputForFusedOpNode( graph->RemoveNode(ctrl_var_node); } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc index a3ec33d8b2fb6..cefcb56634aba 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc @@ -20,9 +20,7 @@ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; @@ -68,8 +66,6 @@ class FuseSgdOpPass : public FuseOptimizerOpPass { return graph->CreateOpNode(&Sgd_desc); } }; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(fuse_sgd_op_pass, paddle::framework::ir::FuseSgdOpPass); diff --git a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc index d08da8813f17b..d179547c1b409 100644 --- a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc +++ b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc @@ -22,9 +22,7 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void 
FuseReluDepthwiseConvPass::ApplyImpl(ir::Graph *graph) const { graph = FuseReluDepthwiseConv(graph, true); @@ -186,9 +184,7 @@ ir::Graph *FuseReluDepthwiseConvPass::FuseReluDepthwiseConv( return graph; } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(fuse_relu_depthwise_conv_pass, paddle::framework::ir::FuseReluDepthwiseConvPass); diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass.cc b/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass.cc index db281b64f9299..619206f77df50 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass.cc @@ -20,16 +20,11 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { static const std::unordered_set FFN_ACTS{"relu", "gelu"}; @@ -1089,7 +1084,8 @@ PDNode* MultiDevicesFusedMultiTransformerDecoderFuseQKVPattern::operator()() { return ffn_output; } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { inline Node* CreatePersistableVarNode(Graph* graph, const std::string& name) { auto var_desc = VarDesc(name); @@ -3361,9 +3357,7 @@ MultiDevicesFusedMultiTransformerDecoderFuseQKVPass:: .End(); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(fused_multi_transformer_decoder_pass, paddle::framework::ir::FusedMultiTransformerDecoderPass); diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass_tester.cc 
b/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass_tester.cc index 6c08bd2941ff1..f38534468337b 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass_tester.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass_tester.cc @@ -15,9 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void AddVarToScope(Scope* param_scope, const std::string& name, @@ -550,9 +548,7 @@ TEST(MultiDevicesFusedMultiTransformerDecoderFuseQKVPass, "multi_devices_fused_multi_transformer_decoder_fuse_qkv_pass")); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(fused_multi_transformer_decoder_pass); USE_PASS(fused_multi_transformer_decoder_fuse_qkv_pass); diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc index 3b4f475df5f36..370cb3e73bcbd 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc @@ -17,9 +17,7 @@ limitations under the License. 
*/ #ifndef UNUSED #define UNUSED __attribute__((unused)) #endif -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void AddVarToScope(Scope* param_scope, const std::string& name, @@ -711,9 +709,7 @@ TEST(MultiDevicesFusedMultiTransformerEncoderFuseQKVPass, "multi_devices_fused_multi_transformer_encoder_fuse_qkv_pass")); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(fused_multi_transformer_encoder_pass); USE_PASS(fused_multi_transformer_encoder_fuse_qkv_pass); diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator.cc b/paddle/fluid/framework/ir/fusion_group/code_generator.cc index 2e5c2b5be4ac3..defc320495064 100644 --- a/paddle/fluid/framework/ir/fusion_group/code_generator.cc +++ b/paddle/fluid/framework/ir/fusion_group/code_generator.cc @@ -17,10 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/fusion_group/code_generator_helper.h" #include "paddle/fluid/framework/ir/fusion_group/cuda_resources.h" -namespace paddle { -namespace framework { -namespace ir { -namespace fusion_group { +namespace paddle::framework::ir::fusion_group { std::string ExtractDataType(const std::vector& nodes) { std::string dtype_str = ""; @@ -373,7 +370,4 @@ std::unordered_map CodeGenerator::EncodeVarNodes( return var_ids; } -} // namespace fusion_group -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir::fusion_group diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc index b66ee6f31cdb0..799b18e3e4fc7 100644 --- a/paddle/fluid/framework/ir/graph.cc +++ b/paddle/fluid/framework/ir/graph.cc @@ -26,9 +26,7 @@ PADDLE_DEFINE_EXPORTED_bool(all_blocks_convert_trt, false, "Convert all blocks'Ops into TensorRT Ops"); -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { Graph::Graph(const ProgramDesc &program) : Graph( 
@@ -402,6 +400,4 @@ std::unique_ptr Graph::CloneSubGraph(const size_t idx) { bool IsControlDepVar(const ir::Node &var) { return var.Name().find(ir::Node::kControlDepVarName) != std::string::npos; } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/graph_helper.cc b/paddle/fluid/framework/ir/graph_helper.cc index 53e2697daa868..46e8e91971ba0 100644 --- a/paddle/fluid/framework/ir/graph_helper.cc +++ b/paddle/fluid/framework/ir/graph_helper.cc @@ -40,9 +40,7 @@ PADDLE_DEFINE_EXPORTED_string(print_sub_graph_dir, "FLAGS_print_sub_graph_dir is used " "to print the nodes of sub_graphs."); -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { namespace { template @@ -964,6 +962,4 @@ std::vector>> GetOpDependencies( return deps; } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/graph_helper_test.cc b/paddle/fluid/framework/ir/graph_helper_test.cc index 5972cd40817ac..c77b694e90f24 100644 --- a/paddle/fluid/framework/ir/graph_helper_test.cc +++ b/paddle/fluid/framework/ir/graph_helper_test.cc @@ -18,9 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/program_desc.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void BuildCircleGraph(Graph* g) { ir::Node* o1 = g->CreateEmptyNode("op1", Node::Type::kOperation); @@ -222,6 +220,4 @@ TEST(GraphHelperTest, GraphNum) { ASSERT_EQ(GraphNum(g3), 2UL); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/graph_to_program_pass.cc b/paddle/fluid/framework/ir/graph_to_program_pass.cc index 7d0cb815c9af7..61a971dd9501f 100644 --- a/paddle/fluid/framework/ir/graph_to_program_pass.cc +++ b/paddle/fluid/framework/ir/graph_to_program_pass.cc @@ -19,15 +19,11 @@ limitations under the License. */ #include "paddle/common/flags.h" #include "paddle/fluid/framework/op_proto_maker.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class ProgramDesc; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void GraphToProgramPass::ApplyImpl(ir::Graph* graph) const { auto& program = Get("program"); @@ -39,8 +35,6 @@ void GraphToProgramPass::ApplyImpl(ir::Graph* graph) const { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(graph_to_program_pass, paddle::framework::ir::GraphToProgramPass); diff --git a/paddle/fluid/framework/ir/groupnorm_act_pass.cc b/paddle/fluid/framework/ir/groupnorm_act_pass.cc index 397a7437757cc..ff2df5887d6f3 100644 --- a/paddle/fluid/framework/ir/groupnorm_act_pass.cc +++ b/paddle/fluid/framework/ir/groupnorm_act_pass.cc @@ -19,18 +19,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { struct GroupNormAct : public PatternBase { GroupNormAct(PDPattern *pattern, const std::string &name_scope) @@ -80,7 +73,8 @@ void GroupNormAct::operator()(PDNode *x) { act->LinksFrom({group_norm_out_var}).LinksTo({act_out}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int GroupNormActFusePass::ApplyGNSiluPattern(ir::Graph *graph) const { PADDLE_ENFORCE_NOT_NULL( @@ -155,9 +149,7 @@ void GroupNormActFusePass::ApplyImpl(ir::Graph *graph) const { AddStatis(found_subgraph_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(groupnorm_act_pass, paddle::framework::ir::GroupNormActFusePass); REGISTER_PASS_CAPABILITY(groupnorm_act_pass) diff --git a/paddle/fluid/framework/ir/is_test_pass.cc b/paddle/fluid/framework/ir/is_test_pass.cc index 338e8227228d4..aceea7aae8d82 100644 --- a/paddle/fluid/framework/ir/is_test_pass.cc +++ b/paddle/fluid/framework/ir/is_test_pass.cc @@ -16,9 +16,7 @@ limitations under the License. 
*/ #include "glog/logging.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Graph; @@ -52,8 +50,6 @@ void IsTestPass::ApplyImpl(ir::Graph* graph) const { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(is_test_pass, paddle::framework::ir::IsTestPass); diff --git a/paddle/fluid/framework/ir/matmul_scale_fuse_pass.cc b/paddle/fluid/framework/ir/matmul_scale_fuse_pass.cc index c9c9a271d439b..a197af6e41c1f 100644 --- a/paddle/fluid/framework/ir/matmul_scale_fuse_pass.cc +++ b/paddle/fluid/framework/ir/matmul_scale_fuse_pass.cc @@ -22,9 +22,7 @@ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; @@ -237,9 +235,7 @@ void MatmulV2ScaleFusePass::ApplyImpl(ir::Graph* graph) const { AddStatis(found_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(matmul_scale_fuse_pass, paddle::framework::ir::MatmulScaleFusePass); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/conditional_block_op_eager_deletion_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/conditional_block_op_eager_deletion_pass.cc index 97588757280cf..817ab6f3f85ef 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/conditional_block_op_eager_deletion_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/conditional_block_op_eager_deletion_pass.cc @@ -17,9 +17,7 @@ #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" #include "paddle/fluid/operators/controlflow/op_variant.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { using OpVariant = operators::OpVariant; class ConditionalOpEagerDeletionPass : public Pass { 
protected: @@ -94,9 +92,7 @@ class ConditionalOpEagerDeletionPass : public Pass { } }; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(conditional_block_op_eager_deletion_pass, paddle::framework::ir::ConditionalOpEagerDeletionPass); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc index 924938c7d00cb..f01d87025a343 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc @@ -14,17 +14,11 @@ #include "paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h" -namespace paddle { -namespace framework { -namespace details { +namespace paddle::framework::details { class OpHandleBase; -} // namespace details -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::details -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { OpGraphView::OpGraphView(const std::vector &ops) : preceding_ops_(), pending_ops_() { @@ -94,6 +88,4 @@ OpGraphView::GetPrecedingDepNum() const { size_t OpGraphView::OpNumber() const { return preceding_ops_.size(); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/merge_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/merge_layernorm_fuse_pass.cc index 2e6aaa37808ae..1fbe22ff33021 100644 --- a/paddle/fluid/framework/ir/merge_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/merge_layernorm_fuse_pass.cc @@ -39,9 +39,7 @@ GET_IR_NODE(layernorm_40_in_bias); \ GET_IR_NODE(layernorm_40_in_scale); \ GET_IR_NODE(layernorm_40_out); -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { MergeLayernormFusePass::MergeLayernormFusePass() { AddOpCompat(OpCompat("reshape2")) .AddInput("X") @@ -176,9 
+174,7 @@ void MergeLayernormFusePass::ApplyImpl(ir::Graph* graph) const { gpd(graph, handler); AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(merge_layernorm_fuse_pass, paddle::framework::ir::MergeLayernormFusePass); REGISTER_PASS_CAPABILITY(merge_layernorm_fuse_pass) diff --git a/paddle/fluid/framework/ir/multi_batch_merge_pass.cc b/paddle/fluid/framework/ir/multi_batch_merge_pass.cc index e35e5d297db9b..5ffdaee331c6d 100644 --- a/paddle/fluid/framework/ir/multi_batch_merge_pass.cc +++ b/paddle/fluid/framework/ir/multi_batch_merge_pass.cc @@ -19,9 +19,7 @@ #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/op_proto_maker.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { static const char kNumRepeats[] = "num_repeats"; // NOLINT typedef std::unordered_map> SSAVarList; @@ -335,9 +333,7 @@ void BatchMergePass::ApplyImpl(ir::Graph* graph) const { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(multi_batch_merge_pass, paddle::framework::ir::BatchMergePass) .RequirePassAttr(paddle::framework::ir::kNumRepeats); diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/add_reader_dependency_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/add_reader_dependency_pass.cc index b907869b4a38e..e0b96b69116a4 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/add_reader_dependency_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/add_reader_dependency_pass.cc @@ -16,9 +16,7 @@ #include "paddle/fluid/framework/ir/pass.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Graph; @@ -106,9 +104,7 @@ void AddReaderDependencyPass::ApplyImpl(Graph *graph) const { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} 
// namespace paddle::framework::ir REGISTER_PASS(add_reader_dependency_pass, paddle::framework::ir::AddReaderDependencyPass); diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc index a4feed4693a62..1913888dc316e 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/op_proto_maker.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class BackWardOpDepsPass : public ir::Pass { protected: @@ -195,9 +193,7 @@ class BackWardOpDepsPass : public ir::Pass { } } }; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(backward_optimizer_op_deps_pass, paddle::framework::ir::BackWardOpDepsPass); diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/fix_op_run_order_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/fix_op_run_order_pass.cc index d7d18f6e8469c..1c3e4c03e561f 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/fix_op_run_order_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/fix_op_run_order_pass.cc @@ -24,9 +24,7 @@ #include "paddle/fluid/framework/ir/pass.h" #include "paddle/utils/string/string_helper.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { static std::string kSep(1, static_cast(1)); // NOLINT @@ -269,8 +267,6 @@ class FixOpRunOrderPass : public Pass { } }; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(fix_op_run_order_pass, paddle::framework::ir::FixOpRunOrderPass); diff --git 
a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc index cc20f52180871..b16548c545ef0 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc @@ -41,9 +41,7 @@ #include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h" #endif -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { namespace { // TODO(panyx0718): Clean this up as well. @@ -1377,9 +1375,7 @@ static int MultiDevSSAGraphBuilderRegister(const std::string &builder_mode) { return 0; } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir #define REGISTER_MULTI_DEVICES_PASS(pass_name, pass_class) \ STATIC_ASSERT_GLOBAL_NAMESPACE( \ diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.cc index f4f0e393c2499..72e8baaba5017 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.cc @@ -17,9 +17,7 @@ #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { static std::unordered_set ReaderOpSet() { return {"create_py_reader"}; @@ -78,6 +76,4 @@ void SetReaderOpDeviceInfo(Graph *graph, size_t dev_cnt, size_t dev_idx) { VLOG(10) << "Found op number " << found_op_num; } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc 
b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index ebf273a8d1c2e..008f1e95cd4f7 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -22,16 +22,11 @@ #include "paddle/fluid/platform/float16.h" #include "paddle/phi/common/data_type.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { static void ReplaceOutputVar(Node* op, Node* old_var, Node* new_var) { if (op->IsOp() && op->Op()) { @@ -635,7 +630,8 @@ PDNode* MultiHeadMatmulV3Pattern::operator()() { return transpose2_2_out_var; } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { namespace { template @@ -1615,9 +1611,7 @@ void MultiHeadMatmulV3FusePass::ApplyImpl(Graph* graph) const { AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(multihead_matmul_fuse_pass, paddle::framework::ir::MultiHeadMatmulFusePass); diff --git a/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.cc index 1f91b6955aadf..2bcc0de82c015 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.cc @@ -19,10 +19,7 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { static void ReplaceOutputVar(Node* op, Node* old_var, Node* new_var) { if (op->IsOp() && op->Op()) { @@ -310,7 +307,8 @@ PDNode* 
MultiHeadMatmulRoformerPattern::operator()() { return transpose2_2_out_var; } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { MultiHeadMatmulRoformerFusePass::MultiHeadMatmulRoformerFusePass() { AddOpCompat(OpCompat("elementwise_add")) @@ -758,9 +756,7 @@ void MultiHeadMatmulRoformerFusePass::ApplyImpl(Graph* graph) const { AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(multihead_matmul_roformer_fuse_pass, paddle::framework::ir::MultiHeadMatmulRoformerFusePass); diff --git a/paddle/fluid/framework/ir/node_test.cc b/paddle/fluid/framework/ir/node_test.cc index 2d84162e13aa6..26ca77f3b00ce 100644 --- a/paddle/fluid/framework/ir/node_test.cc +++ b/paddle/fluid/framework/ir/node_test.cc @@ -17,9 +17,7 @@ limitations under the License. */ #include "gtest/gtest.h" #include "paddle/fluid/framework/var_desc.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; @@ -103,6 +101,4 @@ TEST(NodeTest, ToString) { EXPECT_EQ(n3->ToString(), "{n2} = n3(n1)"); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc index 1c733636ca7b0..cf17f00fa4080 100644 --- a/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc @@ -21,9 +21,7 @@ #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void ComputePropagateScalesMkldnnPass::GetTensorFromVector( const std::vector& data_v, phi::DenseTensor* tensor) const { @@ -516,9 +514,7 
@@ void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const { graph, "has_quant_info", "var_quant_scales", var_quant_scales); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(compute_propagate_scales_onednn_pass, paddle::framework::ir::ComputePropagateScalesMkldnnPass); diff --git a/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc index 9664647fd4214..f8cc1ca17c99a 100644 --- a/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc @@ -19,9 +19,7 @@ #include "paddle/fluid/framework/naive_executor.h" #include "paddle/phi/common/place.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { const std::array positive_and_negative_values = {-0.0482659, -0.0102493, @@ -347,6 +345,4 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, update_relu_output_scales) { BuildConv2dReluProgramDesc(), &var_quant_scales, {"conv_out"}); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc index 61c0457f7c740..bfe0296640dfb 100644 --- a/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc @@ -17,9 +17,7 @@ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { using string::PrettyLogDetail; @@ -373,9 +371,7 @@ ConvActivationMkldnnFusePass::ConvActivationMkldnnFusePass() { .End(); } -} // namespace ir -} // 
namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(conv_activation_onednn_fuse_pass, paddle::framework::ir::ConvActivationMkldnnFusePass); diff --git a/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc index 5ee6e361bcc92..e04314f399be5 100644 --- a/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc @@ -23,15 +23,11 @@ namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; @@ -309,9 +305,7 @@ void ConvAffineChannelFusePass::FuseConvAffineChannel( AddStatis(found_conv_ac_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(conv_affine_channel_onednn_fuse_pass, paddle::framework::ir::ConvAffineChannelFusePass); diff --git a/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc index 1cf663d13deef..c63cf3ed74b2f 100644 --- a/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc @@ -22,9 +22,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { ConvBiasFusePass::ConvBiasFusePass() { AddOpCompat(OpCompat("conv2d")) @@ -445,9 +443,7 @@ void ConvBiasFusePass::FuseConvBias(ir::Graph* graph, } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir 
REGISTER_PASS(conv_bias_onednn_fuse_pass, paddle::framework::ir::ConvBiasFusePass); REGISTER_PASS_CAPABILITY(conv_bias_onednn_fuse_pass) diff --git a/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc index 7733730f7d605..14857f3c550d8 100644 --- a/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc @@ -19,9 +19,7 @@ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { ResidualConnectionMKLDNNFusePass::ResidualConnectionMKLDNNFusePass() { AddOpCompat(OpCompat("conv2d")) @@ -305,9 +303,7 @@ void ResidualConnectionMKLDNNFusePass::ApplyImpl(ir::Graph* graph) const { AddStatis(graph_with_stats.second); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(conv_elementwise_add_onednn_fuse_pass, paddle::framework::ir::ResidualConnectionMKLDNNFusePass); diff --git a/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc index c31e59b39216a..e5d2ae598b81d 100644 --- a/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc @@ -17,9 +17,7 @@ #include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h" #include "paddle/fluid/imperative/type_defs.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void SetOp(ProgramDesc* prog, const std::string& type, @@ -230,8 +228,6 @@ TEST(CpuBfloat16Pass, double_outputs_ops) { added_nodes); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(cpu_bfloat16_pass); diff --git 
a/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc index a512f4b8021f4..a7256cdfe9404 100644 --- a/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc @@ -22,9 +22,7 @@ #include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { using EigenVectorArrayMap = Eigen::Map>; using EigenVectorArrayMapFloat = @@ -1313,9 +1311,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const { QuantizeFusionLSTM(graph); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(cpu_quantize_pass, paddle::framework::ir::CPUQuantizePass) .RequirePassAttr("quant_var_scales"); diff --git a/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc index 3f0423870d366..427de4d610754 100644 --- a/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc @@ -20,9 +20,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { using string::PrettyLogDetail; @@ -81,9 +79,7 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( act_type); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(elementwise_act_onednn_fuse_pass, paddle::framework::ir::ElementwiseActivationOneDNNPass); diff --git a/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc index 499a7734d71d6..33b512dc7669c 100644 --- 
a/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc @@ -19,9 +19,7 @@ #include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { Int8ScaleCalculationMkldnnPass::Int8ScaleCalculationMkldnnPass() { // NOLINT AddOpCompat(OpCompat("conv2d")) @@ -210,9 +208,7 @@ void Int8ScaleCalculationMkldnnPass::Int8ScaleImpl( AddStatis(found_int8_scales_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(int8_scale_calculation_onednn_pass, paddle::framework::ir::Int8ScaleCalculationMkldnnPass); diff --git a/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc b/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc index 8f384931a589c..ad0add6dd3c0c 100644 --- a/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc @@ -19,15 +19,11 @@ #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class OpDesc; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Graph; @@ -65,9 +61,7 @@ void InterpolateOneDNNPass::ApplyImpl(ir::Graph* graph) const { AddStatis(found_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(interpolate_onednn_pass, paddle::framework::ir::InterpolateOneDNNPass); diff --git a/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc index 214b8e12fd0b1..63dc9cd677a4f 100644 --- a/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc +++ 
b/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc @@ -26,9 +26,7 @@ #include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { using EigenVectorArrayMap = Eigen::Map>; using string::PrettyLogDetail; @@ -179,9 +177,7 @@ MultiGruSeqFusePass::MultiGruSeqFusePass() { .End(); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(multi_gru_seq_fuse_pass, paddle::framework::ir::MultiGruSeqFusePass); diff --git a/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc index a21ddd579be3c..f937a1c681b17 100644 --- a/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc @@ -19,9 +19,7 @@ #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { using string::PrettyLogDetail; @@ -132,9 +130,7 @@ void FuseOperatorReshape2OneDNNPass::FuseReshape2(Graph *graph, op_type); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(operator_reshape2_onednn_fuse_pass, paddle::framework::ir::FuseOperatorReshape2OneDNNPass); diff --git a/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc index 36ff2110e582f..716419434933d 100755 --- a/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/imperative/type_defs.h" #include "paddle/phi/common/place.h" -namespace paddle { 
-namespace framework { -namespace ir { +namespace paddle::framework::ir { namespace { struct Data { Data() = default; @@ -380,8 +378,6 @@ TEST_F(ParamsQuantizationMkldnnPassTestFixture, conv_with_bias_2g2o2i1h1ws) { } } // namespace -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(params_quantization_onednn_pass); diff --git a/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc index f3250c32604c6..3b22bea8205b4 100644 --- a/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc @@ -18,9 +18,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void ReshapeTransposeMatmulMkldnnFusePass::ApplyImpl(Graph *graph) const { auto matmul_types = {"matmul", "matmul_v2", "fused_matmul"}; @@ -264,9 +262,7 @@ ReshapeTransposeMatmulMkldnnFusePass::ReshapeTransposeMatmulMkldnnFusePass() { .End(); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(reshape_transpose_matmul_onednn_fuse_pass, paddle::framework::ir::ReshapeTransposeMatmulMkldnnFusePass); diff --git a/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc index 7ae647c6d28f7..7c8930f9fccc8 100644 --- a/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc @@ -22,9 +22,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Graph; @@ -141,9 +139,7 @@ void ScaleMatmulFusePass::ApplyImpl(ir::Graph* graph) const { found_scale_matmul_fuse_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(scale_matmul_fuse_pass, paddle::framework::ir::ScaleMatmulFusePass); diff --git a/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc index 7bce1813fed8a..2ae5301258d65 100644 --- a/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc +++ b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); #define GET_NODES \ @@ -231,9 +229,7 @@ void ShuffleChannelMKLDNNDetectPass::ApplyImpl(ir::Graph* graph) const { gpd(graph, handler); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(shuffle_channel_onednn_detect_pass, paddle::framework::ir::ShuffleChannelMKLDNNDetectPass); diff --git a/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc index da389d3a1353c..4cfa4c637bc34 100644 --- a/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc @@ -19,9 +19,7 @@ #include "paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" 
-namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void AddVarToScope(Scope* param_scope, const std::string& name, @@ -78,8 +76,6 @@ TEST(ShuffleChannelOneDNNDetectPass, ShuffleChannelOneDNNDetectPassTest) { MainTest(); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(shuffle_channel_onednn_detect_pass); diff --git a/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc index d18765ff27bdd..3d6821d5bd79b 100644 --- a/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc @@ -21,9 +21,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { using string::PrettyLogDetail; @@ -76,9 +74,7 @@ void SoftplusActivationOneDNNPass::FuseSoftplusActivation( found_softplus_activation_count, act_type); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(softplus_activation_onednn_fuse_pass, paddle::framework::ir::SoftplusActivationOneDNNPass); diff --git a/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc index 4af9c6a770436..7ac8edbb6005c 100644 --- a/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc @@ -17,9 +17,7 @@ #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { using string::PrettyLogDetail; @@ -77,9 +75,7 @@ void 
FuseSqueeze2Transpose2OneDNNPass::ApplyImpl(Graph *graph) const { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(squeeze2_transpose2_onednn_fuse_pass, paddle::framework::ir::FuseSqueeze2Transpose2OneDNNPass); diff --git a/paddle/fluid/framework/ir/pass_test_util.cc b/paddle/fluid/framework/ir/pass_test_util.cc index ee75794d7ccc4..6007bfc64929b 100644 --- a/paddle/fluid/framework/ir/pass_test_util.cc +++ b/paddle/fluid/framework/ir/pass_test_util.cc @@ -28,10 +28,7 @@ #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_proto_maker.h" -namespace paddle { -namespace framework { -namespace ir { -namespace test { +namespace paddle::framework::ir::test { OpDesc* CreateOp(ProgramDesc* prog, const std::string& op_type_name, @@ -236,7 +233,4 @@ OpDesc* GetOp(const BlockDesc& block_desc, return nullptr; } -} // namespace test -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir::test diff --git a/paddle/fluid/framework/ir/placement_pass_base.cc b/paddle/fluid/framework/ir/placement_pass_base.cc index ccf2bf22ab57b..718e15b01fd72 100644 --- a/paddle/fluid/framework/ir/placement_pass_base.cc +++ b/paddle/fluid/framework/ir/placement_pass_base.cc @@ -18,9 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void PlacementPassBase::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Applies " << GetPlacementName() << " placement strategy."; @@ -43,6 +41,4 @@ void PlacementPassBase::ApplyImpl(ir::Graph* graph) const { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/preln_elementwise_groupnorm_act_pass.cc b/paddle/fluid/framework/ir/preln_elementwise_groupnorm_act_pass.cc index 7cbb5c169f63c..3917423754ba4 100644 --- a/paddle/fluid/framework/ir/preln_elementwise_groupnorm_act_pass.cc +++ b/paddle/fluid/framework/ir/preln_elementwise_groupnorm_act_pass.cc @@ -18,18 +18,11 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { struct PrelnGroupNormAct : public PatternBase { PrelnGroupNormAct(PDPattern *pattern, const std::string &name_scope) @@ -92,7 +85,8 @@ void PrelnGroupNormAct::operator()(PDNode *x, PDNode *y, bool with_act) { } } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int PrelnGroupNormActFusePass::ApplyAddGNPattern(ir::Graph *graph, bool with_act) const { @@ -203,9 +197,7 @@ void PrelnGroupNormActFusePass::ApplyImpl(ir::Graph *graph) const { AddStatis(found_subgraph_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(preln_elementwise_groupnorm_act_pass, 
paddle::framework::ir::PrelnGroupNormActFusePass); diff --git a/paddle/fluid/framework/ir/preln_embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/preln_embedding_eltwise_layernorm_fuse_pass.cc index 1734e7d675755..4d7a4b6d8406a 100644 --- a/paddle/fluid/framework/ir/preln_embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/preln_embedding_eltwise_layernorm_fuse_pass.cc @@ -18,18 +18,11 @@ #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { static PDNode* create_emb_vars(PDPattern* pattern, const std::string& name, @@ -147,7 +140,8 @@ void PrelnSkipLayerNorm::operator()() { .LinksTo({layer_norm_out, layer_norm_mean_var, layer_norm_variance_var}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int PrelnEmbeddingEltwiseLayerNormFusePass::BuildFusion( Graph* graph, const std::string& name_scope @@ -455,9 +449,7 @@ void PrelnEmbeddingEltwiseLayerNormFusePass::ApplyImpl(Graph* graph) const { AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(preln_embedding_eltwise_layernorm_fuse_pass, paddle::framework::ir::PrelnEmbeddingEltwiseLayerNormFusePass); diff --git a/paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc b/paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc index 48baf1f4b102f..efe7321874b8f 100644 --- a/paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc +++ b/paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc @@ -19,18 +19,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { struct PrelnResidualBias : public PatternBase { PrelnResidualBias(PDPattern *pattern, @@ -127,7 +120,8 @@ void PrelnResidualBias::operator()(PDNode *x, PDNode *y) { {layer_norm_out_var, layer_norm_mean_var, layer_norm_variance_var}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { void setIntermediateOut(OpDesc *desc, const std::string &out_name, @@ -300,9 +294,7 @@ void PrelnResidualBiasFusePass::ApplyImpl(ir::Graph *graph) const { AddStatis(found_subgraph_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(preln_residual_bias_fuse_pass, paddle::framework::ir::PrelnResidualBiasFusePass); diff --git a/paddle/fluid/framework/ir/preln_skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/preln_skip_layernorm_fuse_pass.cc index 8547fceed0773..1b7ed93e8bfcb 100644 --- a/paddle/fluid/framework/ir/preln_skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/preln_skip_layernorm_fuse_pass.cc @@ -19,18 +19,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { struct PrelnSkipLayerNorm : public PatternBase { PrelnSkipLayerNorm(PDPattern *pattern, const std::string &name_scope) @@ -102,7 +95,8 @@ void PrelnSkipLayerNorm::operator()(PDNode *x, PDNode *y) { {layer_norm_out_var, layer_norm_mean_var, layer_norm_variance_var}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { void PrelnSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { PADDLE_ENFORCE_NOT_NULL( @@ -220,9 +214,7 @@ void PrelnSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { AddStatis(found_subgraph_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(preln_skip_layernorm_fuse_pass, paddle::framework::ir::PrelnSkipLayerNormFusePass); diff --git a/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc index 519be82a0025f..63cbe6218ead7 100644 --- a/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc +++ b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { QuantDequantFusePass::QuantDequantFusePass() { AddOpCompat(OpCompat("fake_quantize_range_abs_max")) .AddInput("X") @@ -625,9 +623,7 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const { } } -} // namespace ir -} // namespace framework -} // namespace 
paddle +} // namespace paddle::framework::ir REGISTER_PASS(quant_conv2d_dequant_fuse_pass, paddle::framework::ir::QuantDequantFusePass); diff --git a/paddle/fluid/framework/ir/quantize_helper.cc b/paddle/fluid/framework/ir/quantize_helper.cc index c4b06651f1bbb..b424212c0bdb2 100644 --- a/paddle/fluid/framework/ir/quantize_helper.cc +++ b/paddle/fluid/framework/ir/quantize_helper.cc @@ -14,9 +14,7 @@ #include "paddle/fluid/framework/ir/quantize_helper.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void SaveQuantInfoInTheGraph( ir::Graph* graph, @@ -74,6 +72,4 @@ std::vector GetScaleVecValueForNode( return var_quant_scales->at(node->Name()); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc index d4e8a1683ed18..cb5f23d7d39be 100644 --- a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc @@ -17,9 +17,7 @@ #include "paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h" #include "paddle/fluid/framework/op_proto_maker.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void SetOp(ProgramDesc* prog, const std::string& type, @@ -213,8 +211,6 @@ TEST(SeqPoolConcatFusePass, more_inputs) { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(seqpool_concat_fuse_pass); diff --git a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc index eeef9c73db3d7..2e0810571ebdf 100644 --- a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/ir/pass.h" #include 
"paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Graph; class Node; @@ -197,9 +195,7 @@ void SeqPoolCVMConcatFusePass::ApplyImpl(ir::Graph* graph) const { AddStatis(count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(seqpool_cvm_concat_fuse_pass, paddle::framework::ir::SeqPoolCVMConcatFusePass); diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc index 7626c1e9142f9..c0e31259f7771 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); #define GET_NODES \ @@ -230,9 +228,7 @@ void ShuffleChannelDetectPass::ApplyImpl(ir::Graph* graph) const { gpd(graph, handler); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(shuffle_channel_detect_pass, paddle::framework::ir::ShuffleChannelDetectPass); diff --git a/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc b/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc index 93a1008838558..1879150235e5c 100644 --- a/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc +++ b/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc @@ -20,9 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { /* * This pass is to simplify the Graph, it may contains: @@ -237,9 +235,7 @@ void SimplifyWithBasicOpsPass::ReplaceOutputVar(Node* op, } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(simplify_with_basic_ops_pass, paddle::framework::ir::SimplifyWithBasicOpsPass); diff --git a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc index f29df2961d995..5afc03db69b3b 100644 --- a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc @@ -19,18 +19,11 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { struct SkipLayerNorm : public PatternBase { SkipLayerNorm(PDPattern *pattern, const std::string &name_scope) @@ -99,7 +92,8 @@ PDNode *SkipLayerNorm::operator()(PDNode *x, PDNode *y) { return layer_norm_out_var; } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { void SkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { PADDLE_ENFORCE_NOT_NULL( @@ -194,9 +188,7 @@ void SkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { AddStatis(found_subgraph_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir 
REGISTER_PASS(skip_layernorm_fuse_pass, paddle::framework::ir::SkipLayerNormFusePass); diff --git a/paddle/fluid/framework/ir/split_layernorm_to_math_ops_pass.cc b/paddle/fluid/framework/ir/split_layernorm_to_math_ops_pass.cc index 9097eb6572521..d5461037435e5 100644 --- a/paddle/fluid/framework/ir/split_layernorm_to_math_ops_pass.cc +++ b/paddle/fluid/framework/ir/split_layernorm_to_math_ops_pass.cc @@ -24,9 +24,7 @@ #include "paddle/utils/string/pretty_log.h" #include "paddle/utils/string/printf.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { // cpplint complaints (wrong!) for not included header in below line. using string::PrettyLogDetail; // NOLINT @@ -440,9 +438,7 @@ void SplitLayerNormPass::ApplyImpl(Graph* graph) const { AddStatis(found_layer_norm_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(split_layernorm_to_math_ops_pass, paddle::framework::ir::SplitLayerNormPass); diff --git a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc index b300dcd76119c..c2a73a3aac512 100644 --- a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc +++ b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, const std::string& name_scope) { @@ -489,9 +487,7 @@ void SquaredMatSubFusePass::ApplyImpl(ir::Graph* graph) const { AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(squared_mat_sub_fuse_pass, paddle::framework::ir::SquaredMatSubFusePass); diff --git a/paddle/fluid/framework/ir/subgraph_detector.cc b/paddle/fluid/framework/ir/subgraph_detector.cc index 
79df75bd780d5..82e4000179bcc 100644 --- a/paddle/fluid/framework/ir/subgraph_detector.cc +++ b/paddle/fluid/framework/ir/subgraph_detector.cc @@ -16,9 +16,7 @@ limitations under the License. */ #include "glog/logging.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Graph; class Node; @@ -485,6 +483,4 @@ inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) { return node.inputs.size() == n; } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir diff --git a/paddle/fluid/framework/ir/sync_batch_norm_pass_tester.cc b/paddle/fluid/framework/ir/sync_batch_norm_pass_tester.cc index 4a443dc70860c..eafdcfdc75803 100644 --- a/paddle/fluid/framework/ir/sync_batch_norm_pass_tester.cc +++ b/paddle/fluid/framework/ir/sync_batch_norm_pass_tester.cc @@ -20,9 +20,7 @@ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { void SetOp(ProgramDesc* prog, const std::string& type, @@ -92,8 +90,6 @@ TEST(IsTestPass, basic) { } } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir USE_PASS(sync_batch_norm_pass); diff --git a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc index 6774a6baae023..338bcf5c50e11 100644 --- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc @@ -16,9 +16,7 @@ #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { TransposeFlattenConcatFusePass::TransposeFlattenConcatFusePass() { AddOpCompat(OpCompat("transpose2")) @@ -215,9 +213,7 @@ void TransposeFlattenConcatFusePass::ApplyImpl(ir::Graph *graph) const { } } 
-} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(transpose_flatten_concat_fuse_pass, paddle::framework::ir::TransposeFlattenConcatFusePass); diff --git a/paddle/fluid/framework/ir/trt_delete_weight_dequant_linear_op_pass.cc b/paddle/fluid/framework/ir/trt_delete_weight_dequant_linear_op_pass.cc index 6bc9cb324d80d..673f1d3bfb83d 100644 --- a/paddle/fluid/framework/ir/trt_delete_weight_dequant_linear_op_pass.cc +++ b/paddle/fluid/framework/ir/trt_delete_weight_dequant_linear_op_pass.cc @@ -20,9 +20,7 @@ #include #include -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { #define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); #define GET_NODES \ @@ -373,9 +371,7 @@ void TrtDeleteWeightQuantDequantLinearOpPass::ApplyImpl( AddStatis(found_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(trt_delete_weight_dequant_linear_op_pass, paddle::framework::ir::TrtDeleteWeightQuantDequantLinearOpPass); diff --git a/paddle/fluid/framework/ir/trt_map_ops_to_matrix_multiply_pass.cc b/paddle/fluid/framework/ir/trt_map_ops_to_matrix_multiply_pass.cc index 0ca812bad7662..d0062ec632b16 100644 --- a/paddle/fluid/framework/ir/trt_map_ops_to_matrix_multiply_pass.cc +++ b/paddle/fluid/framework/ir/trt_map_ops_to_matrix_multiply_pass.cc @@ -22,9 +22,7 @@ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; @@ -118,9 +116,7 @@ void TrtMapOpsToMatrixMultiplyPass::ApplyImpl(ir::Graph* graph) const { AddStatis(found_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(trt_map_ops_to_matrix_multiply_pass, 
paddle::framework::ir::TrtMapOpsToMatrixMultiplyPass); diff --git a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc index 0bee108064d08..c8d49cdfaedca 100644 --- a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc @@ -19,16 +19,11 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { static void ReplaceOutputVar(Node* op, Node* old_var, Node* new_var) { if (op->IsOp() && op->Op()) { @@ -628,7 +623,8 @@ PDNode* TrtMultiHeadMatmulV3Pattern::operator()() { return transpose2_2_out_var; } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { void TrtMultiHeadMatmulFusePass::ApplyImpl(Graph* graph) const { FusePassBase::Init(name_scope_, graph); @@ -1541,9 +1537,7 @@ void TrtMultiHeadMatmulV3FusePass::ApplyImpl(Graph* graph) const { AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(trt_multihead_matmul_fuse_pass, paddle::framework::ir::TrtMultiHeadMatmulFusePass); diff --git a/paddle/fluid/framework/ir/trt_prompt_tuning_embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/trt_prompt_tuning_embedding_eltwise_layernorm_fuse_pass.cc index f93a42a7dbab8..6772612134783 100644 --- a/paddle/fluid/framework/ir/trt_prompt_tuning_embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_prompt_tuning_embedding_eltwise_layernorm_fuse_pass.cc @@ -18,18 +18,11 @@ #include 
"paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { static PDNode* create_emb_vars(PDPattern* pattern, const std::string& name, @@ -201,7 +194,8 @@ void TrtPromptTuningSkipLayerNorm::operator()() { .LinksTo({layer_norm_out}); } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int TrtPromptTuningEmbeddingEltwiseLayerNormFusePass::BuildFusion( Graph* graph, const std::string& name_scope @@ -580,9 +574,7 @@ void TrtPromptTuningEmbeddingEltwiseLayerNormFusePass::ApplyImpl( AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS( trt_prompt_tuning_embedding_eltwise_layernorm_fuse_pass, diff --git a/paddle/fluid/framework/ir/trt_qk_multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/trt_qk_multihead_matmul_fuse_pass.cc index d68694106b5c7..c6a22c143fb66 100644 --- a/paddle/fluid/framework/ir/trt_qk_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_qk_multihead_matmul_fuse_pass.cc @@ -22,10 +22,7 @@ #endif #include "paddle/phi/kernels/funcs/blas/blas.h" -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { // input_qk input_v // |q |k v @@ -249,7 +246,8 @@ PDNode* TrtQKMultiHeadMatmulPattern::operator()() { return reshape2_qkv_out_var; } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { int TrtQkMultiHeadMatmulFusePass::BuildQkFusion(Graph* graph, const std::string& name_scope, @@ -575,9 +573,7 @@ void TrtQkMultiHeadMatmulFusePass::ApplyImpl(Graph* 
graph) const { AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(trt_qk_multihead_matmul_fuse_pass, paddle::framework::ir::TrtQkMultiHeadMatmulFusePass); diff --git a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc index 0708218dbd07c..e90cadc782a61 100644 --- a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc @@ -22,18 +22,11 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/helper.h" #endif -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir -namespace paddle { -namespace framework { -namespace ir { -namespace patterns { +namespace paddle::framework::ir::patterns { struct TrtSkipLayerNorm : public PatternBase { TrtSkipLayerNorm(PDPattern *pattern, const std::string &name_scope) @@ -102,7 +95,8 @@ PDNode *TrtSkipLayerNorm::operator()(PDNode *x, PDNode *y) { return layer_norm_out_var; } -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { void TrtSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { PADDLE_ENFORCE_NOT_NULL( @@ -271,9 +265,7 @@ void TrtSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { AddStatis(found_subgraph_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(trt_skip_layernorm_fuse_pass, paddle::framework::ir::TrtSkipLayerNormFusePass); diff --git a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc index d9907555a17b5..6b49a99c02364 100644 --- a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc +++ 
b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc @@ -26,9 +26,7 @@ #include "paddle/fluid/framework/ir/node.h" #include "paddle/phi/common/data_type.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { namespace { @@ -383,8 +381,6 @@ void TrtSupportNHWCPass::ApplyImpl(Graph *graph) const { AddStatis(transposed_ops.size()); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(trt_support_nhwc_pass, paddle::framework::ir::TrtSupportNHWCPass); diff --git a/paddle/fluid/framework/ir/vit_attention_fuse_pass.cc b/paddle/fluid/framework/ir/vit_attention_fuse_pass.cc index 382e1c60ee989..a547301b835c9 100644 --- a/paddle/fluid/framework/ir/vit_attention_fuse_pass.cc +++ b/paddle/fluid/framework/ir/vit_attention_fuse_pass.cc @@ -52,9 +52,7 @@ GET_IR_NODE(reshape2_op); \ GET_IR_NODE(reshape2_out); -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { bool HasScale(OpDesc* const op_ptr, std::string* name, @@ -160,9 +158,7 @@ void VitAttentionFusePass::ApplyImpl(ir::Graph* graph) const { AddStatis(fusion_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(vit_attention_fuse_pass, paddle::framework::ir::VitAttentionFusePass); diff --git a/paddle/fluid/framework/ir/xpu/block_multihead_attention_xpu_pass.cc b/paddle/fluid/framework/ir/xpu/block_multihead_attention_xpu_pass.cc new file mode 100644 index 0000000000000..3d4c78896f7e2 --- /dev/null +++ b/paddle/fluid/framework/ir/xpu/block_multihead_attention_xpu_pass.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "glog/logging.h" + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/framework/ir/xpu/pass_utils.h" +#include "paddle/fluid/framework/ir/xpu/quant_utils.h" +#include "paddle/fluid/framework/op_version_registry.h" +#include "paddle/fluid/platform/enforce.h" + +namespace phi { +class DenseTensor; +} // namespace phi + +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace framework { +namespace ir { + +class BlockMultiHeadAttentionXPUPass : public FusePassBase { + protected: + void ApplyImpl(ir::Graph* graph) const override; + + private: + void InplaceBlockMultiHeadAttentionXPU(ir::Graph* graph) const; + + const std::string name_scope_{"block_multihead_attention_xpu_pass"}; +}; + +void BlockMultiHeadAttentionXPUPass::ApplyImpl(ir::Graph* graph) const { + PADDLE_ENFORCE_NOT_NULL( + graph, platform::errors::PreconditionNotMet("graph should not be null.")); + Init(name_scope_, graph); + + InplaceBlockMultiHeadAttentionXPU(graph); +} + +void BlockMultiHeadAttentionXPUPass::InplaceBlockMultiHeadAttentionXPU( + ir::Graph* graph) const { + const int64_t max_batch_size = 10; + auto* scope = param_scope(); + for (auto* node : graph->Nodes()) { + if (node->IsOp() && node->Op()->Type() == "block_multihead_attention") { + auto* op_desc = node->Op(); + op_desc->SetType("block_multihead_attention_xpu"); + phi::DenseTensor 
cache_k_per_batch_maxs; + auto base_name = op_desc->Input("qkv")[0]; + int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(-1); + std::string cache_k_per_batch_maxs_name = base_name + "_max_cache_k"; + VarDesc cache_k_per_batch_maxs_desc(cache_k_per_batch_maxs_name); + cache_k_per_batch_maxs_desc.SetPersistable(true); + cache_k_per_batch_maxs_desc.SetShape( + {max_batch_size, static_cast(max_ptr_size)}); + cache_k_per_batch_maxs_desc.SetDataType( + proto::VarType::Type::VarType_Type_FP32); + Node* cache_k_per_batch_maxs_in = + graph->CreateVarNode(&cache_k_per_batch_maxs_desc); + phi::DenseTensor cpu_tensor; + auto* cpu_ctx = static_cast( + platform::DeviceContextPool::Instance().Get(phi::CPUPlace())); + cpu_tensor.set_type(phi::DataType::FLOAT32); + cpu_tensor.Resize({max_batch_size, max_ptr_size}); + std::vector tmp(max_batch_size * max_ptr_size, 0); + memcpy(cpu_ctx->Alloc(&cpu_tensor), + tmp.data(), + max_batch_size * max_ptr_size * sizeof(float)); + Assign(cpu_tensor, + scope->Var(cache_k_per_batch_maxs_name) + ->GetMutable()); + op_desc->SetInput("cache_k_per_batch_maxs", + {cache_k_per_batch_maxs_name}); + + std::string cache_v_per_batch_maxs_name = base_name + "_max_cache_v"; + VarDesc cache_v_per_batch_maxs_desc(cache_v_per_batch_maxs_name); + cache_v_per_batch_maxs_desc.SetPersistable(true); + cache_v_per_batch_maxs_desc.SetShape( + {max_batch_size, static_cast(max_ptr_size)}); + cache_v_per_batch_maxs_desc.SetDataType( + proto::VarType::Type::VarType_Type_FP32); + Node* cache_v_per_batch_maxs_in = + graph->CreateVarNode(&cache_v_per_batch_maxs_desc); + Assign(cpu_tensor, + scope->Var(cache_v_per_batch_maxs_name) + ->GetMutable()); + op_desc->SetInput("cache_v_per_batch_maxs", + {cache_v_per_batch_maxs_name}); + + IR_NODE_LINK_TO(cache_k_per_batch_maxs_in, node); + IR_NODE_LINK_TO(cache_v_per_batch_maxs_in, node); + } + } +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(block_multihead_attention_xpu_pass, + 
paddle::framework::ir::BlockMultiHeadAttentionXPUPass); + +REGISTER_PASS_CAPABILITY(block_multihead_attention_xpu_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination().EQ( + "block_multihead_attention_xpu", 0)); diff --git a/paddle/fluid/framework/ir/yolo_box_fuse_pass.cc b/paddle/fluid/framework/ir/yolo_box_fuse_pass.cc index 7e2ba4dcabee2..9399bf743d6e3 100644 --- a/paddle/fluid/framework/ir/yolo_box_fuse_pass.cc +++ b/paddle/fluid/framework/ir/yolo_box_fuse_pass.cc @@ -22,13 +22,12 @@ limitations under the License. */ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { -namespace ir { +namespace paddle::framework::ir { class Node; -namespace patterns { +} // namespace paddle::framework::ir +namespace paddle::framework::ir::patterns { struct YoloBoxPattern : public PatternBase { YoloBoxPattern(PDPattern* pattern, const std::string& name_scope) : PatternBase(pattern, name_scope, name_scope) { @@ -147,7 +146,8 @@ struct YoloBoxPattern : public PatternBase { PATTERN_DECL_NODE(nms_out_index); PATTERN_DECL_NODE(nms_out_rois_num); }; -} // namespace patterns +} // namespace paddle::framework::ir::patterns +namespace paddle::framework::ir { YoloBoxFusePass::YoloBoxFusePass() = default; @@ -300,8 +300,6 @@ void YoloBoxFusePass::ApplyImpl(ir::Graph* graph) const { AddStatis(found_subgraph_count); } -} // namespace ir -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::ir REGISTER_PASS(yolo_box_fuse_pass, paddle::framework::ir::YoloBoxFusePass); diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index 6b257385c9b06..8eba8bb026430 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -19,8 +19,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/version.h" -namespace paddle { -namespace framework { +namespace paddle::framework { std::string LoDToString(const LoD &lod) { std::ostringstream stream; @@ -520,5 +519,4 @@ void MergeLoDTensor(phi::DenseTensor *target, } } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index 095e0401fcad5..d0da7e7e1817d 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -33,8 +33,7 @@ #include "paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h" #endif -namespace paddle { -namespace framework { +namespace paddle::framework { void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc, int block_id) { @@ -331,5 +330,4 @@ void NaiveExecutor::ResetTrtOps(int num) { #endif } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/executor_statistics.cc b/paddle/fluid/framework/new_executor/executor_statistics.cc index be1f8bbfcf8a6..4b830e7b05e55 100644 --- a/paddle/fluid/framework/new_executor/executor_statistics.cc +++ b/paddle/fluid/framework/new_executor/executor_statistics.cc @@ -35,8 +35,7 @@ PADDLE_DEFINE_EXPORTED_string(static_executor_perfstat_filepath, "enables performance statistics for the static " "graph executor."); -namespace paddle { -namespace framework { +namespace paddle::framework { class StatisticsEngine { public: @@ -632,5 +631,4 @@ void StaticGraphExecutorPerfStatistics( } } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc index f82350ec6d103..3d328372514d7 100644 --- a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc +++ 
b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc @@ -19,8 +19,7 @@ #include "paddle/fluid/framework/new_executor/feed_fetch_utils.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" -namespace paddle { -namespace framework { +namespace paddle::framework { void SetColAttrForFeedFetchOps(std::shared_ptr program_desc, const int64_t micro_batch_num, @@ -253,5 +252,4 @@ void MergeTensors(const std::vector& tensors, } } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.cc b/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.cc index a3dd897ff52c6..17bfc8bc30853 100644 --- a/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.cc +++ b/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.cc @@ -21,8 +21,7 @@ #include #endif // !_WIN32 -namespace paddle { -namespace framework { +namespace paddle::framework { InterpreterCoreEventGarbageCollector::InterpreterCoreEventGarbageCollector( const std::vector& vec_instruction) @@ -214,5 +213,4 @@ void InterpreterCoreEventGarbageCollector::FreeGarbages() { events_.clear(); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc index 0da7138d24b9d..3c4eb57b6bee0 100644 --- a/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/custom_kernel_instruction.cc @@ -22,8 +22,7 @@ #include "paddle/pir/include/core/operation.h" #include "paddle/pir/include/core/value.h" -namespace paddle { -namespace framework { +namespace paddle::framework { void CustomKernelInstruction::BuildCustomContext( const paddle::dialect::OpYamlInfoParser& op_yaml_info) { @@ -509,5 
+508,4 @@ void CustomKernelInstruction::Run() { VLOG(6) << "Run custom op " << custom_op_name_ << " kernel."; kernel_func_(&custom_kernel_ctx_); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc index 609fd78106747..e7a05d75f6e99 100644 --- a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc +++ b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc @@ -42,8 +42,7 @@ COMMON_DECLARE_bool(dynamic_static_unified_comm); #endif -namespace paddle { -namespace framework { +namespace paddle::framework { std::vector GetValueIds(pir::Value value, const ValueExecutionInfo& value_exec_info) { @@ -407,5 +406,4 @@ bool GetCondData(const phi::DenseTensor& cond) { return cpu_cond->data()[0]; } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc index 56ebe4673caa1..2f723c8ed686a 100644 --- a/paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc @@ -29,8 +29,7 @@ #include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/core/type_defs.h" -namespace paddle { -namespace framework { +namespace paddle::framework { LegacyKernelInstruction::LegacyKernelInstruction( size_t id, @@ -189,5 +188,4 @@ void LegacyKernelInstruction::Run() { VLOG(6) << "Run op " << legacy_op_name_ << " kernel."; (*(phi_kernel_))((kernel_context_)); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_instruction.cc 
b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_instruction.cc index 9af41b9e8c08b..b8a56321b9e66 100644 --- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_instruction.cc @@ -405,9 +405,12 @@ OneDNNPhiKernelInstruction::~OneDNNPhiKernelInstruction() { } void OneDNNPhiKernelInstruction::Run() { + std::vector> tmp_holders; + auto tmp_kernel_context = kernel_context_; + auto tmp_infer_meta_context_ = infer_meta_context_; // Step1. TransLayout - auto inputs = kernel_context_.InputsBetween( - size_t(0), kernel_context_.InputsSize()); + auto inputs = tmp_kernel_context.InputsBetween( + size_t(0), tmp_kernel_context.InputsSize()); for (size_t i = 0; i < inputs.size(); ++i) { auto input = inputs[i]; if (input == nullptr) { @@ -419,10 +422,12 @@ void OneDNNPhiKernelInstruction::Run() { if (skip_format_tensors_.count(i)) { continue; } - VLOG(6) << "input[" << i << "].layout() = " << input->layout(); + VLOG(6) << "input[" << i << "].layout() = " << input->layout() + << ", shape = " << input->dims(); if (input->layout() != phi::DataLayout::ONEDNN) { phi::DataLayout from_layout = input->layout(); - auto transed_tensor = const_cast(input); + tmp_holders.emplace_back(std::make_shared(*input)); + auto transed_tensor = tmp_holders.back().get(); std::set elementwise_kernels = { "add", "subtract", "multiply", "divide"}; @@ -461,8 +466,24 @@ void OneDNNPhiKernelInstruction::Run() { } dnnl::memory::desc out_mem_desc = - phi::funcs::make_memory_desc(*input, from_layout); + phi::funcs::make_memory_desc(*transed_tensor, from_layout); transed_tensor->set_mem_desc(out_mem_desc); + tmp_kernel_context.UpdataInput(i, transed_tensor); + auto meta_tensor = phi::MetaTensor(transed_tensor); + auto input_meta_tensor = phi::MetaTensor(input); + if (tmp_infer_meta_context_.InputsSize() > i && + tmp_infer_meta_context_.InputAt(i).is_same_tensor( + input_meta_tensor)) { + 
tmp_infer_meta_context_.UpdataInput(i, meta_tensor); + } else { + for (size_t j = 0; j < tmp_infer_meta_context_.InputsSize(); ++j) { + if (tmp_infer_meta_context_.InputAt(j).is_same_tensor( + input_meta_tensor)) { + tmp_infer_meta_context_.UpdataInput(j, meta_tensor); + break; + } + } + } } } @@ -470,7 +491,7 @@ void OneDNNPhiKernelInstruction::Run() { // SetDnnAttrIntoDeviceContext // SetInputsName SetOutputsName auto one_dnn_ctx = const_cast( - &kernel_context_.GetDeviceContext()); + &tmp_kernel_context.GetDeviceContext()); for (auto& attr : extra_attr_) { one_dnn_ctx->SetDnnAttr(attr.first, attr.second); } @@ -482,12 +503,12 @@ void OneDNNPhiKernelInstruction::Run() { // Step3. InferMeta if (infer_meta_interface_) { - infer_meta_interface_->infer_meta_(&(infer_meta_context_)); + infer_meta_interface_->infer_meta_(&(tmp_infer_meta_context_)); } // Step4. Run kernel VLOG(6) << "Run op " << phi_op_name_ << " infer meta."; - (*(phi_kernel_))(&(kernel_context_)); + (*(phi_kernel_))(&(tmp_kernel_context)); VLOG(6) << "Run op " << phi_op_name_ << " kernel."; // Step5. ClearDnnAttr diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_instruction.cc index 0115f2f4b9f31..3f72973e37a3e 100644 --- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_instruction.cc @@ -58,6 +58,7 @@ OneDNNMixedPhiKernelInstruction::OneDNNMixedPhiKernelInstruction( } void OneDNNMixedPhiKernelInstruction::Run() { + std::vector> tmp_holders; // Step1. 
Mixed Dynamic Choose Kernel if (!has_choose_kernel_) { has_choose_kernel_ = true; @@ -76,9 +77,11 @@ void OneDNNMixedPhiKernelInstruction::Run() { if (use_onednn_kernel_) { OneDNNPhiKernelInstruction::Run(); } else { + auto tmp_kernel_context = kernel_context_; + auto tmp_infer_meta_context_ = infer_meta_context_; // TransLayout first - auto inputs = kernel_context_.InputsBetween( - size_t(0), kernel_context_.InputsSize()); + auto inputs = tmp_kernel_context.InputsBetween( + size_t(0), tmp_kernel_context.InputsSize()); for (size_t i = 0; i < inputs.size(); ++i) { auto input = inputs[i]; @@ -89,30 +92,66 @@ void OneDNNMixedPhiKernelInstruction::Run() { // NOTE(zhiqiu): to handle the special case in ApplyDataTransform() in // data_transfer.cc if (!input->IsInitialized() && tmp_layout == DataLayout::NHWC) { - auto transed_tensor = const_cast(input); + tmp_holders.emplace_back(std::make_shared(*input)); + auto transed_tensor = tmp_holders.back().get(); transed_tensor->set_layout(tmp_layout); phi::funcs::MatchShapeToLayout( transed_tensor, phi::DataLayout::ONEDNN, tmp_layout); + dnnl::memory::desc out_mem_desc = + phi::funcs::make_memory_desc(*transed_tensor, tmp_layout); + transed_tensor->set_mem_desc(out_mem_desc); + tmp_kernel_context.UpdataInput(i, transed_tensor); + auto meta_tensor = phi::MetaTensor(transed_tensor); + auto input_meta_tensor = phi::MetaTensor(input); + if (tmp_infer_meta_context_.InputsSize() > i && + tmp_infer_meta_context_.InputAt(i).is_same_tensor( + input_meta_tensor)) { + tmp_infer_meta_context_.UpdataInput(i, meta_tensor); + } else { + for (size_t j = 0; j < tmp_infer_meta_context_.InputsSize(); ++j) { + if (tmp_infer_meta_context_.InputAt(j).is_same_tensor( + input_meta_tensor)) { + tmp_infer_meta_context_.UpdataInput(j, meta_tensor); + break; + } + } + } } else { - phi::DenseTensor transed_tensor; - transed_tensor.set_meta(input->meta()); + tmp_holders.emplace_back(std::make_shared()); + auto transed_tensor = tmp_holders.back().get(); + 
transed_tensor->set_meta(input->meta()); phi::funcs::TransDataLayoutFromOneDNN(phi::DataLayout::ONEDNN, tmp_layout, *input, - &transed_tensor, + transed_tensor, phi::CPUPlace()); - *(const_cast(input)) = transed_tensor; + tmp_kernel_context.UpdataInput(i, transed_tensor); + auto meta_tensor = phi::MetaTensor(transed_tensor); + auto input_meta_tensor = phi::MetaTensor(input); + if (tmp_infer_meta_context_.InputsSize() > i && + tmp_infer_meta_context_.InputAt(i).is_same_tensor( + input_meta_tensor)) { + tmp_infer_meta_context_.UpdataInput(i, meta_tensor); + } else { + for (size_t j = 0; j < tmp_infer_meta_context_.InputsSize(); ++j) { + if (tmp_infer_meta_context_.InputAt(j).is_same_tensor( + input_meta_tensor)) { + tmp_infer_meta_context_.UpdataInput(j, meta_tensor); + break; + } + } + } } } } VLOG(6) << "Begin run op " << phi_op_name_ << " infer meta."; if (infer_meta_interface_) { - infer_meta_interface_->infer_meta_(&(infer_meta_context_)); + infer_meta_interface_->infer_meta_(&(tmp_infer_meta_context_)); } VLOG(6) << "End run op " << phi_op_name_ << " infer meta."; VLOG(6) << "Begin run op " << phi_op_name_ << " kernel."; - (*(phi_kernel_))(&(kernel_context_)); + (*(phi_kernel_))(&(tmp_kernel_context)); VLOG(6) << "End run op " << phi_op_name_ << " kernel."; } } diff --git a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc index 11f29ba5c5a45..909dfefcfde08 100644 --- a/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc +++ b/paddle/fluid/framework/new_executor/interpreter/dependency_builder.cc @@ -45,9 +45,7 @@ PADDLE_DEFINE_EXPORTED_bool(enable_dependency_builder_debug_info, false, "Enable dependency builder debug info"); -namespace paddle { -namespace framework { -namespace interpreter { +namespace paddle::framework::interpreter { size_t CountDownstreamMap( const std::map>& downstream_map) { @@ -1482,6 +1480,4 @@ void 
DependencyBuilderSimplify::AddDownstreamOp(size_t prior_op_idx, } } -} // namespace interpreter -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::interpreter diff --git a/paddle/fluid/framework/new_executor/interpreter/execution_config.cc b/paddle/fluid/framework/new_executor/interpreter/execution_config.cc index e8bcfbc736a9e..2b6a3918ba239 100644 --- a/paddle/fluid/framework/new_executor/interpreter/execution_config.cc +++ b/paddle/fluid/framework/new_executor/interpreter/execution_config.cc @@ -24,9 +24,7 @@ PD_DECLARE_bool(new_executor_serial_run); -namespace paddle { -namespace framework { -namespace interpreter { +namespace paddle::framework::interpreter { static constexpr size_t kHostNumThreads = 4; static constexpr size_t kDeviceNumThreads = 1; @@ -151,6 +149,4 @@ void ExecutionConfig::Log(int log_level) { VLOG(log_level) << log_str.str(); } -} // namespace interpreter -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::interpreter diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 52516d69794c8..a3c445cac3c2a 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -53,9 +53,7 @@ COMMON_DECLARE_bool(check_nan_inf); COMMON_DECLARE_string(static_runtime_data_save_path); COMMON_DECLARE_bool(save_static_runtime_data); -namespace paddle { -namespace framework { -namespace interpreter { +namespace paddle::framework::interpreter { using VariableIdMap = std::map>; @@ -1464,6 +1462,4 @@ const std::vector GetInstructionCallStack( } return vec_str; } -} // namespace interpreter -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::interpreter diff --git a/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc 
b/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc index 54ee746726e7e..ee28442be2f56 100644 --- a/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc @@ -28,9 +28,7 @@ COMMON_DECLARE_bool(dynamic_static_unified_comm); #endif -namespace paddle { -namespace framework { -namespace interpreter { +namespace paddle::framework::interpreter { using DeviceContext = platform::DeviceContext; using DeviceEvent = platform::DeviceEvent; @@ -431,12 +429,24 @@ void analyse_event_info_for_two_instructions( if (has_data_dependency( instructions[cur_instr_id], instructions[next_instr_id]) || - !run_type_info[next_instr_id][DownstreamRunType::kEventRun].empty() || instructions[next_instr_id]->OpBase()->Type() == "depend") { waiter_instr_ids->insert(next_instr_id); return; } + if (!run_type_info[next_instr_id][DownstreamRunType::kEventRun].empty()) { + auto& next_next_instructor_ids = + run_type_info[next_instr_id][DownstreamRunType::kEventRun]; + for (auto& id : next_next_instructor_ids) { + if (has_data_dependency( + instructions[cur_instr_id], instructions[id])) { + waiter_instr_ids->insert(next_instr_id); + return; + } + } + return; + } + // NOTE(Ruibiao): If no data dependency from cur_instr to next_instr, and // simultaneously next_instr has no event_run downstream instr, we try to // recursively add events between cur_instr and next_instr's @@ -491,12 +501,25 @@ void analyse_event_info_for_two_instructions< if (has_data_dependency( instructions[cur_instr_id], instructions[next_instr_id]) || - !run_type_info[next_instr_id][DownstreamRunType::kEventRun].empty() || instructions[next_instr_id]->Name() == "pd_op.depend") { waiter_instr_ids->insert(next_instr_id); return; } + if (!run_type_info[next_instr_id][DownstreamRunType::kEventRun].empty()) { + auto& next_next_instructor_ids = + run_type_info[next_instr_id][DownstreamRunType::kEventRun]; + for (auto& id : 
next_next_instructor_ids) { + if (has_data_dependency( + instructions[cur_instr_id], instructions[id])) { + waiter_instr_ids->insert(next_instr_id); + return; + } + } + + return; + } + // NOTE(Ruibiao): If no data dependency from cur_instr to next_instr, and // simultaneously next_instr has no event_run downstream instr, we try to // recursively add events between cur_instr and next_instr's @@ -847,6 +870,4 @@ PirStreamAnalyzer::GetEventInfo() const { return event_info_; } -} // namespace interpreter -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::interpreter diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index 416d46c01e1f2..d5fe408d53401 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -35,8 +35,7 @@ PADDLE_DEFINE_EXPORTED_bool(new_executor_use_local_scope, "Use local_scope in new executor(especially used " "in UT), can turn off for better performance"); -namespace paddle { -namespace framework { +namespace paddle::framework { InterpreterCore::InterpreterCore(const platform::Place& place, const BlockDesc& block, @@ -170,5 +169,4 @@ Variable* InterpreterCore::DebugVar(const std::string& name) const { return impl_->DebugVar(name); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc index 183abe646a293..6cdbb6834d6d8 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.cc +++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc @@ -23,8 +23,7 @@ #include "paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h" #include "paddle/fluid/platform/profiler/event_tracing.h" -namespace paddle { -namespace framework { +namespace paddle::framework { VariableScope::VariableScope(Scope* 
scope) : var_list_(), name2id_(), vec_meta_info_(), data_transfer_added_vars_() { @@ -353,5 +352,4 @@ void Instruction::UpdateRecordStreamForGcInfo() { } #endif -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc index ddce703dab665..59bf34700359b 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.cc +++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc @@ -91,8 +91,7 @@ COMMON_DECLARE_int32(low_precision_op_list); vec_instruction_base_.emplace_back(std::make_unique( \ op_idx++, place_, &op, value_exe_info_.get())); -namespace paddle { -namespace framework { +namespace paddle::framework { void RecordLowPrecisionOp(const InstructionBase* instr_node) { if (FLAGS_low_precision_op_list) { @@ -2024,5 +2023,4 @@ void PirInterpreter::SetCopyProgram(std::shared_ptr prog) { "SetCopyProgram is not implemented in PirInterpreter.")); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/standalone_executor.cc b/paddle/fluid/framework/new_executor/standalone_executor.cc index 4e4b41579f4fe..1acec52134046 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor.cc @@ -13,14 +13,14 @@ // limitations under the License. 
#include "paddle/fluid/framework/new_executor/standalone_executor.h" #include "paddle/common/flags.h" +#include "paddle/fluid/framework/feed_hook.h" #include "paddle/fluid/framework/new_executor/feed_fetch_utils.h" #include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h" #include "paddle/fluid/framework/new_executor/pir_interpreter.h" #include "paddle/fluid/framework/new_executor/program_interpreter.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" -#include "paddle/fluid/platform/profiler/event_tracing.h" - #include "paddle/fluid/pir/transforms/pd_op_to_kernel_pass.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/fluid/ir_adaptor/translator/translate.h" #include "paddle/fluid/pir/transforms/general/inplace_pass.h" @@ -32,8 +32,7 @@ COMMON_DECLARE_bool(enable_pir_in_executor); COMMON_DECLARE_bool(enable_pir_api); COMMON_DECLARE_bool(pir_apply_inplace_pass); -namespace paddle { -namespace framework { +namespace paddle::framework { StandaloneExecutor::StandaloneExecutor(const platform::Place& place, const interpreter::Plan& plan, Scope* scope) @@ -66,6 +65,7 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place, std::shared_ptr<::pir::Program> ir_program = nullptr; if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { // NOLINT ir_program = plan_.IrProgram(job_type); + RunFeedHooks(*ir_program, *scope); } else { // NOTE (liuchenghao): std::make_shared will duplicate ProgramDesc object, // maybe std::make_unique is better? 
@@ -303,5 +303,4 @@ std::shared_ptr StandaloneExecutor::RunProfile( return copy_desc; } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.cc b/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.cc index bdae93c001bfa..437a14ea00404 100644 --- a/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.cc +++ b/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.cc @@ -17,8 +17,7 @@ #include #include -namespace paddle { -namespace framework { +namespace paddle::framework { void* AlignedMalloc(size_t size, size_t alignment) { assert(alignment >= sizeof(void*) && (alignment & (alignment - 1)) == 0); @@ -56,5 +55,4 @@ void AlignedFree(void* mem_ptr) { #endif } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/op_def_api.cc b/paddle/fluid/framework/op_def_api.cc index 1204c95dedc19..ee88ad5d161e2 100644 --- a/paddle/fluid/framework/op_def_api.cc +++ b/paddle/fluid/framework/op_def_api.cc @@ -41,8 +41,7 @@ namespace { */ #include "paddle/fluid/framework/op_def.pbtxt" //NOLINT -namespace paddle { -namespace framework { +namespace paddle::framework { const proto::OpDef& GetOpDef(const std::string& op_name) { static std::unordered_map ops_definition; @@ -73,5 +72,4 @@ const proto::OpDef& GetOpDef(const std::string& op_name) { bool HasOpDef(const std::string& op_name) { return op_def_map.find(op_name) != op_def_map.end(); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/program_desc.cc b/paddle/fluid/framework/program_desc.cc index 512cdd9b38769..e2d6ca02f9e6d 100644 --- a/paddle/fluid/framework/program_desc.cc +++ b/paddle/fluid/framework/program_desc.cc @@ -25,8 +25,7 @@ extern "C" { #include "paddle/fluid/framework/program_converter.h" #include "paddle/fluid/framework/version.h" -namespace paddle { 
-namespace framework { +namespace paddle::framework { BlockDesc *ProgramDesc::AppendBlock(const BlockDesc &parent) { auto *b = desc_.add_blocks(); @@ -287,5 +286,4 @@ bool ProgramDesc::NeedUpdate() const { return need; } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/program_utils.cc b/paddle/fluid/framework/program_utils.cc index ede7f9aa74759..8cf1c1718c122 100644 --- a/paddle/fluid/framework/program_utils.cc +++ b/paddle/fluid/framework/program_utils.cc @@ -17,8 +17,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/block_desc.h" -namespace paddle { -namespace framework { +namespace paddle::framework { template inline void VisitAllElements(Container &&container, @@ -214,5 +213,4 @@ void DumpProgramDescFile(const std::string &name, const ProgramDesc &program) { WriteToFile(filename.c_str(), print_str); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 0af05cab56ac5..2c2f3bd76d0b7 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -26,8 +26,7 @@ COMMON_DECLARE_bool(eager_delete_scope); #define SCOPE_VARS_READER_LOCK phi::AutoRDLock auto_lock(&vars_lock_); #define SCOPE_VARS_WRITER_LOCK phi::AutoWRLock auto_lock(&vars_lock_); -namespace paddle { -namespace framework { +namespace paddle::framework { Scope::Scope() : vars_(), kids_() {} Scope::~Scope() { DropKids(); } // NOLINT @@ -307,5 +306,4 @@ std::string GenScopeTreeDebugInfo(Scope* root) { return os.str(); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/section_worker.cc b/paddle/fluid/framework/section_worker.cc index 19e09ab5edf8d..f5425ba13f96e 100644 --- a/paddle/fluid/framework/section_worker.cc +++ b/paddle/fluid/framework/section_worker.cc @@ -16,8 +16,7 @@ limitations under the 
License. */ #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/platform/device_context.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class TrainerDesc; @@ -248,6 +247,5 @@ void SectionWorker::TrainFiles() { ++batch_id_; } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework #endif diff --git a/paddle/fluid/framework/selected_rows_utils.cc b/paddle/fluid/framework/selected_rows_utils.cc index 3f72ced811390..a4e1e91940443 100644 --- a/paddle/fluid/framework/selected_rows_utils.cc +++ b/paddle/fluid/framework/selected_rows_utils.cc @@ -14,8 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows_utils.h" -namespace paddle { -namespace framework { +namespace paddle::framework { void SerializeToStream(std::ostream& os, const phi::SelectedRows& selected_rows, @@ -95,5 +94,4 @@ void DeserializeFromStream(std::istream& is, is, selected_rows->mutable_value(), dev_ctx); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/shape_inference.cc b/paddle/fluid/framework/shape_inference.cc index 3f90bf08ac713..c3a940f877f89 100644 --- a/paddle/fluid/framework/shape_inference.cc +++ b/paddle/fluid/framework/shape_inference.cc @@ -16,8 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace framework { +namespace paddle::framework { std::vector InferShapeContext::GetReaderDims( const std::string &name) const { @@ -45,5 +44,4 @@ void InferShapeContext::SetReaderDims(const std::string &name, return this->SetRepeatedDims(arg_names[0], dims); } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/string_array.cc b/paddle/fluid/framework/string_array.cc index e701a423abd82..96aa8d04988aa 100644 --- a/paddle/fluid/framework/string_array.cc +++ b/paddle/fluid/framework/string_array.cc @@ -20,8 +20,7 @@ limitations under the License. */ #include "glog/logging.h" -namespace paddle { -namespace framework { +namespace paddle::framework { std::wstring_convert> kConverter; @@ -101,5 +100,4 @@ void StringMapFromStream(std::istream& is, } } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/transfer_scope_cache.cc b/paddle/fluid/framework/transfer_scope_cache.cc index 60c2516c0047d..90d5786a12d2c 100644 --- a/paddle/fluid/framework/transfer_scope_cache.cc +++ b/paddle/fluid/framework/transfer_scope_cache.cc @@ -14,8 +14,7 @@ #include "paddle/fluid/framework/transfer_scope_cache.h" -namespace paddle { -namespace framework { +namespace paddle::framework { std::unordered_map& global_transfer_data_cache() { thread_local auto* x = new std::unordered_map; @@ -57,5 +56,4 @@ Scope* TryCreateTransferScope(const phi::KernelKey& type0, return new_scope; } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/framework/variable_helper.cc b/paddle/fluid/framework/variable_helper.cc index 6f0beeb0b7311..5bde58e1c327d 100644 --- a/paddle/fluid/framework/variable_helper.cc +++ b/paddle/fluid/framework/variable_helper.cc @@ -24,8 +24,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/string_array.h" #include "paddle/fluid/platform/place.h" -namespace paddle { -namespace framework { +namespace paddle::framework { void InitializeVariable(Variable *var, proto::VarType::Type var_type) { if (var_type == proto::VarType::LOD_TENSOR) { @@ -86,5 +85,4 @@ void CopyVariable(const Variable &src_var, Variable *dst_var) { } } -} // namespace framework -} // namespace paddle +} // namespace paddle::framework diff --git a/paddle/fluid/imperative/data_loader.cc b/paddle/fluid/imperative/data_loader.cc index 0e778ca8d7184..99ed4ca01287e 100644 --- a/paddle/fluid/imperative/data_loader.cc +++ b/paddle/fluid/imperative/data_loader.cc @@ -26,8 +26,7 @@ #include "paddle/fluid/memory/allocation/mmap_allocator.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace imperative { +namespace paddle::imperative { static std::map> load_process_pids; @@ -193,7 +192,6 @@ void ThrowErrorIfLoadProcessFailed() { } } -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative #endif diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index a3c5b51b80b3b..383994cac6dfb 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -31,8 +31,7 @@ #endif COMMON_DECLARE_bool(use_mkldnn); -namespace paddle { -namespace imperative { +namespace paddle::imperative { using framework::Variable; void ThreadSafeNameSet::Insert(const std::string& name) { @@ -639,5 +638,4 @@ std::shared_ptr CreateGradOpNode( return nullptr; } -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative diff --git a/paddle/fluid/imperative/nccl_context.cc b/paddle/fluid/imperative/nccl_context.cc index 3ed9b97bfc362..f748948a8cf2b 100644 --- a/paddle/fluid/imperative/nccl_context.cc +++ b/paddle/fluid/imperative/nccl_context.cc @@ -33,14 +33,11 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" -namespace 
paddle { -namespace framework { +namespace paddle::framework { class Variable; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace imperative { +namespace paddle::imperative { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) void NCCLParallelContext::BcastNCCLId( @@ -232,5 +229,4 @@ void NCCLParallelContext::SynchronizeCompute() { #endif -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc index 8811d402c20dd..ec46d19390a91 100644 --- a/paddle/fluid/imperative/partial_grad_engine.cc +++ b/paddle/fluid/imperative/partial_grad_engine.cc @@ -38,8 +38,7 @@ COMMON_DECLARE_bool(sort_sum_gradient); -namespace paddle { -namespace imperative { +namespace paddle::imperative { struct HashPair { template @@ -1184,5 +1183,4 @@ void PartialGradEngine::Execute() { Clear(); } -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 9f4f46c60cea4..2a39e664276ed 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -39,8 +39,7 @@ COMMON_DECLARE_bool(check_nan_inf); PD_DECLARE_bool(benchmark); COMMON_DECLARE_bool(run_kp_kernel); -namespace paddle { -namespace imperative { +namespace paddle::imperative { static const phi::Kernel empty_kernel; static const framework::RuntimeContext empty_ctx({}, {}); @@ -752,5 +751,4 @@ void PreparedOp::Run(const NameVarMap& ins, } } -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index abb6c491af3f1..b4e08a47b8efa 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ 
b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -30,9 +30,7 @@ #include "paddle/phi/common/data_type.h" #include "paddle/utils/string/pretty_log.h" -namespace paddle { -namespace inference { -namespace analysis { +namespace paddle::inference::analysis { using string::PrettyLogEndl; using string::Style; @@ -341,6 +339,4 @@ std::unique_ptr IRPassManager::Apply(std::unique_ptr graph) { return graph; } -} // namespace analysis -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::analysis diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc index ea97be8f90a60..60634f75df3ab 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -22,12 +22,12 @@ #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace inference { +namespace paddle::inference { extern void ReadBinaryFile(const std::string &filename, std::string *contents); -namespace analysis { +} // namespace paddle::inference +namespace paddle::inference::analysis { void IrGraphBuildPass::RunImpl(Argument *argument) { if (!argument->scope_valid()) { @@ -130,6 +130,4 @@ std::unique_ptr IrGraphBuildPass::LoadModel( std::string IrGraphBuildPass::repr() const { return "ir_graph_build_pass"; } -} // namespace analysis -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::analysis diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc index 2f7f61406b384..5399cf631f1df 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc @@ -18,9 +18,7 @@ #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/program_desc.h" 
-namespace paddle { -namespace inference { -namespace analysis { +namespace paddle::inference::analysis { void IrGraphToProgramPass::RunImpl(Argument *argument) { auto cache_pass = @@ -44,6 +42,4 @@ void IrGraphToProgramPass::RunImpl(Argument *argument) { new framework::proto::ProgramDesc(*desc.Proto())); } -} // namespace analysis -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::analysis diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index 2e722f9a7e6e9..cc512a234602b 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -32,9 +32,7 @@ PD_DEFINE_bool( // NOLINT false, "Keep old mode for developers, the model is saved on cpu not device."); -namespace paddle { -namespace inference { -namespace analysis { +namespace paddle::inference::analysis { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) { @@ -236,6 +234,4 @@ std::string IrParamsSyncAmongDevicesPass::repr() const { return "ir_params_sync_among_devices_pass"; } -} // namespace analysis -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::analysis diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc index aaf9439d2b9ed..e8b8c27a24e58 100644 --- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc +++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc @@ -20,9 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/scope.h" -namespace paddle { -namespace inference { -namespace analysis { +namespace paddle::inference::analysis { void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) { std::string model_opt_cache_dir = argument->optimized_model_save_path(); @@ -137,6 +135,4 @@ std::string SaveOptimizedModelPass::repr() const { return "save_optimized_model_pass"; } -} // namespace analysis -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::analysis diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index f47a9d166bf2d..cefe3d74fec00 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -37,6 +37,10 @@ COMMON_DECLARE_uint64(initial_gpu_memory_in_mb); #endif +#ifdef PADDLE_WITH_CINN +COMMON_DECLARE_bool(use_cinn); +#endif + namespace paddle { struct MkldnnQuantizerConfig; @@ -1552,7 +1556,13 @@ void AnalysisConfig::EnableCINN() { #endif } -bool AnalysisConfig::cinn_enabled() const { return use_cinn_; } +bool AnalysisConfig::cinn_enabled() const { + bool is_enabled = use_cinn_; +#ifdef PADDLE_WITH_CINN + is_enabled = is_enabled || FLAGS_use_cinn; +#endif + return is_enabled; +} void AnalysisConfig::EnableCustomPasses(const std::vector &passes, bool custom_pass_only) { diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index adb7021633b8e..7a211edc2a699 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -28,6 +28,7 @@ #include "paddle/fluid//platform/device/gpu/gpu_types.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_type.h" +#include "paddle/fluid/framework/feed_hook.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include 
"paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/naive_executor.h" @@ -1444,7 +1445,9 @@ bool AnalysisPredictor::Run(const std::vector &inputs, LOG(ERROR) << "fail to set feed"; return false; } - + if (config_.new_ir_enabled()) { + ::paddle::framework::RunFeedHooks(*pir_program_, *scope); + } #ifdef PADDLE_WITH_TENSORRT if (config_.tensorrt_engine_enabled()) { inference::tensorrt::TensorRTEngine::predictor_id_per_thread = @@ -1519,7 +1522,9 @@ bool AnalysisPredictor::Run(const std::vector &inputs, LOG(ERROR) << "fail to set feed"; return false; } - + if (config_.new_ir_enabled()) { + ::paddle::framework::RunFeedHooks(*pir_program_, *scope); + } #ifdef PADDLE_WITH_TENSORRT if (config_.tensorrt_engine_enabled()) { inference::tensorrt::TensorRTEngine::predictor_id_per_thread = diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 5f9f8a5284e6e..b042f27ac9845 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -212,6 +212,12 @@ if(NOT WIN32) set(DEPS ${DEPS} ${PADDLE_LIB}/paddle/lib/libphi${CMAKE_SHARED_LIBRARY_SUFFIX} ${PADDLE_LIB}/paddle/lib/libcommon${CMAKE_SHARED_LIBRARY_SUFFIX}) + if(WITH_GPU OR WITH_ROCM) + set(DEPS + ${DEPS} + ${PADDLE_LIB}/paddle/lib/libphi_kernel_gpu${CMAKE_SHARED_LIBRARY_SUFFIX} + ) + endif() endif() else() set(DEPS diff --git a/paddle/fluid/inference/api/demo_ci/clean.sh b/paddle/fluid/inference/api/demo_ci/clean.sh index c265721db5775..8901cd16b5e1d 100755 --- a/paddle/fluid/inference/api/demo_ci/clean.sh +++ b/paddle/fluid/inference/api/demo_ci/clean.sh @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index e1369ca51c5d0..d72b6bfadf6bf 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -184,7 +184,7 @@ for WITH_STATIC_LIB in ON OFF; do fi done done - + # --------tensorrt mobilenet on windows------ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then rm -rf * @@ -203,7 +203,7 @@ for WITH_STATIC_LIB in ON OFF; do ./trt_mobilenet_demo.exe \ --modeldir=$DATA_DIR/mobilenet/model \ --data=$DATA_DIR/mobilenet/data.txt \ - --refer=$DATA_DIR/mobilenet/result.txt + --refer=$DATA_DIR/mobilenet/result.txt if [ $? -ne 0 ]; then echo "trt_mobilenet_demo runs failed." >> ${current_dir}/test_summary.txt EXIT_CODE=1 @@ -268,7 +268,7 @@ for WITH_STATIC_LIB in ON OFF; do ./trt_mobilenet_demo \ --modeldir=$DATA_DIR/mobilenet/model \ --data=$DATA_DIR/mobilenet/data.txt \ - --refer=$DATA_DIR/mobilenet/result.txt + --refer=$DATA_DIR/mobilenet/result.txt if [ $? 
-ne 0 ]; then echo "trt_mobilenet_demo runs failed " >> ${current_dir}/test_summary.txt EXIT_CODE=1 diff --git a/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat b/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat index 6eb932a190654..4bb859becf70c 100644 --- a/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat +++ b/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat @@ -65,12 +65,12 @@ if /i "%use_gpu%"=="Y" ( set use_gpu=N ) -rem set_path_vs_command_prompt +rem set_path_vs_command_prompt :set_vcvarsall_dir SET /P vcvarsall_dir="Please input the path of visual studio command Prompt, such as C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat =======>" set tmp_var=!vcvarsall_dir! call:remove_space -set vcvarsall_dir=!tmp_var! +set vcvarsall_dir=!tmp_var! IF NOT EXIST "%vcvarsall_dir%" ( echo "------------%vcvarsall_dir% not exist------------" goto set_vcvarsall_dir @@ -104,18 +104,18 @@ if EXIST "%source_path%\%model_name%.tar.gz" ( SET /P python_path="Please input the path of python.exe, such as C:\Python37\python.exe =======>" set tmp_var=!python_path! call:remove_space - set python_path=!tmp_var! + set python_path=!tmp_var! if "!python_path!"=="" ( set python_path=python.exe ) else ( if NOT exist "!python_path!" ( - echo "------------!python_path! not exist------------" + echo "------------!python_path! not exist------------" goto:eof - ) + ) ) md %source_path%\%model_name% !python_path! 
%source_path%\untar_model.py %source_path%\%model_name%.tar.gz %source_path%\%model_name% - + SET error_code=N if "%model_name%"=="mobilenet" ( if NOT EXIST "%source_path%\%model_name%\model" set error_code=Y @@ -127,7 +127,7 @@ if EXIST "%source_path%\%model_name%.tar.gz" ( del /f /s /q "%source_path%\%model_name%\*.*" >nul 2>&1 rd /s /q "%source_path%\%model_name%" >nul 2>&1 goto:eof - ) + ) ) ) @@ -201,7 +201,7 @@ if /i "%use_gpu%"=="Y" ( ) if exist "%build_path%\Release\%demo_name%.exe" ( - cd %build_path%\Release + cd %build_path%\Release set GLOG_v=4 if "%demo_name%"=="simple_on_word2vec" ( %demo_name%.exe --dirname="%source_path%\%model_name%\%model_name%" --use_gpu="%use_gpu%" diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 57f8066df1eeb..d8206093efa53 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -60,11 +60,6 @@ void Tensor::Reshape(const std::vector &shape) { "No tensor called [%s] in the runtime scope", name_)); auto *tensor = var->GetMutable(); tensor->Resize(common::make_ddim(shape)); -#ifdef PADDLE_WITH_DNNL - if (tensor->layout() == phi::DataLayout::ONEDNN) { - tensor->set_layout(phi::DataLayout::ANY); - } -#endif } void Tensor::ReshapeStrings(const size_t &shape) { @@ -212,11 +207,6 @@ void Tensor::CopyFromCpu(const T *data) { if (place_ == PlaceType::kCPU) { auto *t_data = tensor->mutable_data(paddle::platform::CPUPlace()); std::memcpy(static_cast(t_data), data, ele_size); -#ifdef PADDLE_WITH_DNNL - if (tensor->layout() == phi::DataLayout::ONEDNN) { - tensor->set_layout(phi::DataLayout::ANY); - } -#endif } else if (place_ == PlaceType::kGPU) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/fluid/inference/api/helper.cc b/paddle/fluid/inference/api/helper.cc index 416a62e980fe5..9823c45195361 100644 --- a/paddle/fluid/inference/api/helper.cc +++ 
b/paddle/fluid/inference/api/helper.cc @@ -34,8 +34,7 @@ #include "paddle/pir/include/core/operation.h" #include "paddle/pir/include/core/value.h" -namespace paddle { -namespace inference { +namespace paddle::inference { template <> std::string to_string>( @@ -446,5 +445,4 @@ void InitGflagsFromEnv() { framework::InitGflags(gflags); } -} // namespace inference -} // namespace paddle +} // namespace paddle::inference diff --git a/paddle/fluid/inference/api/paddle_infer_contrib.cc b/paddle/fluid/inference/api/paddle_infer_contrib.cc index 8d5e1e6ce1cae..00da2279917e2 100644 --- a/paddle/fluid/inference/api/paddle_infer_contrib.cc +++ b/paddle/fluid/inference/api/paddle_infer_contrib.cc @@ -20,8 +20,7 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/float16.h" -namespace paddle_infer { -namespace contrib { +namespace paddle_infer::contrib { using paddle::PaddleDType; @@ -290,5 +289,4 @@ bool Status::operator!=(const Status& x) const noexcept { return !(*this == x); } -} // namespace contrib -} // namespace paddle_infer +} // namespace paddle_infer::contrib diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index a296074f9d6cf..905144110386f 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -538,6 +538,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) { "group_norm_silu_xpu_fuse_pass", "embedding_with_eltwise_add_xpu_fuse_pass", "qk_qkv_attention_xpu_fuse_pass", + "block_multihead_attention_xpu_pass", "multi_encoder_xpu_fuse_pass", "multi_encoder_xpu_adaptive_seqlen_fuse_pass", "multi_encoder_xpu_slice_fuse_pass", @@ -613,11 +614,14 @@ const std::vector kPirGpuPasses{ "fused_weight_only_linear_pass", "matmul_add_act_fuse_pass", "fc_elementwise_layernorm_fuse_pass", + "add_norm_fuse_pass", + "group_norm_silu_fuse_pass", "matmul_scale_fuse_pass", "matmul_transpose_fuse_pass", 
"transpose_flatten_concat_fuse_pass", "remove_redundant_transpose_pass", - "transfer_layout_pass"}; + "transfer_layout_pass", +}; const std::vector kPirXpuPasses{// Functional pass "map_op_to_another_pass", @@ -625,7 +629,7 @@ const std::vector kPirXpuPasses{// Functional pass // Operator fusion pass "add_layernorm_xpu_fuse_pass", "conv2d_bn_xpu_fuse_pass", - "group_norm_silu_xpu_fuse_pass"}; + "group_norm_silu_fuse_pass"}; const std::vector kPirMkldnnPasses { "depthwise_conv_onednn_pass", // diff --git a/paddle/fluid/inference/check_symbol.sh b/paddle/fluid/inference/check_symbol.sh index 0d8892f20514f..ea7f66a5f729c 100755 --- a/paddle/fluid/inference/check_symbol.sh +++ b/paddle/fluid/inference/check_symbol.sh @@ -1,13 +1,13 @@ #!/bin/sh # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/fluid/inference/experimental/javaapi/test.sh b/paddle/fluid/inference/experimental/javaapi/test.sh index d664ee240375a..343c2dfa59cdd 100644 --- a/paddle/fluid/inference/experimental/javaapi/test.sh +++ b/paddle/fluid/inference/experimental/javaapi/test.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/fluid/inference/goapi/test.sh b/paddle/fluid/inference/goapi/test.sh index fbde661d177f7..79cf3e5a74378 100644 --- a/paddle/fluid/inference/goapi/test.sh +++ b/paddle/fluid/inference/goapi/test.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,7 +17,7 @@ # 1. download the mobilenetv1 model to test config and predictor if [ ! -d mobilenetv1 ]; then wget https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/mobilenetv1.tgz - tar xzf mobilenetv1.tgz + tar xzf mobilenetv1.tgz fi # 2. 
set LD_LIBRARY_PATH diff --git a/paddle/fluid/inference/paddle_inference.map b/paddle/fluid/inference/paddle_inference.map index 267dcf7fb601d..180d4e643ba23 100644 --- a/paddle/fluid/inference/paddle_inference.map +++ b/paddle/fluid/inference/paddle_inference.map @@ -71,7 +71,7 @@ /* *paddle::framework*; */ *paddle::framework::InitDevices*; *paddle::framework::InitMemoryMethod*; - + *paddle::framework::InterpreterCore*; *paddle::framework::Executor*; *paddle::framework::proto*; diff --git a/paddle/fluid/inference/tensorrt/convert/bitwise_not_op.cc b/paddle/fluid/inference/tensorrt/convert/bitwise_not_op.cc index 63a02d4e393e8..77b829228e5f0 100644 --- a/paddle/fluid/inference/tensorrt/convert/bitwise_not_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/bitwise_not_op.cc @@ -17,9 +17,7 @@ limitations under the License. */ #include #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class BitwiseNotConverter : public OpConverter { public: @@ -73,8 +71,6 @@ class BitwiseNotConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(bitwise_not, BitwiseNotConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/bitwise_or_op.cc b/paddle/fluid/inference/tensorrt/convert/bitwise_or_op.cc index 814ee8bd98551..9c5beb4634035 100644 --- a/paddle/fluid/inference/tensorrt/convert/bitwise_or_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/bitwise_or_op.cc @@ -17,9 +17,7 @@ #include #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class BitwiseOrConverter : public OpConverter { public: @@ -53,8 +51,6 @@ class BitwiseOrConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference 
-} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(bitwise_or, BitwiseOrConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/c_allreduce_op.cc b/paddle/fluid/inference/tensorrt/convert/c_allreduce_op.cc index 767cf996f7d7f..e84c18a79c4b3 100644 --- a/paddle/fluid/inference/tensorrt/convert/c_allreduce_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/c_allreduce_op.cc @@ -16,9 +16,7 @@ #include "paddle/fluid/inference/tensorrt/plugin/c_allreduce_op_plugin.h" #include "paddle/phi/common/data_type.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { using ReduceType = paddle::inference::tensorrt::plugin::ReduceType; std::map op_to_reduce_type = { {"c_allreduce_sum", paddle::inference::tensorrt::plugin::kRedSum}, @@ -88,9 +86,7 @@ class CAllReduceOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(c_allreduce_sum, CAllReduceOpConverter); REGISTER_TRT_OP_CONVERTER(c_allreduce_max, CAllReduceOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/celu_op.cc b/paddle/fluid/inference/tensorrt/convert/celu_op.cc index 837364a9feca7..d2279f9610b2d 100644 --- a/paddle/fluid/inference/tensorrt/convert/celu_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/celu_op.cc @@ -14,9 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class CeluOpConverter : public OpConverter { public: @@ -82,8 +80,6 @@ class CeluOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(celu, CeluOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc index 6f4fdc30214b5..f1d66090eeb3c 100644 --- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * ConcatOp @@ -53,8 +51,6 @@ class ConcatOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(concat, ConcatOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/conv3d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv3d_op.cc index 37a53d31f47b5..547ec74c19fa6 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv3d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv3d_op.cc @@ -14,9 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { template void ConvertConv3d(TensorRTEngine* engine, @@ -192,9 +190,7 @@ class Deconv3dOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(conv3d, Conv3dOpConverter); REGISTER_TRT_OP_CONVERTER(conv3d_transpose, Deconv3dOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc b/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc index 3b9cc9dd0d349..79e40a80f9531 100644 --- a/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class FillConstantOpConverter : public OpConverter { public: @@ -124,8 +122,6 @@ class FillConstantOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(fill_constant, FillConstantOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/flip_op.cc b/paddle/fluid/inference/tensorrt/convert/flip_op.cc index 0ac714507b5ce..de162b7fbd9ee 100644 --- a/paddle/fluid/inference/tensorrt/convert/flip_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/flip_op.cc @@ -14,9 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class FlipOpConverter : public OpConverter { public: @@ -76,8 +74,6 @@ class FlipOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(flip, FlipOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/fused_token_prune_op.cc b/paddle/fluid/inference/tensorrt/convert/fused_token_prune_op.cc index e9d4ae9182095..f015e809dc210 100644 --- a/paddle/fluid/inference/tensorrt/convert/fused_token_prune_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fused_token_prune_op.cc @@ -12,9 +12,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class FusedTokenPruneOpConverter : public OpConverter { public: @@ -109,8 +107,6 @@ class FusedTokenPruneOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(fused_token_prune, FusedTokenPruneOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/gather_nd_op.cc b/paddle/fluid/inference/tensorrt/convert/gather_nd_op.cc index 508d7a5f9b390..2a70b7b524973 100644 --- a/paddle/fluid/inference/tensorrt/convert/gather_nd_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/gather_nd_op.cc @@ -15,9 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class GatherNdOpConverter : public OpConverter { public: @@ -63,8 +61,6 @@ class GatherNdOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(gather_nd, GatherNdOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/grid_sampler_op.cc b/paddle/fluid/inference/tensorrt/convert/grid_sampler_op.cc index a2fe27590df02..adf2f10584805 100644 --- a/paddle/fluid/inference/tensorrt/convert/grid_sampler_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/grid_sampler_op.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * GridSampler Op @@ -81,8 +79,6 @@ class GridSamplerOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(grid_sampler, GridSamplerOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/index_select_op.cc b/paddle/fluid/inference/tensorrt/convert/index_select_op.cc index 9ee875c92445e..6f869b38b1924 100644 --- a/paddle/fluid/inference/tensorrt/convert/index_select_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/index_select_op.cc @@ -14,19 +14,15 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -namespace proto { +} // namespace paddle::framework +namespace paddle::framework::proto { class OpDesc; -} // namespace proto -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::proto -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * Gather Op @@ -68,8 +64,6 @@ class IndexSelectConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(index_select, IndexSelectConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc index bd97df48309c7..384183d11f51c 100644 --- a/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc @@ -15,9 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class InstanceNormOpConverter : public OpConverter { public: @@ -77,8 +75,6 @@ class InstanceNormOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(instance_norm, InstanceNormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc index d3fda4cb24e28..f505c36b2ed5c 100644 --- a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { // LeakyRelu converter from fluid to tensorRT class LeakyReluOpConverter : public OpConverter { @@ -121,8 +119,6 @@ class LeakyReluOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(leaky_relu, LeakyReluOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/lookup_table_op.cc b/paddle/fluid/inference/tensorrt/convert/lookup_table_op.cc index cdb49be72f50f..b86139b6b6476 100644 --- a/paddle/fluid/inference/tensorrt/convert/lookup_table_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/lookup_table_op.cc @@ -14,9 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class LookupTableOpConverter : public OpConverter { public: @@ -72,9 +70,7 @@ class LookupTableV2OpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(lookup_table, LookupTableOpConverter); REGISTER_TRT_OP_CONVERTER(lookup_table_v2, LookupTableV2OpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/matrix_multiply_op.cc b/paddle/fluid/inference/tensorrt/convert/matrix_multiply_op.cc index 16d6f3f20750c..fd72f8b78f9af 100644 --- a/paddle/fluid/inference/tensorrt/convert/matrix_multiply_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/matrix_multiply_op.cc @@ -12,9 +12,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/phi/common/data_type.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * After trt_map_ops_to_matrix_multiply_pass(mul, matmul, matmul_v2 -> @@ -266,8 +264,6 @@ class MatrixMultiplyOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(matrix_multiply, MatrixMultiplyOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc b/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc index 107217477d14f..f2d00ab4b4667 100644 --- a/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc @@ -13,9 +13,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class MultiClassNMS3OpConverter : public OpConverter { public: @@ -170,8 +168,6 @@ class MultiClassNMS3OpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(multiclass_nms3, MultiClassNMS3OpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc index 68f18bd6e7472..2ea04f6fcfd3d 100644 --- a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc @@ -16,9 +16,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h" #include "paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class MultiheadMatMulOpConverter : public OpConverter { public: @@ -960,8 +958,6 @@ class MultiheadMatMulOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(multihead_matmul, MultiheadMatMulOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/pool3d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool3d_op.cc index c0f38cf79ff91..1e7514389e2ea 100644 --- a/paddle/fluid/inference/tensorrt/convert/pool3d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pool3d_op.cc @@ -14,9 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { inline void DealCeilMode(const nvinfer1::Dims &input_shape, std::vector ksize, @@ -228,9 +226,7 @@ class Pool3dOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt USE_OP_ITSELF(pool3d); REGISTER_TRT_OP_CONVERTER(pool3d, Pool3dOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc index 9f9cbe7c6bceb..4c73c5c897570 100644 --- a/paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/plugin/many_emb_layernorm_varseqlen_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class PrelnEmbEltwiseLayerNormOpConverter : public OpConverter { public: @@ -237,9 +235,7 @@ class PrelnEmbEltwiseLayerNormOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(fused_preln_embedding_eltwise_layernorm, PrelnEmbEltwiseLayerNormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc b/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc index 824f0ff902874..5e2c32f5c7d31 100644 --- a/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc +++ b/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc @@ -16,9 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/plugin/preln_residual_bias_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { using half = paddle::platform::float16; class PrelnResidualBiasOpConverter : public OpConverter { @@ -105,9 +103,7 @@ class PrelnResidualBiasOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(fused_bias_dropout_residual_layer_norm, PrelnResidualBiasOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/prompt_tuning_emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/prompt_tuning_emb_eltwise_layernorm.cc index e6beaae910d96..6c83913f7d888 100644 --- a/paddle/fluid/inference/tensorrt/convert/prompt_tuning_emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/prompt_tuning_emb_eltwise_layernorm.cc @@ -16,9 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/plugin/prompt_tuning_emb_layernorm_varseqlen_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class PromptTuningEmbEltwiseLayerNormOpConverter : public OpConverter { public: @@ -168,9 +166,7 @@ class PromptTuningEmbEltwiseLayerNormOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(prompt_tuning_emb_eltwise_layernorm, PromptTuningEmbEltwiseLayerNormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/qk_multihead_matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/qk_multihead_matmul_op.cc index aafbec6660c67..2d7798878d971 100644 --- a/paddle/fluid/inference/tensorrt/convert/qk_multihead_matmul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/qk_multihead_matmul_op.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class QkMultiheadMatMulOpConverter : public OpConverter { public: @@ -290,8 +288,6 @@ class QkMultiheadMatMulOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(qk_multihead_matmul, QkMultiheadMatMulOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/quantize_linear_op.cc b/paddle/fluid/inference/tensorrt/convert/quantize_linear_op.cc index 74a8f56ea6c20..0ddcee9244925 100644 --- a/paddle/fluid/inference/tensorrt/convert/quantize_linear_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/quantize_linear_op.cc @@ -12,9 +12,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class QuantizeLinearOpConverter : public OpConverter { public: @@ -60,8 +58,6 @@ class QuantizeLinearOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(quantize_linear, QuantizeLinearOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/roi_align_op.cc b/paddle/fluid/inference/tensorrt/convert/roi_align_op.cc index 8fbdea5edd4c9..74f13234bf14d 100644 --- a/paddle/fluid/inference/tensorrt/convert/roi_align_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/roi_align_op.cc @@ -15,9 +15,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * Roi Align Op @@ -69,8 +67,6 @@ class RoiAlignOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(roi_align, RoiAlignOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/roll_op.cc b/paddle/fluid/inference/tensorrt/convert/roll_op.cc index ca42b3c34c3f8..1d4d8a5b4a229 100644 --- a/paddle/fluid/inference/tensorrt/convert/roll_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/roll_op.cc @@ -15,9 +15,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/helper.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * Stack converter from fluid to tensorRT. 
*/ @@ -91,8 +89,6 @@ class RollOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(roll, RollOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/share_data_op.cc b/paddle/fluid/inference/tensorrt/convert/share_data_op.cc index 38fa1ff6e0c83..a309d3faa10ec 100644 --- a/paddle/fluid/inference/tensorrt/convert/share_data_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/share_data_op.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class ShareDataOpConverter : public OpConverter { public: @@ -32,8 +30,6 @@ class ShareDataOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(share_data, ShareDataOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc index 988d0d064c862..e87bf699b15fd 100644 --- a/paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc @@ -17,9 +17,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/phi/common/data_type.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class SkipLayerNormOpConverter : public OpConverter { public: @@ -257,8 +255,6 @@ class SkipLayerNormOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(skip_layernorm, SkipLayerNormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/skip_merge_layernorm_op.cc b/paddle/fluid/inference/tensorrt/convert/skip_merge_layernorm_op.cc index 4bb54de495b19..a73be2eb3e3c6 100644 --- a/paddle/fluid/inference/tensorrt/convert/skip_merge_layernorm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/skip_merge_layernorm_op.cc @@ -15,9 +15,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/skip_merge_layernorm_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class SkipMergeLayernormOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, @@ -87,8 +85,6 @@ class SkipMergeLayernormOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(skip_merge_layernorm, SkipMergeLayernormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/stack_op.cc b/paddle/fluid/inference/tensorrt/convert/stack_op.cc index 30ffcd88472d3..1e5dcdeac5019 100644 --- a/paddle/fluid/inference/tensorrt/convert/stack_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/stack_op.cc @@ -15,9 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * Stack converter from fluid to tensorRT. @@ -80,8 +78,6 @@ class StackOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(stack, StackOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/sum_op.cc b/paddle/fluid/inference/tensorrt/convert/sum_op.cc index 900a37126f1ce..e9a1408185af2 100644 --- a/paddle/fluid/inference/tensorrt/convert/sum_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/sum_op.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class SumOpConverter : public OpConverter { public: @@ -47,8 +45,6 @@ class SumOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(sum, SumOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc b/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc index 7ef6d1f3241d8..c9a69c8a7c624 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc @@ -19,9 +19,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { TEST(OpConverter, ConvertBlock) { framework::ProgramDesc prog; @@ -68,8 +66,6 @@ TEST(OpConverter, ConvertBlock) { *block->Proto(), {"conv2d-Y"}, scope, engine_.get() /*TensorRTEngine*/); } -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt USE_TRT_CONVERTER(conv2d) diff --git a/paddle/fluid/inference/tensorrt/convert/trans_layernorm_op.cc b/paddle/fluid/inference/tensorrt/convert/trans_layernorm_op.cc index a5db8ed88c4c0..8251d3a3e745e 100644 --- a/paddle/fluid/inference/tensorrt/convert/trans_layernorm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/trans_layernorm_op.cc @@ -11,9 +11,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class TransLayerNormOpConverter : public OpConverter { public: @@ -84,8 +82,6 @@ class TransLayerNormOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(trans_layernorm, TransLayerNormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/transformer_input_convert_op.cc b/paddle/fluid/inference/tensorrt/convert/transformer_input_convert_op.cc index 1dca9bb818c38..f7fda67a3643f 100644 --- a/paddle/fluid/inference/tensorrt/convert/transformer_input_convert_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/transformer_input_convert_op.cc @@ -15,9 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * Convert Transformer Input(pos_id, max_seqlen). @@ -58,8 +56,6 @@ class TransformerInputConvert : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(transformer_input_convert, TransformerInputConvert); diff --git a/paddle/fluid/inference/tensorrt/convert/transpose_op.cc b/paddle/fluid/inference/tensorrt/convert/transpose_op.cc index 62ef6edd2230b..045a991492628 100644 --- a/paddle/fluid/inference/tensorrt/convert/transpose_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/transpose_op.cc @@ -11,9 +11,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { /* * TransposeOp @@ -48,9 +46,7 @@ class TransposeOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(transpose, TransposeOpConverter); REGISTER_TRT_OP_CONVERTER(transpose2, TransposeOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/unary_op.cc b/paddle/fluid/inference/tensorrt/convert/unary_op.cc index ea78ec9292159..f720515acc2eb 100644 --- a/paddle/fluid/inference/tensorrt/convert/unary_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/unary_op.cc @@ -23,9 +23,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class UnaryOpConverter : public OpConverter { public: @@ -216,9 +214,7 @@ class RoundOpConverter : public UnaryOpConverter { }; #endif -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(exp, ExpOpConverter); REGISTER_TRT_OP_CONVERTER(log, LogOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/unsqueeze2_op.cc b/paddle/fluid/inference/tensorrt/convert/unsqueeze2_op.cc index 7cdc1b07fd04d..72f19d07f5a1f 100644 --- a/paddle/fluid/inference/tensorrt/convert/unsqueeze2_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/unsqueeze2_op.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class Unsqueeze2OpConverter : public OpConverter { public: @@ -94,8 +92,6 @@ class Unsqueeze2OpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(unsqueeze2, Unsqueeze2OpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/yolo_box_head_op.cc b/paddle/fluid/inference/tensorrt/convert/yolo_box_head_op.cc index eafb38221ecf3..c03368c3f4bcc 100644 --- a/paddle/fluid/inference/tensorrt/convert/yolo_box_head_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/yolo_box_head_op.cc @@ -12,18 +12,14 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/yolo_box_head_op_plugin.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -namespace proto { +} // namespace paddle::framework +namespace paddle::framework::proto { class OpDesc; -} // namespace proto -} // namespace framework -} // namespace paddle +} // namespace paddle::framework::proto -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class YoloBoxHeadOpConverter : public OpConverter { public: @@ -50,8 +46,6 @@ class YoloBoxHeadOpConverter : public OpConverter { } }; -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt REGISTER_TRT_OP_CONVERTER(yolo_box_head, YoloBoxHeadOpConverter); diff --git a/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc b/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc index e6bc25af044dc..28ef055897b80 100644 --- a/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc +++ b/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc @@ -17,9 +17,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/unfold_functor.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class ExprWrapper { public: @@ -124,6 +122,7 @@ static std::vector DimsExprs2VecExprWrapper( nvinfer1::IExprBuilder& expr_builder // NOLINT ) { std::vector x_dims_wrap; + x_dims_wrap.reserve(x_dims.nbDims); for (int i = 0; i < x_dims.nbDims; i++) { x_dims_wrap.emplace_back(x_dims.d[i], &expr_builder); } @@ -154,6 +153,7 @@ nvinfer1::DimsExprs GatherNdInferMeta( std::vector result_dims; // The result dims is // Index.shape[:-1] + X.shape[Index.shape[-1]:] + result_dims.reserve(index_dims_size - 1); for (int i = 0; i < index_dims_size - 1; ++i) { result_dims.emplace_back(index_dims.d[i]); } @@ -899,6 
+899,4 @@ PD_REGISTER_DYNAMIC_INFER_META_FN(pad, PadInferMeta); PD_REGISTER_DYNAMIC_INFER_META_FN(argsort, ArgsortInferMeta); PD_REGISTER_DYNAMIC_INFER_META_FN(scatter, ScatterInferMeta); PD_REGISTER_DYNAMIC_INFER_META_FN(solve, SolveInferMeta); -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt diff --git a/paddle/fluid/inference/tensorrt/plugin/test_split_plugin.cc b/paddle/fluid/inference/tensorrt/plugin/test_split_plugin.cc index da9784fbb6487..64e55023892c4 100644 --- a/paddle/fluid/inference/tensorrt/plugin/test_split_plugin.cc +++ b/paddle/fluid/inference/tensorrt/plugin/test_split_plugin.cc @@ -16,10 +16,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { +namespace paddle::inference::tensorrt::plugin { TEST(split_op_plugin, test_plugin) { int axis = 1; @@ -60,7 +57,4 @@ TEST(split_op_plugin, test_plugin_creater) { creator.setPluginNamespace("test"); } -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt::plugin diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc index 637bd84deaff0..83941eb00cf22 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc @@ -14,10 +14,7 @@ #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { +namespace paddle::inference::tensorrt::plugin { inline void Serialize(void*& buffer, // NOLINT const std::vector& input_dims, @@ -139,7 +136,4 @@ const char* TensorRTPluginCreator::getPluginNamespace() const TRT_NOEXCEPT { return plugin_namespace_.c_str(); } -} // namespace plugin -} // namespace tensorrt -} // 
namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt::plugin diff --git a/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.cc b/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.cc index d4631f7057582..28161758be07f 100644 --- a/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.cc +++ b/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.cc @@ -14,9 +14,7 @@ #include "paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { bool PluginArgumentMappingContext::HasInput(const std::string& name) const { auto inputs = op_desc_->Inputs(); @@ -160,6 +158,4 @@ bool PluginArgumentMappingContext::IsForInferShape() const { return false; } -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt diff --git a/paddle/fluid/inference/tensorrt/test_arg_mapping_context.cc b/paddle/fluid/inference/tensorrt/test_arg_mapping_context.cc index 85dddfea2a7c7..9bcf06cdd978c 100644 --- a/paddle/fluid/inference/tensorrt/test_arg_mapping_context.cc +++ b/paddle/fluid/inference/tensorrt/test_arg_mapping_context.cc @@ -17,9 +17,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { TEST(ArgMappingContextTest, BasicFunction) { paddle::framework::proto::OpDesc op; @@ -123,6 +121,4 @@ TEST(ArgMappingContextTest, BasicFunction) { EXPECT_EQ(context.IsDenseTensorOutput("Out"), true); } -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index d87c9af8cfa67..ae12901e7da90 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -29,9 +29,7 @@ limitations under the License. */ #include "paddle/phi/common/float16.h" using float16 = phi::dtype::float16; -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class TensorRTDynamicShapeValueEngineTest : public ::testing::Test { public: @@ -1049,6 +1047,4 @@ TEST_F(TensorRTDynamicShapeGNTest, test_trt_dynamic_shape_groupnorm) { } */ #endif -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc index 4c08da6d060eb..1f0b81da76ca3 100644 --- a/paddle/fluid/inference/tensorrt/test_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_engine.cc @@ -20,9 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace inference { -namespace tensorrt { +namespace paddle::inference::tensorrt { class TensorRTEngineTest : public ::testing::Test { protected: @@ -334,6 +332,4 @@ TEST_F(TensorRTEngineTest, test_pool2d) { ASSERT_EQ(y_cpu[1], 5.0); } -} // namespace tensorrt -} // namespace inference -} // namespace paddle +} // namespace paddle::inference::tensorrt diff --git a/paddle/fluid/inference/utils/model_utils.cc b/paddle/fluid/inference/utils/model_utils.cc index 27bc8b35306e1..0397793aeecfc 100644 --- a/paddle/fluid/inference/utils/model_utils.cc +++ b/paddle/fluid/inference/utils/model_utils.cc @@ -18,8 +18,7 @@ #include "paddle/fluid/framework/var_type_inference.h" #include "paddle/phi/common/data_type.h" -namespace paddle { -namespace inference { +namespace paddle::inference { using paddle::framework::proto::VarType; @@ -70,5 +69,4 @@ phi::DataType GetModelPrecision(const framework::ProgramDesc& program) { return ret; } -} // namespace inference -} // namespace paddle +} // namespace paddle::inference diff --git a/paddle/fluid/inference/utils/table_printer.cc b/paddle/fluid/inference/utils/table_printer.cc index a9b6633217ad0..d182759278d10 100644 --- a/paddle/fluid/inference/utils/table_printer.cc +++ b/paddle/fluid/inference/utils/table_printer.cc @@ -30,8 +30,7 @@ #include #include -namespace paddle { -namespace inference { +namespace paddle::inference { std::string TablePrinter::PrintTable() { std::stringstream ss; @@ -211,5 +210,4 @@ void TablePrinter::AddRow(std::stringstream& ss, size_t row_idx) { } } -} // namespace inference -} // namespace paddle +} // namespace paddle::inference diff --git a/paddle/fluid/ir_adaptor/translator/op_compat_info.cc.j2 b/paddle/fluid/ir_adaptor/translator/op_compat_info.cc.j2 index e7b7812fe61be..71c38e487c909 100644 --- a/paddle/fluid/ir_adaptor/translator/op_compat_info.cc.j2 +++ 
b/paddle/fluid/ir_adaptor/translator/op_compat_info.cc.j2 @@ -2,7 +2,7 @@ namespace paddle { namespace translator { - + OpNameNormalizer::OpNameNormalizer() { op_name_mappings = { {% for legacy_name, normalized_name in op_name_pairs.items() %} @@ -11,35 +11,35 @@ OpNameNormalizer::OpNameNormalizer() { }; op_arg_name_mappings = { {% for op_name, arg_name_mappings in op_arg_name_pairs.items() %} - { - "{{op_name}}", + { + "{{op_name}}", { {% for normalized_name, legacy_name in arg_name_mappings.items() %} { "{{normalized_name}}", "{{legacy_name}}" }, {% endfor %} - }, + }, }, {% endfor %} }; op_mutable_attributes = { {% for op_name, mutable_attributes in op_mutable_attributes.items() %} - { - "{{op_name}}", + { + "{{op_name}}", { {% for attribute_name in mutable_attributes %} "{{attribute_name}}", {% endfor %} - }, + }, }, {% endfor %} }; op_mutable_attribute_infos = { {% for op_name, mutable_attribute_infos in op_mutable_attribute_infos.items() %} - { - "{{op_name}}", + { + "{{op_name}}", { {% for attribute_name, attribute_info in mutable_attribute_infos.items() %} - { + { "{{attribute_name}}", { {% for candidate_var_name in attribute_info %} @@ -48,7 +48,7 @@ OpNameNormalizer::OpNameNormalizer() { }, }, {% endfor %} - }, + }, }, {% endfor %} }; diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc index 005f73e7b6427..8dbfe787ceb81 100644 --- a/paddle/fluid/ir_adaptor/translator/op_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc @@ -1045,7 +1045,7 @@ struct CastOpTranscriber : public OpTranscriber { const OpDesc& op_desc) override { auto& attribute_translator = AttributeTranslator::instance(); pir::AttributeMap attribute_map = {}; - const OpAttributeInfo info = op_attr_infos[0]; + const OpAttributeInfo& info = op_attr_infos[0]; std::string legacy_attr_name("out_dtype"); diff --git a/paddle/fluid/ir_adaptor/translator/program_translator.cc 
b/paddle/fluid/ir_adaptor/translator/program_translator.cc index a544f89bd3b38..a16e31e13075a 100644 --- a/paddle/fluid/ir_adaptor/translator/program_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/program_translator.cc @@ -41,8 +41,7 @@ #include "paddle/pir/include/dialect/control_flow/ir/cf_op.h" #include "paddle/pir/include/dialect/control_flow/ir/cf_type.h" -namespace paddle { -namespace translator { +namespace paddle::translator { using ProgramDesc = ::paddle::framework::ProgramDesc; using BlockDesc = ::paddle::framework::BlockDesc; @@ -824,5 +823,4 @@ ProgramTranslator::VarDesc2Value() { return var_desc_2_value; } -} // namespace translator -} // namespace paddle +} // namespace paddle::translator diff --git a/paddle/fluid/jit/function_schema.cc b/paddle/fluid/jit/function_schema.cc index cae24962e13e2..5fcd9527c45a0 100644 --- a/paddle/fluid/jit/function_schema.cc +++ b/paddle/fluid/jit/function_schema.cc @@ -18,8 +18,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/fluid/jit/function_utils.h" -namespace paddle { -namespace jit { +namespace paddle::jit { Argument::Argument(const std::string& name, bool is_out) : name_(name), is_output_(is_out) {} @@ -96,5 +95,4 @@ void FunctionInfo::RemoveDescFeedFetch() { utils::RemoveFeedFetch(program_desc_.get()); } -} // namespace jit -} // namespace paddle +} // namespace paddle::jit diff --git a/paddle/fluid/jit/function_utils.cc b/paddle/fluid/jit/function_utils.cc index 519bcb2a88877..88173f8df1e3d 100644 --- a/paddle/fluid/jit/function_utils.cc +++ b/paddle/fluid/jit/function_utils.cc @@ -20,9 +20,7 @@ #include "paddle/fluid/framework/variable.h" #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace jit { -namespace utils { +namespace paddle::jit::utils { std::vector ToDenseTensors(const std::vector &tensors) { std::vector ret; @@ -111,6 +109,4 @@ void RemoveFeedFetch(framework::ProgramDesc *program_desc) { } } -} // namespace utils -} // namespace jit -} // namespace paddle +} // 
namespace paddle::jit::utils diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc index c1a493db17ea9..823649686f0b9 100644 --- a/paddle/fluid/jit/layer.cc +++ b/paddle/fluid/jit/layer.cc @@ -23,8 +23,7 @@ #include "paddle/fluid/jit/function.h" #include "paddle/fluid/jit/function_schema.h" -namespace paddle { -namespace jit { +namespace paddle::jit { Layer::Layer(const std::shared_ptr& params_map, const std::shared_ptr& attrs_map, @@ -104,5 +103,4 @@ std::shared_ptr Layer::Clone(void* stream) { return x; } -} // namespace jit -} // namespace paddle +} // namespace paddle::jit diff --git a/paddle/fluid/jit/property.cc b/paddle/fluid/jit/property.cc index d91aba11cfb55..ddbd24d590498 100644 --- a/paddle/fluid/jit/property.cc +++ b/paddle/fluid/jit/property.cc @@ -23,8 +23,7 @@ limitations under the License. */ #include "paddle/fluid/jit/property.h" #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace jit { +namespace paddle::jit { using Variable = paddle::framework::Variable; @@ -378,5 +377,4 @@ std::vector Property::GetStrings(const std::string &name) { return {}; } -} // namespace jit -} // namespace paddle +} // namespace paddle::jit diff --git a/paddle/fluid/jit/property.proto b/paddle/fluid/jit/property.proto index 5f89e1da90b91..a00da9fc6e40a 100644 --- a/paddle/fluid/jit/property.proto +++ b/paddle/fluid/jit/property.proto @@ -84,7 +84,7 @@ message TensorProto { // For int64. 
// When this field is present, the data_type field MUST be INT64 repeated int64 int64_data = 7 [packed = true]; - + // For double // Complex128 tensors are encoded as a single array of doubles, // with the real components appearing in odd numbered positions, @@ -130,16 +130,16 @@ message ValueProto { STRINGS = 8; TENSORS = 9; } - optional string name = 1; - + optional string name = 1; + optional AttributeType type = 2; // discriminator that indicates which field below is in use - + // Exactly ONE of the following fields must be present optional float f = 3; // float optional int64 i = 4; // int optional bytes s = 5; // UTF-8 string optional TensorProto t = 6; // tensor value - + repeated float floats = 7; // list of floats repeated int64 ints = 8; // list of ints repeated bytes strings = 9; // list of UTF-8 strings @@ -147,5 +147,5 @@ message ValueProto { } message PropertyVals { - repeated ValueProto entrys=1; + repeated ValueProto entrys=1; } diff --git a/paddle/fluid/jit/serializer_utils.cc b/paddle/fluid/jit/serializer_utils.cc index 4fdc07f55ac74..4d22be839e4f1 100644 --- a/paddle/fluid/jit/serializer_utils.cc +++ b/paddle/fluid/jit/serializer_utils.cc @@ -20,9 +20,7 @@ #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/framework/var_desc.h" -namespace paddle { -namespace jit { -namespace utils { +namespace paddle::jit::utils { bool IsPersistable(framework::VarDesc* desc_ptr) { auto type = desc_ptr->GetType(); @@ -109,6 +107,4 @@ void InitKernelSignatureMap() { paddle::framework::InitDefaultKernelSignatureMap(); } -} // namespace utils -} // namespace jit -} // namespace paddle +} // namespace paddle::jit::utils diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index c96730f5fda50..eef6c1a1e8c4a 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -117,9 +117,7 @@ 
COMMON_DECLARE_bool(use_auto_growth_pinned_allocator); COMMON_DECLARE_bool(use_cuda_malloc_async_allocator); COMMON_DECLARE_bool(auto_free_cudagraph_allocations_on_launch); -namespace paddle { -namespace memory { -namespace allocation { +namespace paddle::memory::allocation { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) class CUDAGraphAllocator @@ -1908,6 +1906,4 @@ void AllocatorFacade::SetDefaultStream(const platform::CustomPlace& place, UNUSED static std::shared_ptr unused_obj = std::make_shared(platform::CPUPlace()); -} // namespace allocation -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::allocation diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc index 0d4ddca4f237e..5c46376626994 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc @@ -42,9 +42,7 @@ PADDLE_DEFINE_EXPORTED_READONLY_bool( PADDLE_DEFINE_EXPORTED_READONLY_bool(print_allocator_trace_info, false, "print trace memory info"); -namespace paddle { -namespace memory { -namespace allocation { +namespace paddle::memory::allocation { AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator( std::shared_ptr underlying_allocator, @@ -226,6 +224,4 @@ void AutoGrowthBestFitAllocator::Trace() const { << " curr_chunks_num:" << chunks_.size(); } -} // namespace allocation -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::allocation diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc index 398c015627860..426eeeae70e55 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.cc +++ b/paddle/fluid/memory/allocation/cpu_allocator.cc @@ -19,9 +19,7 @@ #include "paddle/fluid/memory/stats.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace memory { -namespace allocation { 
+namespace paddle::memory::allocation { bool CPUAllocator::IsAllocThreadSafe() const { return true; } @@ -52,6 +50,4 @@ phi::Allocation *CPUAllocator::AllocateImpl(size_t size) { HOST_MEMORY_STAT_UPDATE(Reserved, 0, size); return new Allocation(p, size, platform::CPUPlace()); } -} // namespace allocation -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::allocation diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 781addd7dba60..f233a5d8618eb 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -29,9 +29,7 @@ #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace memory { -namespace allocation { +namespace paddle::memory::allocation { bool CUDAAllocator::IsAllocThreadSafe() const { return true; } void CUDAAllocator::FreeImpl(phi::Allocation* allocation) { PADDLE_ENFORCE_EQ( @@ -86,6 +84,4 @@ phi::Allocation* CUDAAllocator::AllocateImpl(size_t size) { err_msg)); } -} // namespace allocation -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::allocation diff --git a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc index a1b29a193a9e8..96ed41ad27dee 100644 --- a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc @@ -29,9 +29,7 @@ #endif #if CUDA_VERSION >= 10020 -namespace paddle { -namespace memory { -namespace allocation { +namespace paddle::memory::allocation { CUDAVirtualMemAllocator::CUDAVirtualMemAllocator( const platform::CUDAPlace& place) @@ -228,8 +226,6 @@ phi::Allocation* CUDAVirtualMemAllocator::AllocateImpl(size_t size) { reinterpret_cast(ptr), size, platform::Place(place_)); // NOLINT } -} // namespace allocation -} // namespace memory -} // 
namespace paddle +} // namespace paddle::memory::allocation #endif diff --git a/paddle/fluid/memory/allocation/memory_block.cc b/paddle/fluid/memory/allocation/memory_block.cc index 26a2310c17e27..cf4407a22dd10 100644 --- a/paddle/fluid/memory/allocation/memory_block.cc +++ b/paddle/fluid/memory/allocation/memory_block.cc @@ -16,9 +16,7 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace memory { -namespace detail { +namespace paddle::memory::detail { void MemoryBlock::Init(MetadataCache* cache, Type t, @@ -154,6 +152,4 @@ MemoryBlock* MemoryBlock::Metadata() const { reinterpret_cast(this) - 1)); } -} // namespace detail -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::detail diff --git a/paddle/fluid/memory/allocation/memory_block_desc.cc b/paddle/fluid/memory/allocation/memory_block_desc.cc index d20d56a6d05e8..1d1f3c2396921 100644 --- a/paddle/fluid/memory/allocation/memory_block_desc.cc +++ b/paddle/fluid/memory/allocation/memory_block_desc.cc @@ -17,9 +17,7 @@ limitations under the License. */ #include "paddle/fluid/memory/allocation/memory_block.h" -namespace paddle { -namespace memory { -namespace detail { +namespace paddle::memory::detail { MemoryBlock::Desc::Desc(MemoryBlock::Type t, size_t i, @@ -74,6 +72,4 @@ bool MemoryBlock::Desc::CheckGuards() const { return guard_begin == hash(*this, 1) && guard_end == hash(*this, 2); } -} // namespace detail -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::detail diff --git a/paddle/fluid/memory/allocation/meta_cache.cc b/paddle/fluid/memory/allocation/meta_cache.cc index 945b0f7b89283..cca35490551a6 100644 --- a/paddle/fluid/memory/allocation/meta_cache.cc +++ b/paddle/fluid/memory/allocation/meta_cache.cc @@ -16,9 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/memory/allocation/memory_block.h" #include "paddle/fluid/platform/enforce.h" -namespace paddle { -namespace memory { -namespace detail { +namespace paddle::memory::detail { MetadataCache::MetadataCache(bool uses_gpu) : uses_gpu_(uses_gpu) {} @@ -64,6 +62,4 @@ void MetadataCache::Invalidate(MemoryBlock* block) { } } -} // namespace detail -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::detail diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc index 32853f08f94e5..fe7e722eab181 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.cc +++ b/paddle/fluid/memory/allocation/pinned_allocator.cc @@ -16,9 +16,7 @@ #include "paddle/fluid/memory/stats.h" #include "paddle/fluid/platform/profiler/mem_tracing.h" -namespace paddle { -namespace memory { -namespace allocation { +namespace paddle::memory::allocation { bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; } void CPUPinnedAllocator::FreeImpl(phi::Allocation *allocation) { #ifdef PADDLE_WITH_HIP @@ -49,6 +47,4 @@ phi::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) { platform::TracerMemEventType::ReservedAllocate); return new Allocation(ptr, size, platform::CUDAPinnedPlace()); } -} // namespace allocation -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::allocation diff --git a/paddle/fluid/memory/allocation/system_allocator.cc b/paddle/fluid/memory/allocation/system_allocator.cc index a6e19b84ba8d1..903d8d85954d7 100644 --- a/paddle/fluid/memory/allocation/system_allocator.cc +++ b/paddle/fluid/memory/allocation/system_allocator.cc @@ -46,9 +46,7 @@ COMMON_DECLARE_double(fraction_of_gpu_memory_to_use); COMMON_DECLARE_uint64(initial_gpu_memory_in_mb); COMMON_DECLARE_uint64(reallocate_gpu_memory_in_mb); -namespace paddle { -namespace memory { -namespace detail { +namespace paddle::memory::detail { void* AlignedMalloc(size_t size) { void* p = 
nullptr; @@ -348,6 +346,4 @@ void CustomAllocator::Free(void* p, size_t size, size_t index) { bool CustomAllocator::UseGpu() const { return true; } #endif -} // namespace detail -} // namespace memory -} // namespace paddle +} // namespace paddle::memory::detail diff --git a/paddle/fluid/memory/stats.cc b/paddle/fluid/memory/stats.cc index 2d66a5b6838b0..0eaf15c602224 100644 --- a/paddle/fluid/memory/stats.cc +++ b/paddle/fluid/memory/stats.cc @@ -22,8 +22,7 @@ PADDLE_DEFINE_EXPORTED_bool( log_memory_stats, false, "Log memory stats after each op runs, just used for debug."); -namespace paddle { -namespace memory { +namespace paddle::memory { class StatRegistry { public: @@ -173,5 +172,4 @@ int RegisterAllStats() { UNUSED static int register_all_stats = RegisterAllStats(); -} // namespace memory -} // namespace paddle +} // namespace paddle::memory diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 4714f3a2eb446..fc28e02b7bdb9 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -82,7 +82,7 @@ endif() set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi common phi_utils static_prim_api get_expected_kernel_func) -register_operators(EXCLUDES py_func_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op +register_operators(EXCLUDES py_func_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op run_program_op quantize_linear_op save_combine_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} processgroup_comm_utils) op_library(generated_op UNITY SRCS generated_op1.cc generated_op2.cc generated_op3.cc generated_op4.cc DEPS ${OP_HEADER_DEPS}) @@ -108,8 +108,6 @@ if (WITH_GPU OR WITH_ROCM) op_library(sync_batch_norm_op DEPS processgroup_comm_utils) endif() -op_library(lstm_op DEPS ${OP_HEADER_DEPS}) - set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) if (WITH_DGC) diff --git 
a/paddle/fluid/operators/assign_pos_op.cc b/paddle/fluid/operators/assign_pos_op.cc deleted file mode 100644 index 1157b3f964aaa..0000000000000 --- a/paddle/fluid/operators/assign_pos_op.cc +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle::operators { - -class AssignPosOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("cum_count"), "Input", "cum_count", "AssignPos"); - OP_INOUT_CHECK( - ctx->HasInput("eff_num_len"), "Input", "eff_num_len", "AssignPos"); - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "AssignPos"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "AssignPos"); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto cum_count_dtype = - OperatorWithKernel::IndicateVarDataType(ctx, "cum_count"); - auto X_dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - - PADDLE_ENFORCE_EQ(cum_count_dtype, - X_dtype, - phi::errors::InvalidArgument( - "The dtype of the cum_count and X should be same")); - PADDLE_ENFORCE_EQ(cum_count_dtype, - framework::proto::VarType::INT64, - 
phi::errors::InvalidArgument( - "The dtype of the cum_count_dtype, eff_num_len and " - "X should be same as int64")); - return phi::KernelKey(cum_count_dtype, ctx.device_context().GetPlace()); - } -}; - -class AssignPosOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "numbers to scatter."); - AddInput("cum_count", "The cumulative sum count of numbers."); - AddInput("eff_num_len", - "The effective numbers of numbers should be scattered."); - AddOutput("Out", "Assemble numbers in the order of counters."); - - AddComment(R"DOC( -assign_pos_op Operator. - -Assign pos decides which tokens should be fetched belong to -specially counter orderingly. - -)DOC"); - } -}; - -} // namespace paddle::operators - -namespace ops = paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(assign_pos, - ops::AssignPosOp, - ops::AssignPosOpMaker); diff --git a/paddle/fluid/operators/channel_shuffle_op.cc b/paddle/fluid/operators/channel_shuffle_op.cc deleted file mode 100644 index 69f75691a0318..0000000000000 --- a/paddle/fluid/operators/channel_shuffle_op.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/backward.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class ChannelShuffleOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -class ChannelShuffleOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor, default Tensor), " - "the input feature data of ChannelShuffleOp, the layout is " - "[N, C, H, W] or [N, H, W, C]."); - AddOutput("Out", - "(Tensor, default Tensor), the output of " - "ChannelShuffleOp. The layout is also [N, C, " - "H, W] or [N, H, W, C]."); - AddAttr("groups", "number of groups to divide channels in."); - AddAttr( - "data_format", - "An optional string from: \"NHWC\", \"NCHW\". " - "Defaults to \"NHWC\", Specify the data format of the input data.") - .SetDefault("NCHW"); - - AddComment(R"DOC( - Channel Shuffle operator - This operator divides channels in a tensor of shape :math:`(*, C, H, W)` - into :math:`g` groups and rearranges them as :math:`(*, C/g, g, H, W)` - while keeping the original tensor shape. - - Please refer to the paper: - `ShuffleNet: An Extremely Efficient Convolutional Neural Network for - Mobile Devices `_ - by Zhang et. al (2017) for more details. 
- - )DOC"); - } -}; - -class ChannelShuffleGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -template -class ChannelShuffleGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("channel_shuffle_grad"); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetAttrMap(this->Attrs()); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(channel_shuffle, - ChannelShuffleInferShapeFunctor, - PD_INFER_META(phi::ChannelShuffleInferMeta)); - -REGISTER_OPERATOR(channel_shuffle, - ops::ChannelShuffleOp, - ops::ChannelShuffleOpMaker, - ops::ChannelShuffleGradOpMaker, - ops::ChannelShuffleGradOpMaker, - ChannelShuffleInferShapeFunctor); - -DECLARE_INFER_SHAPE_FUNCTOR(channel_shuffle_grad, - ChannelShuffleGradInferShapeFunctor, - PD_INFER_META(phi::ChannelShuffleGradInferMeta)); - -REGISTER_OPERATOR(channel_shuffle_grad, - ops::ChannelShuffleGradOp, - ChannelShuffleGradInferShapeFunctor); diff --git a/paddle/fluid/operators/collective/c_allreduce_avg_op.cc b/paddle/fluid/operators/collective/c_allreduce_avg_op.cc index 963ea26321bdb..13d07557f1e7c 100644 --- a/paddle/fluid/operators/collective/c_allreduce_avg_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_avg_op.cc @@ -14,17 +14,14 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_allreduce_op.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class OpDesc; -} // namespace framework -namespace imperative { +} // namespace paddle::framework +namespace paddle::imperative { class OpBase; -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative -namespace paddle { -namespace operators { +namespace paddle::operators { class CAllReduceAvgOpMaker : public CAllReduceOpMaker { protected: @@ -33,8 +30,7 @@ class CAllReduceAvgOpMaker : public CAllReduceOpMaker { DECLARE_INPLACE_OP_INFERER(AllreduceAvgInplaceInferer, {"X", "Out"}); -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cc index ab174de1cec3c..c496ad8955e7c 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cc @@ -14,19 +14,16 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_allreduce_op.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class OpDesc; template class EmptyGradOpMaker; -} // namespace framework -namespace imperative { +} // namespace paddle::framework +namespace paddle::imperative { class OpBase; -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative -namespace paddle { -namespace operators { +namespace paddle::operators { class CAllReduceMaxOpMaker : public CAllReduceOpMaker { protected: @@ -37,8 +34,7 @@ DECLARE_INPLACE_OP_INFERER(AllreduceMaxInplaceInferer, {"X", "Out"}); DEFINE_C_ALLREDUCE_CPU_KERNEL(CAllReduceMax, kRedMax) -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc index b9bcc0174b03f..ad9fbda3dafeb 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc @@ -14,19 +14,16 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_allreduce_op.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class OpDesc; template class EmptyGradOpMaker; -} // namespace framework -namespace imperative { +} // namespace paddle::framework +namespace paddle::imperative { class OpBase; -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative -namespace paddle { -namespace operators { +namespace paddle::operators { class CAllReduceProdOpMaker : public CAllReduceOpMaker { protected: @@ -37,8 +34,7 @@ DECLARE_INPLACE_OP_INFERER(AllreduceProdInplaceInferer, {"X", "Out"}); DEFINE_C_ALLREDUCE_CPU_KERNEL(CAllReduceProd, kRedProd) -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/c_comm_init_all_op.cc b/paddle/fluid/operators/collective/c_comm_init_all_op.cc index 3a6156eb96e71..21729fd438b19 100644 --- a/paddle/fluid/operators/collective/c_comm_init_all_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_all_op.cc @@ -14,8 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_comm_init_all_op.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class CCommInitAllOp : public framework::OperatorWithKernel { public: @@ -47,8 +46,7 @@ Initialize all collective communication context } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/c_comm_init_op.cc b/paddle/fluid/operators/collective/c_comm_init_op.cc index 768c60c27b093..41c31b8e7b5cb 100644 --- a/paddle/fluid/operators/collective/c_comm_init_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_op.cc @@ -42,14 +42,11 @@ COMMON_DECLARE_bool(dynamic_static_unified_comm); #include "paddle/phi/core/distributed/store/store_utils.h" #include "paddle/phi/core/distributed/store/tcp_store.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class Scope; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace operators { +namespace paddle::operators { class CCommInitOp : public framework::OperatorBase { public: @@ -183,8 +180,7 @@ Initialize collective communication context within this trainer } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc index 2241ec81019d1..0da7899638880 100644 --- a/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc @@ -24,8 +24,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/device_manager.h" -namespace paddle { -namespace operators { +namespace paddle::operators { #ifdef PADDLE_WITH_CUSTOM_DEVICE static void CopyXCCLIDToVar(const std::vector& xccl_ids, @@ -97,8 +96,7 @@ For trainer 1~n: start a gRPC server to get the UniqueId, once got, stop the ser } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cc index 569b9733aa6a1..dcca25ef76fdb 100644 --- a/paddle/fluid/operators/collective/c_reduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cc @@ -14,19 +14,16 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_reduce_op.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class OpDesc; template class EmptyGradOpMaker; -} // namespace framework -namespace imperative { +} // namespace paddle::framework +namespace paddle::imperative { class OpBase; -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative -namespace paddle { -namespace operators { +namespace paddle::operators { class CReduceMaxOpMaker : public CReduceOpMaker { protected: @@ -35,8 +32,7 @@ class CReduceMaxOpMaker : public CReduceOpMaker { DEFINE_C_REDUCE_CPU_KERNEL(CReduceMax, kRedMax) -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cc index bdde5115d2e4b..11965ca4dd21f 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cc @@ -16,8 +16,7 @@ limitations under the License. 
*/ #include -namespace paddle { -namespace operators { +namespace paddle::operators { class CReduceScatterOp : public framework::OperatorWithKernel { public: @@ -63,8 +62,7 @@ Reference: https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/us } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/global_scatter_op.cc b/paddle/fluid/operators/collective/global_scatter_op.cc index e54c70d0d8db6..a91deb2f11d85 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cc @@ -14,8 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/global_scatter_op.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class GlobalScatterOp : public framework::OperatorWithKernel { public: @@ -104,8 +103,7 @@ class GlobalScatterOpGradMaker : public framework::SingleGradOpMaker { } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc index 283826a5a31fc..f56d680afbb23 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc @@ -15,17 +15,14 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class OpDesc; -} // namespace framework -namespace imperative { +} // namespace paddle::framework +namespace paddle::imperative { class OpBase; -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative -namespace paddle { -namespace operators { +namespace paddle::operators { class MpAllReduceSumOp : public 
framework::OperatorWithKernel { public: @@ -75,8 +72,7 @@ DECLARE_INPLACE_OP_INFERER(MpAllReduceSumInplaceInferer, {"X", "Out"}); DEFINE_C_ALLREDUCE_CPU_KERNEL(MpAllReduceSum, kRedSum); -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/partial_send_op.cc b/paddle/fluid/operators/collective/partial_send_op.cc index cf2a0ece1a7ab..961b8c4cf1382 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cc @@ -14,8 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/partial_send_op.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class PartialSendOp : public framework::OperatorWithKernel { public: @@ -84,8 +83,7 @@ Reference: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/p2p.h } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/collective/send_v2_op.cc b/paddle/fluid/operators/collective/send_v2_op.cc index cc41558804d6f..067488404a0b9 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cc @@ -14,8 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/send_v2_op.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class SendOpV2 : public framework::OperatorWithKernel { public: @@ -78,8 +77,7 @@ Reference: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/p2p.h } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/common_infer_shape_functions.cc b/paddle/fluid/operators/common_infer_shape_functions.cc index a7f7724a4be57..6b492aa9c5225 100644 --- a/paddle/fluid/operators/common_infer_shape_functions.cc +++ b/paddle/fluid/operators/common_infer_shape_functions.cc @@ -14,18 +14,14 @@ limitations under the License. */ #include "paddle/fluid/operators/common_infer_shape_functions.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class InferShapeContext; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework // This file almostly contains all the infershape functions that are used in // operators. -namespace paddle { -namespace operators { -namespace details { +namespace paddle::operators::details { inline void GetBroadcastDimsArrays(const phi::DDim &x_dims, const phi::DDim &y_dims, @@ -105,7 +101,8 @@ phi::DDim BroadcastTwoDims(const phi::DDim &x_dims, return common::make_ddim(out_dims_array); } -} // namespace details +} // namespace paddle::operators::details +namespace paddle::operators { // shape input(0) -> output(0) without change. 
void UnaryOpUnchangedInferShape(framework::InferShapeContext *ctx) { @@ -196,5 +193,4 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { } } -} // namespace operators -} // namespace paddle +} // namespace paddle::operators diff --git a/paddle/fluid/operators/compat/conv2d.pbtxt b/paddle/fluid/operators/compat/conv2d.pbtxt index b18e026499243..1b602fe43aab1 100644 --- a/paddle/fluid/operators/compat/conv2d.pbtxt +++ b/paddle/fluid/operators/compat/conv2d.pbtxt @@ -50,7 +50,7 @@ extra { attrs { name: "quantization_type" type: STRING - } + } attrs { name: "bit_length" type: INT diff --git a/paddle/fluid/operators/compat/conv2d_transpose.pbtxt b/paddle/fluid/operators/compat/conv2d_transpose.pbtxt index c805547e0143d..ed04ecc4b71ec 100644 --- a/paddle/fluid/operators/compat/conv2d_transpose.pbtxt +++ b/paddle/fluid/operators/compat/conv2d_transpose.pbtxt @@ -8,7 +8,7 @@ def { } inputs { name: "Bias" - } + } outputs { name: "Output" } diff --git a/paddle/fluid/operators/compat/conv2d_transpose_bias.pbtxt b/paddle/fluid/operators/compat/conv2d_transpose_bias.pbtxt index bce4fc9f0e114..93bf29b8b394a 100644 --- a/paddle/fluid/operators/compat/conv2d_transpose_bias.pbtxt +++ b/paddle/fluid/operators/compat/conv2d_transpose_bias.pbtxt @@ -8,7 +8,7 @@ def { } inputs { name: "Bias" - } + } outputs { name: "Output" } diff --git a/paddle/fluid/operators/compat/depthwise_conv2d.pbtxt b/paddle/fluid/operators/compat/depthwise_conv2d.pbtxt index ee04cd73dd70c..a0d80211c2594 100644 --- a/paddle/fluid/operators/compat/depthwise_conv2d.pbtxt +++ b/paddle/fluid/operators/compat/depthwise_conv2d.pbtxt @@ -42,7 +42,7 @@ extra { attrs { name: "quantization_type" type: STRING - } + } attrs { name: "bit_length" type: INT diff --git a/paddle/fluid/operators/compat/fused_transpose.pbtxt b/paddle/fluid/operators/compat/fused_transpose.pbtxt index e4c7c218cc117..677d2e5792f75 100644 --- a/paddle/fluid/operators/compat/fused_transpose.pbtxt +++ 
b/paddle/fluid/operators/compat/fused_transpose.pbtxt @@ -17,26 +17,26 @@ def { extra { attrs{ name: "fused_squeeze2_axes" - type: INTS + type: INTS } attrs{ name: "fused_unsqueeze2_axes" - type: INTS + type: INTS } attrs{ name: "fused_reshape2_shape" - type: INTS + type: INTS } attrs{ name: "scale" - type: FLOAT + type: FLOAT } attrs{ name: "shift" - type: FLOAT + type: FLOAT } attrs{ name: "output_data_type" - type: STRING + type: STRING } } diff --git a/paddle/fluid/operators/compat/mul.pbtxt b/paddle/fluid/operators/compat/mul.pbtxt index 056f799c6c49c..28b40d0e6526c 100644 --- a/paddle/fluid/operators/compat/mul.pbtxt +++ b/paddle/fluid/operators/compat/mul.pbtxt @@ -22,7 +22,7 @@ extra { attrs { name: "Out0_threshold" type: FLOAT - } + } attrs { name: "bit_length" type: INT @@ -30,7 +30,7 @@ extra { attrs { name: "quantization_type" type: STRING - } + } attrs { name: "skip_quant" type: BOOLEAN diff --git a/paddle/fluid/operators/compat/sequence_conv.pbtxt b/paddle/fluid/operators/compat/sequence_conv.pbtxt index c5335a25c557a..679b1095a57ba 100644 --- a/paddle/fluid/operators/compat/sequence_conv.pbtxt +++ b/paddle/fluid/operators/compat/sequence_conv.pbtxt @@ -23,7 +23,7 @@ def { attrs { name: "contextStride" type: INT - } + } } extra { attrs { @@ -49,5 +49,5 @@ extra { attrs { name: "op_device" type: STRING - } + } } diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index 0cc5a0bbd0927..5d4b4b21ccdcc 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -25,8 +25,7 @@ limitations under the License. 
*/ COMMON_DECLARE_bool(use_mkldnn); -namespace paddle { -namespace operators { +namespace paddle::operators { const char ConditionalOp::kInputs[] = "Input"; // NOLINT const char ConditionalOp::kOutputs[] = "Out"; // NOLINT @@ -334,8 +333,7 @@ class ConditionalBlockGradMaker : public framework::SingleGradOpMaker { } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; REGISTER_OPERATOR(conditional_block, diff --git a/paddle/fluid/operators/controlflow/pylayer_op.cc b/paddle/fluid/operators/controlflow/pylayer_op.cc index 0a6cb8aac83c0..872028bead1c9 100644 --- a/paddle/fluid/operators/controlflow/pylayer_op.cc +++ b/paddle/fluid/operators/controlflow/pylayer_op.cc @@ -19,8 +19,7 @@ #include "paddle/fluid/operators/controlflow/control_flow_op_helper.h" #include "paddle/phi/kernels/funcs/math_function.h" -namespace paddle { -namespace operators { +namespace paddle::operators { namespace { // NOLINT enum class PyLayerBlockIndex { kFORWARD = 0, kBACKWARD = 1, kNONE = 2 }; @@ -263,8 +262,7 @@ class PyLayerBackwardInferVarType : public framework::VarTypeInference { } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; REGISTER_OPERATOR(pylayer, diff --git a/paddle/fluid/operators/controlflow/pylayer_op_helper.cc b/paddle/fluid/operators/controlflow/pylayer_op_helper.cc index bdd669c644e6e..68263cec46282 100644 --- a/paddle/fluid/operators/controlflow/pylayer_op_helper.cc +++ b/paddle/fluid/operators/controlflow/pylayer_op_helper.cc @@ -16,14 +16,11 @@ #include -namespace paddle { -namespace framework { +namespace paddle::framework { class ProgramDesc; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace operators { +namespace paddle::operators { static bool IsMatchedPyLayerOpAndPyLayerGradOp(const OpVariant &fwd_op, const OpVariant &bwd_op) { @@ -173,5 +170,4 @@ void 
PrepareSafeEagerDeletionOnPyLayerOpAndPyLayerGradOp( program, &fwd_ops, &bwd_ops); } -} // namespace operators -} // namespace paddle +} // namespace paddle::operators diff --git a/paddle/fluid/operators/custom_device_common_op_registry.cc b/paddle/fluid/operators/custom_device_common_op_registry.cc index de04cb0e3bba5..ffdb3f01454a2 100644 --- a/paddle/fluid/operators/custom_device_common_op_registry.cc +++ b/paddle/fluid/operators/custom_device_common_op_registry.cc @@ -1366,7 +1366,10 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { float>, paddle::operators::CConcatOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - phi::dtype::float16>); + phi::dtype::float16>, + paddle::operators::CConcatOpCustomDeviceKernel< + paddle::platform::CustomDeviceContext, + phi::dtype::bfloat16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_split, device_type, @@ -1378,7 +1381,10 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { int>, paddle::operators::CSplitOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - phi::dtype::float16>); + phi::dtype::float16>, + paddle::operators::CSplitOpCustomDeviceKernel< + paddle::platform::CustomDeviceContext, + phi::dtype::bfloat16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_embedding, device_type, diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index c3880559f737f..8fdf5326a29c9 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -15,8 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class BipartiteMatchOp : public framework::OperatorWithKernel { public: @@ -308,8 +307,7 @@ If Tensor, the height of ColToRowMatchIndices is 1. 
} }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; REGISTER_OPERATOR( diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index d9bb602338352..c15c161a60999 100755 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -6,8 +6,6 @@ endif() register_operators( EXCLUDES fused_bn_activation_op - fusion_group_op - fusion_lstm_op fused_bn_add_activation_op fused_attention_op fused_transformer_op @@ -19,8 +17,6 @@ register_operators( fused_gate_attention_op resnet_basic_block_op) -op_library(fusion_lstm_op) - if(WITH_XPU) op_library(resnet_basic_block_op) op_library(resnet_unit_op) @@ -38,10 +34,6 @@ if(WITH_GPU OR WITH_ROCM) # HIP not support cudnnTransformTensor # HIP not support cudnnConvolutionBiasActivationForward op_library(fused_gate_attention_op) - # fusion_group - if(NOT APPLE AND NOT WIN32) - op_library(fusion_group_op) - endif() # fused_bn_add_activation # HIP not support bn act fuse in MIOPEN if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7401)) diff --git a/paddle/fluid/operators/fused/fused_conv2d_op.cc b/paddle/fluid/operators/fused/fused_conv2d_op.cc index 4e440bc972fbd..04d2d4043bf96 100644 --- a/paddle/fluid/operators/fused/fused_conv2d_op.cc +++ b/paddle/fluid/operators/fused/fused_conv2d_op.cc @@ -20,8 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/generator/get_expected_kernel_func.h" #include "paddle/phi/infermeta/multiary.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class FusedConvOpMaker : public framework::OpProtoAndCheckerMaker { public: @@ -118,8 +117,7 @@ class FusedConvOp : public framework::OperatorWithKernel { } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc deleted file mode 100644 index 3ded78e3be4cf..0000000000000 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc +++ /dev/null @@ -1,500 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/fused/fused_elemwise_activation_op.h" - -#include -#include - -namespace paddle { -namespace operators { - -bool IsUnaryCompound(const std::vector &functor_list) { - PADDLE_ENFORCE_EQ( - functor_list.size(), - 2, - phi::errors::InvalidArgument( - "Invalid functor list size %d, which should be equal to %d.", - functor_list.size(), - 2)); - static std::unordered_set binary_fun = {"elementwise_add", - "elementwise_mul", - "elementwise_add_grad", - "elementwise_mul_grad"}; - return binary_fun.count(functor_list[1]) != 0; -} - -bool HasInPlaceUnary(const std::vector &functor_list) { - PADDLE_ENFORCE_EQ( - functor_list.size(), - 2, - phi::errors::InvalidArgument( - "Invalid functor list size %d, which should be equal to %d.", - functor_list.size(), - 2)); - static std::unordered_set InplaceOpSet = {"relu", "relu_grad"}; - bool is_in_place = false; - for (auto &func_name : functor_list) { - is_in_place |= (InplaceOpSet.count(func_name) == 1); - } - return is_in_place; -} - -bool InputXCanBeAbsent(const std::vector &functor_list) { - PADDLE_ENFORCE_EQ( - functor_list.size(), - 2, - phi::errors::InvalidArgument( - "Invalid functor list size %d, which should be equal to %d.", - functor_list.size(), - 2)); - static std::unordered_set binary_fun = {"elementwise_add_grad"}; - return binary_fun.count(functor_list[0]) != 0 || - binary_fun.count(functor_list[1]) != 0; -} - -/* - * Whether the compound function is supported. - * For Unary(Binary(X, Y)), the intermediate_out's shape is the same the final - * out. 
- */ -static bool IsSupportedCompound(const std::vector &functors) { - PADDLE_ENFORCE_EQ( - functors.size(), - 2UL, - phi::errors::InvalidArgument( - "Invalid functor list size %d, which should be equal to %d.", - functors.size(), - 2)); - - static std::unordered_set unary_fun = { - "scale", "relu", "tanh", "sigmoid", "gelu"}; - static std::unordered_set binary_fun = {"elementwise_add", - "elementwise_mul"}; - - std::string unary_fun_str; - if (binary_fun.count(functors[0])) { - unary_fun_str = functors[1]; - } else if (binary_fun.count(functors[1])) { - unary_fun_str = functors[0]; - } else { - PADDLE_THROW(phi::errors::InvalidArgument( - "%s and %s are not included in fused_list.", functors[0], functors[1])); - } - PADDLE_ENFORCE_EQ(unary_fun.count(unary_fun_str), - 1, - phi::errors::InvalidArgument( - "%s is not included in fused_list.", unary_fun_str)); - return true; -} - -class FusedElemwiseActivationOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - phi::errors::InvalidArgument( - "Input(X) of FusedElemwiseActivationOp op should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Y"), - true, - phi::errors::InvalidArgument( - "Input(Y) of FusedElemwiseActivationOp op should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Out"), - true, - phi::errors::InvalidArgument( - "Output(Out) of FusedElemwiseActivationOp op should not be null.")); - - auto x_dim = ctx->GetInputDim("X"); - auto y_dim = ctx->GetInputDim("Y"); - - // Whether the shape of Y is a continuous subsequence of X, - // For more information please refer to the op's introduction. - bool bcast_y = IsBcastY(x_dim, y_dim); - - auto &out_dim = bcast_y ? x_dim : y_dim; - std::string out_lod = bcast_y ? 
"X" : "Y"; - - if (ctx->Attrs().Get("save_intermediate_out")) { - PADDLE_ENFORCE_EQ( - ctx->HasOutput("IntermediateOut"), - true, - phi::errors::InvalidArgument( - "Output(IntermediateOut) of FusedElemwiseActivationOp " - "should not be null.")); - - if (IsUnaryCompound( - ctx->Attrs().Get>("functor_list"))) { - // for Unary(Binary(X, Y)), the shape and lod of out and - // intermediate_out are the same. - ctx->SetOutputDim("IntermediateOut", out_dim); - // set the lod of intermediate_out - ctx->ShareLoD(out_lod, /*->*/ "IntermediateOut"); - } else { - // for Binary(X, Unary(Y)), the shape and lod of Y and - // intermediate_out are the same. - ctx->SetOutputDim("IntermediateOut", y_dim); - // set the lod of intermediate_out - ctx->ShareLoD("Y", /*->*/ "IntermediateOut"); - } - } - ctx->SetOutputDim("Out", out_dim); - ctx->ShareLoD(out_lod, /*->*/ "Out"); - } - - static bool IsBcastY(const phi::DDim &x_dim, const phi::DDim &y_dim) { - bool bcast_y = x_dim.size() >= y_dim.size(); - if (x_dim.size() == y_dim.size()) { - for (int i = 0; i < x_dim.size(); ++i) { - if (x_dim[i] < y_dim[i]) { - bcast_y = false; - break; - } - } - } - return bcast_y; - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.Input("X")->dtype(), - ctx.Input("Y")->dtype(), - phi::errors::InvalidArgument( - "The element's type of input should be the same.")); - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class FusedElemwiseActivationMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "X", - "(Tensor) The input tensor of fused_elemwise_activation operator."); - AddInput( - "Y", - "(Tensor) The input tensor of fused_elemwise_activation operator."); - AddOutput("Out", - "vector The output tensor of fused_elemwise_activation " - "operator."); - AddOutput("IntermediateOut", - "Tensor The IntermediateOut tensor 
of fused_elemwise_activation " - "operator.") - .AsIntermediate(); - AddAttr("axis", - "axis is used by elementwise_op, the default value is -1.") - .SetDefault(-1); - AddAttr("scale", - "scale is used by scale_op, the default value is 0.0.") - .SetDefault(0.0); - AddAttr("save_intermediate_out", - "Whether to save the intermediate_out.") - .SetDefault(false); - AddAttr>("functor_list", - "The functors that should be fused.") - .AddCustomChecker([&](const std::vector &functor_list) { - PADDLE_ENFORCE_EQ( - IsSupportedCompound(functor_list), - true, - phi::errors::InvalidArgument( - "the input functors should support compounding.")); - }); - - AddComment(R"DOC( -FusedElemwiseActivation Operator. - -At present, FusedElemwiseActivation only supports Two kinds of compound -operators (elementwise_op and activation_op): - - Z = Binary(X, Unary(Y)) - Z = Unary(Binary(X, Y)) - -There are two cases for this operator: - -1. The shape of $Y$ and $X$ is the same. -2. The shape of $Y$ is a continuous subsequence of $X$ or the shape of $X$ is a continuous subsequence of $Y$. - -For case 2 (assume that the shape of $Y$ is a continuous subsequence of $X$ ): - -1. Broadcast $Y$ to match the shape of $X$, where $axis$ is the start dimension index - for broadcasting $Y$ onto $X$. -2. If $axis$ is -1 (default), $axis = rank(X) - rank(Y)$. -3. The trailing dimensions of size 1 for $Y$ will be ignored for the consideration of - subsequence, such as shape(Y) = (2, 1) => (2). - -For example: - - .. code-block:: text - - shape(X) = (2, 3, 4, 5), shape(Y) = (,) - shape(X) = (2, 3, 4, 5), shape(Y) = (5,) - shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5), with axis=-1(default) or axis=2 - shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1 - shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0 - shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0 - - -The inputs $X$ and $Y$ can carry the different LoD information. 
-But the output only shares the LoD information with the one whose shape is the same with Out. -The attributions of activation_op can be get from fused_elemwise_activation_op's. -The functor_list records the functions to be fused, for example -["scale", "elementwise_add"]. - -)DOC"); - } -}; - -template -class FusedElemwiseActivationGradMaker - : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr grad_op) const override { - grad_op->SetType(this->ForwardOpType() + "_grad"); - - for (auto &input_param : this->InputNames()) { - grad_op->SetInput(input_param, this->Input(input_param)); - grad_op->SetOutput(framework::GradVarName(input_param), - this->InputGrad(input_param, true)); - } - - grad_op->SetInput("Out", this->Output("Out")); - grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - - grad_op->SetAttrMap(this->Attrs()); - - std::vector functor_names = PADDLE_GET_CONST( - std::vector, grad_op->GetAttr("functor_list")); - - functor_names[0] += "_grad"; - functor_names[1] += "_grad"; - grad_op->SetAttr("functor_list", functor_names); - - if (PADDLE_GET_CONST(bool, grad_op->GetAttr("save_intermediate_out"))) { - // PADDLE_ENFORCE_NE(Output("IntermediateOut").size(), 0); - grad_op->SetInput("IntermediateOut", this->Output("IntermediateOut")); - grad_op->SetOutput(framework::GradVarName("IntermediateOut"), - this->OutputGrad("IntermediateOut")); - } else { - grad_op->SetInput("IntermediateOut", this->EmptyOutput()); - grad_op->SetOutput(framework::GradVarName("IntermediateOut"), - this->EmptyOutputGrad()); - } - } -}; - -class FusedElemwiseAddActivationMaker : public FusedElemwiseActivationMaker {}; - -template -class FusedElemwiseAddActivationGradMaker - : public FusedElemwiseActivationGradMaker { - public: - using FusedElemwiseActivationGradMaker::FusedElemwiseActivationGradMaker; -}; - -class FusedElemwiseActivationOpGrad : public 
framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput(framework::GradVarName("Out")), - true, - phi::errors::InvalidArgument("Input(Out@Grad) should not be null.")); - - auto functor_list = - ctx->Attrs().Get>("functor_list"); - - if (ctx->Attrs().Get("save_intermediate_out")) { - PADDLE_ENFORCE_EQ(ctx->HasInput("IntermediateOut"), - true, - phi::errors::InvalidArgument( - "Input(IntermediateOut) should not be null.")); - } else { - if (!InputXCanBeAbsent(functor_list)) { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - phi::errors::InvalidArgument("Input(X) should not be null.")); - } - } - - auto x_grad_name = framework::GradVarName("X"); - auto y_grad_name = framework::GradVarName("Y"); - auto inter_grad_name = framework::GradVarName("IntermediateOut"); - - if (ctx->HasOutput(x_grad_name)) { - if (ctx->HasInputs("X")) { - ctx->SetOutputDim(x_grad_name, ctx->GetInputDim("X")); - ctx->ShareLoD("X", x_grad_name); - } else { - // Currently, only when Binary is elementwise_add or elementwise_sub, - // the "X" could be absent. - PADDLE_ENFORCE_EQ( - InputXCanBeAbsent(functor_list), - true, - phi::errors::InvalidArgument( - "Only when BinaryFunctor is elementwise_add, the 'X' " - "could be absent.")); - - // Node: If "X" is absence, the shape of Y should be a continuous - // subsequence of X, otherwise, we could not infer the shape of dx. 
- - ctx->SetOutputDim(x_grad_name, - ctx->GetInputDim(framework::GradVarName("Out"))); - ctx->ShareLoD(framework::GradVarName("Out"), x_grad_name); - } - } - - if (ctx->HasOutput(y_grad_name)) { - PADDLE_ENFORCE_EQ( - ctx->HasInput("Y"), - true, - phi::errors::InvalidArgument("Input(Y) should not be null.")); - ctx->SetOutputDim(y_grad_name, ctx->GetInputDim("Y")); - ctx->ShareLoD("Y", y_grad_name); - } - - if (ctx->HasOutput(inter_grad_name)) { - // For Unary(Binary(X, Y)), IntermediateOut should not be empty. - if (IsUnaryCompound(functor_list)) { - ctx->SetOutputDim(inter_grad_name, - ctx->GetInputDim(framework::GradVarName("Out"))); - ctx->ShareLoD(framework::GradVarName("Out"), inter_grad_name); - } else { - ctx->SetOutputDim(inter_grad_name, ctx->GetInputDim("Y")); - ctx->ShareLoD("Y", inter_grad_name); - } - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); - } -}; - -class FusedElemwiseAddActivationOp : public FusedElemwiseActivationOp { - public: - using FusedElemwiseActivationOp::FusedElemwiseActivationOp; - void InferShape(framework::InferShapeContext *ctx) const override { - FusedElemwiseActivationOp::InferShape(ctx); - std::vector functor_names = - ctx->Attrs().Get>("functor_list"); - bool elemntwise_add_detected = false; - for (auto const &names : functor_names) { - if (names == "elementwise_add") { - elemntwise_add_detected = true; - break; - } - } - PADDLE_ENFORCE_EQ( - elemntwise_add_detected, - true, - phi::errors::InvalidArgument( - "When the FusedElemwiseAddActivationOp Is used in fused pass, the " - "elementwise_add Op must be" - "detected and used, Please check the fuse pass pattern")); - } -}; - -class FusedElemwiseAddActivationOpGrad : public FusedElemwiseActivationOpGrad { - public: - using FusedElemwiseActivationOpGrad::FusedElemwiseActivationOpGrad; 
- - void InferShape(framework::InferShapeContext *ctx) const override { - FusedElemwiseActivationOpGrad::InferShape(ctx); - std::vector functor_names = - ctx->Attrs().Get>("functor_list"); - bool elemntwise_add_grad_detected = false; - for (auto const &names : functor_names) { - if (names == "elementwise_add_grad") { - elemntwise_add_grad_detected = true; - break; - } - } - PADDLE_ENFORCE_EQ( - elemntwise_add_grad_detected, - true, - phi::errors::InvalidArgument( - "When the FusedElemwiseAddActivationOpGrad Is used in fused pass, " - "the elementwise_add_grad Op must be" - "detected and used, Please check the fuse pass pattern")); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER( - FusedElemwiseAddActivationNoNeddBufVarInferer, "X", "Y"); -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - fused_elemwise_activation, - ops::FusedElemwiseActivationOp, - ops::FusedElemwiseActivationMaker, - ops::FusedElemwiseActivationGradMaker, - ops::FusedElemwiseActivationGradMaker); -REGISTER_OPERATOR(fused_elemwise_activation_grad, - ops::FusedElemwiseActivationOpGrad); - -PD_REGISTER_STRUCT_KERNEL(fused_elemwise_activation, - CPU, - ALL_LAYOUT, - ops::FusedElemwiseActivationKernel, - float, - double) {} - -PD_REGISTER_STRUCT_KERNEL(fused_elemwise_activation_grad, - CPU, - ALL_LAYOUT, - ops::FusedElemwiseActivationGradKernel, - float, - double) {} - -// for memory optimization, we register the fused_elemwise_add_activation OP -REGISTER_OPERATOR( - fused_elemwise_add_activation, - ops::FusedElemwiseAddActivationOp, - ops::FusedElemwiseAddActivationMaker, - ops::FusedElemwiseAddActivationGradMaker, - ops::FusedElemwiseAddActivationGradMaker); -REGISTER_OPERATOR(fused_elemwise_add_activation_grad, - ops::FusedElemwiseAddActivationNoNeddBufVarInferer, - ops::FusedElemwiseAddActivationOpGrad); - -PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation, - CPU, - ALL_LAYOUT, - ops::FusedElemwiseAddActivationKernel, - float, - 
double) {} - -PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation_grad, - CPU, - ALL_LAYOUT, - ops::FusedElemwiseAddActivationGradKernel, - float, - double) {} diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu deleted file mode 100644 index d231bbff9b93b..0000000000000 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/fused/fused_elemwise_activation_op.h" - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(fused_elemwise_activation, - GPU, - ALL_LAYOUT, - ops::FusedElemwiseActivationKernel, - float, - double, - phi::dtype::float16) {} -PD_REGISTER_STRUCT_KERNEL(fused_elemwise_activation_grad, - GPU, - ALL_LAYOUT, - ops::FusedElemwiseActivationGradKernel, - float, - double, - phi::dtype::float16) {} - -PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation, - GPU, - ALL_LAYOUT, - ops::FusedElemwiseAddActivationKernel, - float, - double, - phi::dtype::float16) {} -PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation_grad, - GPU, - ALL_LAYOUT, - ops::FusedElemwiseAddActivationGradKernel, - float, - double, - phi::dtype::float16) {} diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc index ea8040c763644..685400f167e7c 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc @@ -20,8 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" -namespace paddle { -namespace operators { +namespace paddle::operators { /** * Get row matrix shape from a vector shape. If the rank of x_dim > 1, the @@ -430,8 +429,7 @@ class FusedFeedForwardOpDoubleGradMaker protected: void Apply(GradOpPtr grad_op) const override {} }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; REGISTER_OPERATOR(fused_feedforward, diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cc b/paddle/fluid/operators/fused/fused_gate_attention_op.cc index 16aa1398e08d8..717eb990f49f3 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cc @@ -17,8 +17,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" -namespace paddle { -namespace operators { +namespace paddle::operators { using DDim = phi::DDim; @@ -365,8 +364,7 @@ class FusedGateAttentionGradOpMaker : public framework::SingleGradOpMaker { } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; REGISTER_OPERATOR( diff --git a/paddle/fluid/operators/fused/fusion_group_op.cc b/paddle/fluid/operators/fused/fusion_group_op.cc deleted file mode 100644 index b42dd927c6e31..0000000000000 --- a/paddle/fluid/operators/fused/fusion_group_op.cc +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/infermeta/multiary.h" - -namespace paddle { -namespace operators { - -class FusionGroupOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, phi::GPUPlace(0)); - }; -}; - -class FusionGroupOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Inputs", - "(std::vector) The inputs of fusion_group op.") - .AsDuplicable(); - AddOutput("Outs", - "(std::vector) The outputs of fusion_group op.") - .AsDuplicable(); - AddAttr>("outs_dtype", - "The data type of Outputs in fusion_group op.") - .SetDefault({}); - AddAttr>("inputs_dtype", - "The data type of Inputs in fusion_group op.") - .SetDefault({}); - AddAttr("type", "Fusion type.").SetDefault(0); - AddAttr("func_name", "Name of the generated functions.") - .SetDefault(""); - AddComment(R"DOC( -fusion_group Operator. - -It is used to execute a generated CUDA kernel which fuse the computation of -multiple operators into one. It supports several types: -0, fused computation of elementwise operations in which all the dims of inputs - and outputs should be exactly the same. 
-)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -DECLARE_INFER_SHAPE_FUNCTOR(fusion_group, - FusionGroupInferShapeFunctor, - PD_INFER_META(phi::FusionGroupInferMeta)); - -namespace ops = paddle::operators; -REGISTER_OPERATOR(fusion_group, - ops::FusionGroupOp, - ops::FusionGroupOpMaker, - FusionGroupInferShapeFunctor); diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc deleted file mode 100644 index f40ac248f1962..0000000000000 --- a/paddle/fluid/operators/fused/fusion_lstm_op.cc +++ /dev/null @@ -1,577 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/fused/fusion_lstm_op.h" - -#include - -#include "paddle/phi/kernels/funcs/blas/blas.h" -#include "paddle/phi/kernels/funcs/fc_functor.h" -#include "paddle/phi/kernels/funcs/jit/kernels.h" -#include "paddle/phi/kernels/funcs/sequence2batch.h" - -namespace paddle { -namespace operators { - -void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "fusion_lstm"); - OP_INOUT_CHECK(ctx->HasInput("WeightX"), "Input", "WeightX", "fusion_lstm"); - OP_INOUT_CHECK(ctx->HasInput("WeightH"), "Input", "WeightH", "fusion_lstm"); - OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", "fusion_lstm"); - OP_INOUT_CHECK(ctx->HasOutput("XX"), "Output", "XX", "fusion_lstm"); - OP_INOUT_CHECK(ctx->HasOutput("Hidden"), "Output", "Hidden", "fusion_lstm"); - OP_INOUT_CHECK(ctx->HasOutput("Cell"), "Output", "Cell", "fusion_lstm"); - - auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(x_dims.size(), - 2, - phi::errors::InvalidArgument( - "Input(X)'s rank must be 2, but received x's rank " - "is:%d, x dim is:[%s]", - x_dims.size(), - x_dims)); - - if (ctx->HasInput("H0")) { - OP_INOUT_CHECK(ctx->HasInput("C0"), "Input", "C0", "fusion_lstm"); - auto h_dims = ctx->GetInputDim("H0"); - auto c_dims = ctx->GetInputDim("C0"); - PADDLE_ENFORCE_EQ(h_dims, - c_dims, - phi::errors::InvalidArgument( - "The dimension of Input(H0) and Input(C0) should be " - "same, but received h0 dims is:[%s], c0 dims is:[%s]", - h_dims, - c_dims)); - } - - auto wx_dims = ctx->GetInputDim("WeightX"); - PADDLE_ENFORCE_EQ(wx_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(WeightX) should be 2, but received " - "WeightX's rank is:%d, WeightX dim is:[%s]", - wx_dims.size(), - wx_dims)); - PADDLE_ENFORCE_EQ(wx_dims[0], - x_dims[1], - phi::errors::InvalidArgument( - "The first dimension of Input(WeightX) " - "should equal to second dimension of Input(X), but " - "received WeightX first dim 
is:%d, X second dim is:%d", - wx_dims[0], - x_dims[1])); - - int frame_size = static_cast(wx_dims[1] / 4); - auto wh_dims = ctx->GetInputDim("WeightH"); - - PADDLE_ENFORCE_EQ(wh_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(WeightH) should be 2, but received " - "WeightH rank is:%d, WeightH dim is:[%s]", - wh_dims.size(), - wh_dims)); - PADDLE_ENFORCE_EQ(wh_dims[0], - frame_size, - phi::errors::InvalidArgument( - "The first dimension of Input(WeightH) " - "should equal to frame size, but received WeightH " - "first dim is:%d, frame size is:%d.", - wh_dims[0], - frame_size)); - - PADDLE_ENFORCE_EQ(wh_dims[1], - 4 * frame_size, - phi::errors::InvalidArgument( - "The second dimension of Input(WeightH) " - "should equal to 4 * frame_size, but received WeightH " - "second dimension is:%d, frame size is:%d.", - wh_dims[1], - frame_size)); - - auto b_dims = ctx->GetInputDim("Bias"); - PADDLE_ENFORCE_EQ(b_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(Bias) should be 2, but received " - "Bias rank is:%d, Bias dim is:[%s]", - b_dims.size(), - b_dims)); - PADDLE_ENFORCE_EQ(b_dims[0], - 1, - phi::errors::InvalidArgument( - "The first dimension of Input(Bias) should be 1, but " - "received Bias's dimension is:[%s]", - b_dims)); - - if (ctx->Attrs().Get("use_peepholes")) { - PADDLE_ENFORCE_EQ(b_dims[1], - 7 * frame_size, - phi::errors::InvalidArgument( - "The second dimension of Input(Bias) should be " - "7 * %d if enable peepholes connection, but received " - "Bias dim is:[%s]", - frame_size, - b_dims)); - ctx->SetOutputDim("CheckedCell", {2, frame_size}); - } else { - PADDLE_ENFORCE_EQ( - b_dims[1], - 4 * frame_size, - phi::errors::InvalidArgument( - "The second dimension of Input(Bias) should be " - "4 * %d if disable peepholes, but received Bias dim is:[%s]", - frame_size, - b_dims)); - } - - phi::DDim out_dims({x_dims[0], frame_size}); - ctx->SetOutputDim("Hidden", out_dims); - ctx->SetOutputDim("Cell", out_dims); - 
ctx->ShareLoD("X", "Hidden"); - ctx->ShareLoD("X", "Cell"); - int xx_width = 0; - if (ctx->Attrs().Get("use_seq")) { - xx_width = static_cast(wx_dims[1]); - } else { - xx_width = - static_cast(x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1]); - - OP_INOUT_CHECK(ctx->HasOutput("BatchedInput"), - "Output", - "BatchedInput", - "fusion_lstm"); - OP_INOUT_CHECK(ctx->HasOutput("BatchedHidden"), - "Output", - "BatchedHidden", - "fusion_lstm"); - OP_INOUT_CHECK( - ctx->HasOutput("BatchedCell"), "Output", "BatchedCell", "fusion_lstm"); - OP_INOUT_CHECK( - ctx->HasOutput("ReorderedH0"), "Output", "ReorderedH0", "fusion_lstm"); - OP_INOUT_CHECK( - ctx->HasOutput("ReorderedC0"), "Output", "ReorderedC0", "fusion_lstm"); - - ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); - ctx->SetOutputDim("BatchedHidden", out_dims); - ctx->SetOutputDim("BatchedCell", out_dims); - } - ctx->SetOutputDim("XX", {x_dims[0], xx_width}); - ctx->ShareLoD("X", "XX"); -} - -phi::KernelKey FusionLSTMOp::GetExpectedKernelType( - const framework::ExecutionContext& ctx) const { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return phi::KernelKey(data_type, ctx.GetPlace()); -} - -void FusionLSTMOpMaker::Make() { - AddInput( - "X", - "(phi::DenseTensor) the input is a LodTensor, which support " - "variable-time length input sequence. The underlying tensor in " - "this phi::DenseTensor is a matrix with shape (T X M), where T is the " - "total time steps in this mini-batch, M is the dim size of x."); - AddInput("WeightX", - "(phi::DenseTensor) the learnable weights of X." - " - The shape is (M x 4D), where M is the dim size of x, D is the " - "hidden size. " - " - Weight = {W_cx, W_ix, W_fx, W_ox}"); - AddInput( - "WeightH", - "(phi::DenseTensor) same as LSTMOp, the learnable hidden-hidden weights." - " - The shape is (D x 4D), where D is the hidden size. " - " - Weight = {W_ch, W_ih, W_fh, W_oh}"); - AddInput("Bias", - "(phi::DenseTensor) the learnable weights. 
Almost same as LSTMOp" - "Note: we should add the fc bias into this (1x4D) in bias." - "input-hidden bias weight and peephole connections weight if " - "setting `use_peepholes` True. " - "1. `use_peepholes = False` " - " - The shape is (1 x 4D). " - " - Bias = {b_c, b_i, b_f, b_o}." - "2. `use_peepholes = True` " - " - The shape is (1 x 7D). " - " - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."); - AddInput("H0", - "(phi::DenseTensor, optional) (same as LSTMOp) the initial hidden " - "state is an " - "optional " - "input. This is a tensor with shape (N x D), where N is the " - "batch size and D is the hidden size.") - .AsDispensable(); - AddInput("C0", - "(phi::DenseTensor, optional) (same as LSTMOp) (the initial cell " - "state is an " - "optional " - "input. This is a tensor with shape (N x D), where N is the " - "batch size. `H0` and `C0` can be NULL but only at the same time.") - .AsDispensable(); - AddOutput( - "Hidden", - "(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. " - "The shape is (T x D), and lod is the same with the `Input`."); - AddOutput( - "Cell", - "(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. 
" - "The shape is (T x D), and lod is the same with the `Input`."); - AddOutput("XX", - "(phi::DenseTensor) the result after X * WeightX (size is T x 4D)" - " or batched_X (size is T x M), this will be automatically chosen," - " where T is the total time steps in this mini-batch," - " D is the hidden size, M is the dim size of x input.") - .AsIntermediate(); - AddOutput("BatchedInput", "(phi::DenseTensor) (T x 4D).").AsIntermediate(); - AddOutput("BatchedHidden", "(phi::DenseTensor) (T x D).").AsIntermediate(); - AddOutput("BatchedCell", "(phi::DenseTensor) (T x D).").AsIntermediate(); - AddOutput("ReorderedH0", "(phi::DenseTensor) (N x D).").AsIntermediate(); - AddOutput("ReorderedC0", "(phi::DenseTensor) (N x D).").AsIntermediate(); - AddOutput("CheckedCell", "(phi::DenseTensor) (2 x D) only for peephole.") - .AsIntermediate(); - AddAttr("use_peepholes", - "(bool, default: True) " - "whether to enable diagonal/peephole connections.") - .SetDefault(true); - AddAttr("is_reverse", - "(bool, default: False) " - "whether to compute reversed LSTM.") - .SetDefault(false); - AddAttr("use_seq", - "(bool, default: True) " - "whether to use seq mode to compute.") - .SetDefault(true); - AddAttr("gate_activation", - "(string, default: sigmoid)" - "The activation for input gate, forget gate and output " - "gate, `sigmoid` by default.") - .SetDefault("sigmoid") - .InEnum({"sigmoid", "tanh", "relu", "identity"}); - AddAttr("cell_activation", - "(string, default: tanh)" - "The activation for cell output, `tanh` by default.") - .SetDefault("tanh") - .InEnum({"sigmoid", "tanh", "relu", "identity"}); - AddAttr("candidate_activation", - "(string, default: tanh)" - "The activation for candidate hidden state, " - "`tanh` by default.") - .SetDefault("tanh") - .InEnum({"sigmoid", "tanh", "relu", "identity"}); - AddAttr("Scale_data", - "Scale to be used for int8 input/output data." 
- "Only used with MKL-DNN INT8.") - .SetDefault(1.0f); - AddAttr("Shift_data", - "Shift to be used for int8 input/output data." - "Only used with MKL-DNN INT8.") - .SetDefault(0.0f); - AddAttr>("Scale_weights", - "Scale_weights to be used for int8 weights data." - "Only used with MKL-DNN INT8.") - .SetDefault({1.0f}); - AddAttr("force_fp32_output", - "(bool, default false) Force INT8 kernel output FP32, only " - "used in MKL-DNN INT8") - .SetDefault(false); - AddComment(R"DOC( -Fusion Long-Short Term Memory (LSTM) Operator. -This operator fuse the X into LSTM, more details can refer to LSTM op. -)DOC"); -} - -template -class FusionLSTMKernel : public framework::OpKernel { - public: -#define INIT_BASE_DEFINES \ - auto* x = ctx.Input("X"); \ - auto* h0 = ctx.Input("H0"); \ - auto* c0 = ctx.Input("C0"); \ - auto* wx = ctx.Input("WeightX"); \ - auto* wh = ctx.Input("WeightH"); \ - auto* bias = ctx.Input("Bias"); \ - auto* xx = ctx.Output("XX"); \ - auto* hidden_out = ctx.Output("Hidden"); \ - auto* cell_out = ctx.Output("Cell"); \ - bool is_reverse = ctx.Attr("is_reverse"); \ - bool use_peepholes = ctx.Attr("use_peepholes"); \ - auto x_dims = x->dims(); /* T x M*/ \ - auto wh_dims = wh->dims(); /* D x 4D*/ \ - const int M = x_dims[1]; \ - const int D = wh_dims[0]; \ - const int D4 = wh_dims[1] - -#define INIT_OTHER_DEFINES \ - const T* x_data = x->data(); \ - const T* wx_data = wx->data(); \ - const T* wh_data = wh->data(); \ - /* diagonal weight*/ \ - const T* wp_data = bias->data() + D4; \ - /* for peephole only*/ \ - T* checked_cell_data = nullptr; \ - auto place = ctx.GetPlace(); \ - if (use_peepholes) { \ - /* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/ \ - auto* checked_cell = ctx.Output("CheckedCell"); \ - checked_cell_data = checked_cell->mutable_data(place); \ - } \ - const phi::jit::lstm_attr_t attr( \ - D, \ - phi::jit::to_kerneltype(ctx.Attr("gate_activation")), \ - phi::jit::to_kerneltype(ctx.Attr("candidate_activation")), \ - 
phi::jit::to_kerneltype(ctx.Attr("cell_activation")), \ - use_peepholes); \ - phi::jit::lstm_t one_step; \ - one_step.wp = wp_data; \ - one_step.checked = checked_cell_data; \ - auto ComputeC1H1 = phi::jit::KernelFuncs, \ - phi::CPUPlace>::Cache() \ - .At(attr); \ - auto ComputeCtHt = phi::jit::KernelFuncs, \ - phi::CPUPlace>::Cache() \ - .At(attr) - -// Wh GEMM -#define GEMM_WH_ADDON(bs, prev, out) \ - blas.GEMM(CblasNoTrans, \ - CblasNoTrans, \ - bs, \ - D4, \ - D, \ - static_cast(1), \ - prev, \ - D, \ - wh_data, \ - D4, \ - static_cast(1), \ - out, \ - D4) - - void SeqCompute(const framework::ExecutionContext& ctx) const { - INIT_BASE_DEFINES; - INIT_OTHER_DEFINES; - auto x_lod = x->lod(); - const int total_T = static_cast(x_dims[0]); - const int N = static_cast(x_lod[0].size() - 1); - const T* h0_data = h0 ? h0->data() : nullptr; - const T* c0_data = c0 ? c0->data() : nullptr; - T* xx_data = xx->mutable_data(place); - T* h_out_data = hidden_out->mutable_data(place); - T* c_out_data = cell_out->mutable_data(place); - auto& dev_ctx = ctx.template device_context(); - auto blas = phi::funcs::GetBlas(dev_ctx); - - phi::funcs::FCFunctor fc; - fc(dev_ctx, total_T, D4, M, x_data, wx_data, xx_data, bias->data()); - - int xx_offset = D4; - int gate_offset = D; - if (is_reverse) { - const int offset = (total_T - 1) * D; - xx_data = xx_data + offset * 4; - h_out_data = h_out_data + offset; - c_out_data = c_out_data + offset; - xx_offset = -D4; - gate_offset = -D; - } - - for (int i = 0; i < N; ++i) { - int bid = is_reverse ? 
N - 1 - i : i; - int seq_len = static_cast(x_lod[0][bid + 1] - x_lod[0][bid]); - const T* prev_c_data = nullptr; - const T* prev_h_data = nullptr; - int tstart = 0; - if (h0_data) { - prev_h_data = h0_data + bid * D; - prev_c_data = c0_data + bid * D; - } else { - one_step.gates = xx_data; - one_step.ct = c_out_data; - one_step.ht = h_out_data; - ComputeC1H1(&one_step, &attr); - tstart = 1; - // move one step - prev_h_data = h_out_data; - prev_c_data = c_out_data; - xx_data = xx_data + xx_offset; - h_out_data = h_out_data + gate_offset; - c_out_data = c_out_data + gate_offset; - } - for (int step = tstart; step < seq_len; ++step) { - GEMM_WH_ADDON(1, prev_h_data, xx_data); - - one_step.gates = xx_data; - one_step.ct_1 = prev_c_data; - one_step.ct = c_out_data; - one_step.ht = h_out_data; - ComputeCtHt(&one_step, &attr); - // move one step - prev_h_data = h_out_data; - prev_c_data = c_out_data; - xx_data = xx_data + xx_offset; - h_out_data = h_out_data + gate_offset; - c_out_data = c_out_data + gate_offset; - } - } - } - - void BatchCompute(const framework::ExecutionContext& ctx) const { - INIT_BASE_DEFINES; - if (x->lod()[0].size() == 2) { - xx->Resize({x_dims[0], D4}); - SeqCompute(ctx); - return; - } - INIT_OTHER_DEFINES; - - auto* reordered_h0 = ctx.Output("ReorderedH0"); - auto* reordered_c0 = ctx.Output("ReorderedC0"); - auto* batched_input = ctx.Output("BatchedInput"); - auto* batched_c_out = ctx.Output("BatchedCell"); - auto* batched_h_out = ctx.Output("BatchedHidden"); - T* xx_data = xx->mutable_data(place); - T* batched_input_data = batched_input->mutable_data(place); - T* batched_c_out_data = batched_c_out->mutable_data(place); - T* batched_h_out_data = batched_h_out->mutable_data(place); - hidden_out->mutable_data(place); - cell_out->mutable_data(place); - - phi::funcs::LoDTensor2BatchFunctor to_batch; - auto& dev_ctx = ctx.template device_context(); - auto blas = phi::funcs::GetBlas(dev_ctx); - phi::funcs::FCFunctor fc; - if (M > D4) { - fc(dev_ctx, 
x_dims[0], D4, M, x_data, wx_data, xx_data, bias->data()); - to_batch(dev_ctx, *xx, batched_input, true, is_reverse); - } else { - to_batch(dev_ctx, *x, xx, true, is_reverse); - batched_input->set_lod(xx->lod()); - fc(dev_ctx, - x_dims[0], - D4, - M, - xx_data, - wx_data, - batched_input_data, - bias->data()); - } - - auto batched_lod = batched_input->lod(); - const auto& seq_order = batched_lod[2]; - const int max_bs = static_cast(seq_order.size()); - reordered_h0->Resize({max_bs, D}); - reordered_c0->Resize({max_bs, D}); - - int tstart = 0; - T* prev_h_data = nullptr; - T* prev_c_data = nullptr; - if (h0) { - // reorder h0, c0 - T* reordered_h0_data = reordered_h0->mutable_data(place); - T* reordered_c0_data = reordered_c0->mutable_data(place); - const T* h0_data = h0->data(); - const T* c0_data = c0->data(); - prev_h_data = reordered_h0_data; - prev_c_data = reordered_c0_data; - size_t sz = D; - for (int i = 0; i < max_bs; ++i) { - blas.VCOPY(sz, h0_data + seq_order[i] * D, reordered_h0_data); - blas.VCOPY(sz, c0_data + seq_order[i] * D, reordered_c0_data); - reordered_h0_data += D; - reordered_c0_data += D; - } - } else { - // compute without h0, c0 - T* cur_in_data = batched_input_data; - T* cur_h_out_data = batched_h_out_data; - T* cur_c_out_data = batched_c_out_data; - for (int i = 0; i < max_bs; ++i) { - one_step.gates = cur_in_data; - one_step.ct = cur_c_out_data; - one_step.ht = cur_h_out_data; - ComputeC1H1(&one_step, &attr); - - cur_in_data += D4; - cur_c_out_data += D; - cur_h_out_data += D; - } - tstart = 1; - prev_h_data = batched_h_out_data; - prev_c_data = batched_c_out_data; - } - - // compute kernel part - const auto& batch_starts = batched_lod[0]; - const int max_seq_len = static_cast(batch_starts.size() - 1); - const int offset = tstart * max_bs * D; - batched_input_data = batched_input_data + offset * 4; - batched_h_out_data = batched_h_out_data + offset; - batched_c_out_data = batched_c_out_data + offset; - for (int step = tstart; step < 
max_seq_len; ++step) { - const int cur_bs = - static_cast(batch_starts[step + 1] - batch_starts[step]); - GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data); - T* cur_in_data = batched_input_data; - T* cur_prev_c_data = prev_c_data; - T* cur_c_out_data = batched_c_out_data; - T* cur_h_out_data = batched_h_out_data; - for (int i = 0; i < cur_bs; ++i) { - one_step.gates = cur_in_data; - one_step.ct_1 = cur_prev_c_data; - one_step.ct = cur_c_out_data; - one_step.ht = cur_h_out_data; - ComputeCtHt(&one_step, &attr); - - // move one batch - cur_in_data += D4; - cur_prev_c_data += D; - cur_c_out_data += D; - cur_h_out_data += D; - } - // move one step - prev_c_data = batched_c_out_data; - prev_h_data = batched_h_out_data; - batched_c_out_data = cur_c_out_data; - batched_h_out_data = cur_h_out_data; - batched_input_data = cur_in_data; - } - - phi::funcs::Batch2LoDTensorFunctor to_seq; - batched_h_out->set_lod(batched_lod); - to_seq(dev_ctx, *batched_h_out, hidden_out); - batched_c_out->set_lod(batched_lod); - to_seq(dev_ctx, *batched_c_out, cell_out); - } - - void Compute(const framework::ExecutionContext& ctx) const override { - if (ctx.Attr("use_seq")) { - SeqCompute(ctx); - } else { - BatchCompute(ctx); - } - } - -#undef GEMM_WH_ADDON -#undef INIT_OTHER_DEFINES -#undef INIT_BASE_DEFINES -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(fusion_lstm, ops::FusionLSTMOp, ops::FusionLSTMOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - fusion_lstm, CPU, ALL_LAYOUT, ops::FusionLSTMKernel, float, double) {} diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.h b/paddle/fluid/operators/fused/fusion_lstm_op.h deleted file mode 100644 index c62060d7c225c..0000000000000 --- a/paddle/fluid/operators/fused/fusion_lstm_op.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class FusionLSTMOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override; -}; - -class FusionLSTMOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override; -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc b/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc deleted file mode 100644 index 05c517fd9ac09..0000000000000 --- a/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc +++ /dev/null @@ -1,476 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/operators/fused/fusion_lstm_op.h" -#include "paddle/fluid/operators/fused/onednn/fusion_rnn_onednn.h" -#include "paddle/phi/core/expect.h" - -namespace paddle { -namespace operators { - -using phi::OneDNNContext; -using phi::funcs::OneDNNGetDataType; -using phi::funcs::OneDNNMemDesc; -using phi::funcs::RNNReorderType; -using OneDNNMemoryFormat = dnnl::memory::format_tag; - -template -class LSTMMKLDNNHandler - : public RNNMKLDNNHandler { - public: - LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const OneDNNContext& dev_ctx, - const dnnl::engine onednn_engine, - phi::Place cpu_place UNUSED, - const phi::DenseTensor* input, - const phi::DenseTensor* weight_h, - const phi::DenseTensor* h0, - const phi::DenseTensor* c0 UNUSED, - const bool is_reverse, - const int64_t N, - const int64_t Ti, - const int64_t IC, - const int64_t OC, - const std::string& unique_name UNUSED) - : RNNMKLDNNHandler( - ctx, - dev_ctx, - onednn_engine, - ctx.GetPlace(), - input, - weight_h, - h0, - is_reverse, - N, - Ti, - IC, - OC, - 4, - ctx.InputName("X") + ctx.InputName("WeightH")) { - if (unlikely(!this->isCached())) { - const bool is_INT8 = std::is_same::value; - const bool use_peepholes = ctx.Attr("use_peepholes"); - // oneDNN kernel has hardcoded activation functions - PADDLE_ENFORCE_EQ( - ctx.Attr("gate_activation"), - "sigmoid", - phi::errors::Unimplemented("oneDNN fusion_lstm supports only " - "sigmoid as a gate activation.")); - PADDLE_ENFORCE_EQ( - ctx.Attr("cell_activation"), - "tanh", - phi::errors::Unimplemented( - "oneDNN fusion_lstm supports only tanh as a cell activation.")); - PADDLE_ENFORCE_EQ( - ctx.Attr("candidate_activation"), - "tanh", - phi::errors::Unimplemented( - "oneDNN fusion_lstm supports only tanh a candidate activation.")); - - // Weights for int8 kernel are of a 
type s8 - const auto weights_dt = - is_INT8 ? dnnl::memory::data_type::s8 : OneDNNGetDataType(); - - // oneDNN RNN dimensions - const int64_t D = 1; // Directions - const int64_t L = 1; // Layers (PP supports only 1 stacked layer) - const int64_t G = 4; // Number of Gates, 4 for LSTM - - // Create memory descriptors - auto input_md = OneDNNMemDesc( - {Ti, N, IC}, OneDNNGetDataType(), OneDNNMemoryFormat::tnc); - auto weight_x_md = - OneDNNMemDesc({L, D, IC, G, OC}, weights_dt, OneDNNMemoryFormat::any); - auto weight_h_md = - OneDNNMemDesc({L, D, OC, G, OC}, weights_dt, OneDNNMemoryFormat::any); - auto bias_md = OneDNNMemDesc( - {L, D, G, OC}, OneDNNGetDataType(), OneDNNMemoryFormat::ldgo); - auto hidden_md = OneDNNMemDesc( - {Ti, N, OC}, OneDNNGetDataType(), OneDNNMemoryFormat::any); - - auto h0_md = OneDNNMemDesc( - {L, D, N, OC}, OneDNNGetDataType(), OneDNNMemoryFormat::any); - auto c0_md = OneDNNMemDesc( - {L, D, N, OC}, OneDNNGetDataType(), OneDNNMemoryFormat::any); - - // Create LSTM oneDNN primitive - const auto direction = - is_reverse ? 
dnnl::rnn_direction::unidirectional_right2left - : dnnl::rnn_direction::unidirectional_left2right; - if (!use_peepholes) { - this->AcquireForwardPrimitiveDescriptor( - this->attr_, - dnnl::prop_kind::forward_inference, - direction, - input_md, - h0_md, - c0_md, - weight_x_md, - weight_h_md, - bias_md, - hidden_md, - dnnl::memory::desc(), - dnnl::memory::desc()); - } else { - auto weight_peephole_md = OneDNNMemDesc({L, D, 3, OC}, - OneDNNGetDataType(), - OneDNNMemoryFormat::ldgo); - this->AcquireForwardPrimitiveDescriptor( - this->attr_, - dnnl::prop_kind::forward_inference, - direction, - input_md, - h0_md, - c0_md, - weight_x_md, - weight_h_md, - weight_peephole_md, - bias_md, - hidden_md, - dnnl::memory::desc(), - dnnl::memory::desc()); - } - } - } - - // PaddlePaddle has different order of weights than oneDNN, so a reorder is - // needed - // PaddlePaddle: {c, i, f, o} - // oneDNN: {i, f, c, o} - template - void ReorderGates(U* weights, int64_t I) { - size_t inner_block_size = this->OC; - size_t block_size = inner_block_size * this->G; - for (size_t i = 0; i < (size_t)I; ++i) { // NOLINT - size_t offset = i * block_size; - - U* base_pos = weights + offset; - std::swap_ranges(base_pos, - base_pos + inner_block_size, - base_pos + inner_block_size); // c <-> i - std::swap_ranges(base_pos + inner_block_size, - base_pos + 2 * inner_block_size, - base_pos + 2 * inner_block_size); // c <-> f - } - } - - template - std::shared_ptr AcquireWeightXMemory( - const phi::DenseTensor* weight_x) { - const std::string wx_key = this->memory_key_ + "@weight_x"; - auto memory_p = - std::static_pointer_cast(this->dev_ctx_.GetBlob(wx_key)); - - if (!memory_p) { - auto user_md = OneDNNMemDesc({1, 1, this->IC, this->G, this->OC}, - OneDNNGetDataType(), - OneDNNMemoryFormat::ldigo); - auto user_memory = dnnl::memory(user_md, this->engine_); - - auto* weight_x_data = reinterpret_cast(user_memory.get_data_handle()); - memcpy(weight_x_data, - weight_x->data(), - sizeof(U) * this->IC * 
this->G * this->OC); - - ReorderGates(weight_x_data, this->IC); - - memory_p = std::make_shared( - this->fwd_pd_->weights_layer_desc(), this->engine_); - - auto& astream = OneDNNContext::tls().get_stream(); - dnnl::reorder(user_memory, *memory_p, this->attr_) - .execute(astream, user_memory, *memory_p); - - this->dev_ctx_.SetBlob(wx_key, memory_p); - } - return memory_p; - } - - template - std::shared_ptr AcquireWeightHMemory( - const phi::DenseTensor* weight_h) { - const std::string wh_key = this->memory_key_ + "@weight_h"; - auto memory_p = - std::static_pointer_cast(this->dev_ctx_.GetBlob(wh_key)); - - if (!memory_p) { - auto user_md = OneDNNMemDesc({1, 1, this->OC, this->G, this->OC}, - OneDNNGetDataType(), - OneDNNMemoryFormat::ldigo); - auto user_memory = dnnl::memory(user_md, this->engine_); - - auto* weight_h_data = reinterpret_cast(user_memory.get_data_handle()); - memcpy(weight_h_data, - weight_h->data(), - sizeof(U) * this->OC * this->G * this->OC); - - ReorderGates(weight_h_data, this->OC); - - memory_p = std::make_shared( - this->fwd_pd_->weights_iter_desc(), this->engine_); - - auto& astream = OneDNNContext::tls().get_stream(); - dnnl::reorder(user_memory, *memory_p, this->attr_) - .execute(astream, user_memory, *memory_p); - - this->dev_ctx_.SetBlob(wh_key, memory_p); - } - return memory_p; - } - - std::shared_ptr AcquireBiasMemory( - const phi::DenseTensor* bias) { - const std::string bias_key = this->memory_key_ + "@bias"; - auto memory_p = std::static_pointer_cast( - this->dev_ctx_.GetBlob(bias_key)); - - if (!memory_p) { - memory_p = std::make_shared(this->fwd_pd_->bias_desc(), - this->engine_); - auto* bias_data = reinterpret_cast(memory_p->get_data_handle()); - if (bias) { - const float* user_bias_data = - bias->data(); // Bias in oneDNN is always float - - memcpy(bias_data, user_bias_data, sizeof(float) * this->G * this->OC); - - ReorderGates(bias_data, 1); - } else { - // oneDNN always need bias memory, if it's not provided in PP, let - // 
oneDNN allocate memory and set it to 0 - memset(bias_data, 0, sizeof(float) * this->G * this->OC); - } - - this->dev_ctx_.SetBlob(bias_key, memory_p); - } - return memory_p; - } - - std::shared_ptr AcquirePeepholeWeights( - const phi::DenseTensor* bias) { - const std::string peepholes_key = this->memory_key_ + "@peepholes_weights"; - auto memory_p = std::static_pointer_cast( - this->dev_ctx_.GetBlob(peepholes_key)); - - if (!memory_p) { - auto user_md = OneDNNMemDesc({1, 1, 3, this->OC}, - OneDNNGetDataType(), - OneDNNMemoryFormat::ldgo); - auto user_memory = dnnl::memory(user_md, this->engine_); - memory_p = std::make_shared( - this->fwd_pd_->weights_peephole_desc(), this->engine_); - auto* peephole_weights_data = - reinterpret_cast(memory_p->get_data_handle()); - - const float* user_bias_data = - bias->data(); // Bias in oneDNN is always float - memcpy(peephole_weights_data, - user_bias_data + 4 * this->OC, - sizeof(float) * 3 * this->OC); - - this->dev_ctx_.SetBlob(peepholes_key, memory_p); - } - return memory_p; - } - - std::shared_ptr AcquireC0Memory(const phi::DenseTensor* c0) { - const std::string c0_key = this->memory_key_ + "@c0"; - auto memory_p = - std::static_pointer_cast(this->dev_ctx_.GetBlob(c0_key)); - - if (!memory_p) { - auto user_c0_memory = dnnl::memory(); - if (c0) { - user_c0_memory = - dnnl::memory({{1, 1, this->N, this->OC}, - OneDNNGetDataType(), - OneDNNMemoryFormat::ldnc}, - this->engine_, - phi::funcs::to_void_cast(c0->data())); - } else { - user_c0_memory = dnnl::memory({{1, 1, this->N, this->OC}, - OneDNNGetDataType(), - OneDNNMemoryFormat::ldnc}, - this->engine_); - memset(user_c0_memory.get_data_handle(), - 0, - sizeof(float) * this->N * this->OC); - } - memory_p = std::make_shared( - this->fwd_pd_->src_iter_c_desc(), this->engine_); - - auto& astream = OneDNNContext::tls().get_stream(); - dnnl::reorder(user_c0_memory, *memory_p) - .execute(astream, user_c0_memory, *memory_p); - - this->dev_ctx_.SetBlob(c0_key, memory_p); - } - 
return memory_p; - } -}; - -template -class FusionLSTMMKLDNNKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const bool is_bf16 = std::is_same::value; - const bool force_fp32_output = ctx.Attr("force_fp32_output"); - - // BF16 does not support force output - if (!is_bf16 && force_fp32_output) { // NOLINT - RunKernel(ctx); - } else { - RunKernel(ctx); - } - } - - template - void RunKernel(const framework::ExecutionContext& ctx) const { - auto& dev_ctx = ctx.template device_context(); - const auto& onednn_engine = dev_ctx.GetEngine(); - - // Get Tensors - const auto* input = ctx.Input("X"); - const auto* h0 = ctx.Input("H0"); - const auto* c0 = ctx.Input("C0"); - const auto* weight_x = ctx.Input("WeightX"); - const auto* weight_h = ctx.Input("WeightH"); - const auto* bias = ctx.Input("Bias"); - auto* hidden = ctx.Output("Hidden"); - auto x_dims = input->dims(); - auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) - ? 
common::flatten_to_2d(x_dims, 1) - : x_dims; - // Get attributes - const bool is_reverse = ctx.Attr("is_reverse"); - const bool use_peepholes = ctx.Attr("use_peepholes"); - - // Get tensor dimensions - const auto x_mat_dims_vec = common::vectorize(x_mat_dims); - const auto weight_h_dims = common::vectorize(weight_h->dims()); - const auto& input_lod = input->lod()[0]; - - // Calculate RNN dimensions - const int64_t N = input_lod.size() - 1; // Number of sentences (batches) - const int64_t Ti = // Max length of the sentence in a batch - [&input_lod]() { - size_t res = 0; - for (size_t i = 0; i < (input_lod.size() - 1); ++i) { - res = std::max(res, input_lod[i + 1] - input_lod[i]); - } - return res; - }(); - const int64_t IC = x_mat_dims_vec[1]; // Input channels - const int64_t OC = weight_h_dims[0]; // Output channels - - LSTMMKLDNNHandler handler( - ctx, - dev_ctx, - onednn_engine, - ctx.GetPlace(), - input, - weight_h, - h0, - c0, - is_reverse, - N, - Ti, - IC, - OC, - ctx.InputName("X") + ctx.InputName("WeightH")); - - auto input_memory_p = - handler.AcquireInputMemoryWithReorder(input, is_reverse); - auto c0_memory_p = handler.AcquireC0Memory(c0); - - std::shared_ptr h0_memory_p, weight_h_memory_p, - weight_x_memory_p; - - if (weight_h->dtype() == phi::DataType::FLOAT32) { - h0_memory_p = handler.template AcquireH0Memory(h0); - weight_x_memory_p = - handler.template AcquireWeightXMemory(weight_x); - weight_h_memory_p = - handler.template AcquireWeightHMemory(weight_h); - } else if (weight_h->dtype() == phi::DataType::BFLOAT16) { - h0_memory_p = handler.template AcquireH0Memory(h0); - weight_x_memory_p = - handler.template AcquireWeightXMemory(weight_x); - weight_h_memory_p = - handler.template AcquireWeightHMemory(weight_h); - } else { - h0_memory_p = handler.template AcquireH0Memory(h0); - weight_x_memory_p = - handler.template AcquireWeightXMemory(weight_x); - weight_h_memory_p = - handler.template AcquireWeightHMemory(weight_h); - } - - auto bias_memory_p = 
handler.AcquireBiasMemory(bias); - auto hidden_onednn_memory_p = handler.AcquireOutputMemory(); - - std::unordered_map lstm_args = { - {DNNL_ARG_SRC_LAYER, *input_memory_p}, - {DNNL_ARG_SRC_ITER, *h0_memory_p}, - {DNNL_ARG_SRC_ITER_C, *c0_memory_p}, - {DNNL_ARG_WEIGHTS_LAYER, *weight_x_memory_p}, - {DNNL_ARG_WEIGHTS_ITER, *weight_h_memory_p}, - {DNNL_ARG_BIAS, *bias_memory_p}, - {DNNL_ARG_DST_LAYER, *hidden_onednn_memory_p}}; - - if (use_peepholes) { - auto peephole_weight_p = handler.AcquirePeepholeWeights(bias); - std::pair peepholes_weights(DNNL_ARG_WEIGHTS_PEEPHOLE, - *peephole_weight_p); - lstm_args.insert(peepholes_weights); - } - - auto lstm_forward_p = handler.AcquireForwardPrimitive(); - - auto& astream = OneDNNContext::tls().get_stream(); - lstm_forward_p->execute(astream, lstm_args); - astream.wait(); - - auto* hidden_onednn_data = hidden_onednn_memory_p->get_data_handle(); - auto* hidden_data = - phi::funcs::to_void_cast(hidden->mutable_data(ctx.GetPlace())); - if (handler.is_NTC()) { - handler.reorderRNNdata(hidden_onednn_data, - hidden_data, - input_lod, - is_reverse, - RNNReorderType::NTC_PP); - } else { - handler.reorderRNNdata(hidden_onednn_data, - hidden_data, - input_lod, - is_reverse, - RNNReorderType::TNC_PP); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(fusion_lstm, - OneDNN, - ONEDNN, - ops::FusionLSTMMKLDNNKernel, - float, - uint8_t, - phi::dtype::bfloat16) {} diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index 30f1aff92a256..9f8b8d0744ffe 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -15,8 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/common/float16.h" -namespace paddle { -namespace operators { +namespace paddle::operators { // Shape of bitmask static phi::DDim GetBitmaskDims(std::vector out_shape) { @@ -450,8 +449,7 @@ class ResNetUnitOpInferVarType } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; REGISTER_OPERATOR(resnet_unit, diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc index 851c448865363..14021c1a2f659 100644 --- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc +++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc @@ -13,8 +13,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/generator.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class SoftmaxMaskFuseUpperTriangleOp : public framework::OperatorWithKernel { public: @@ -89,8 +88,7 @@ class SoftmaxMaskFuseUpperTriangleGradOpMaker } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; REGISTER_OPERATOR( diff --git a/paddle/fluid/operators/load_combine_op.cc b/paddle/fluid/operators/load_combine_op.cc index bcd27e8186b7f..40680dbf00829 100644 --- a/paddle/fluid/operators/load_combine_op.cc +++ b/paddle/fluid/operators/load_combine_op.cc @@ -76,7 +76,7 @@ that were saved using the SaveCombine operator. 
} // namespace operators } // namespace paddle -namespace ops = paddle::operators; +namespace ops = paddle::operators; // NOLINT REGISTER_OPERATOR(load_combine, ops::LoadCombineOp, diff --git a/paddle/fluid/operators/logspace_op.cc b/paddle/fluid/operators/logspace_op.cc index 171ee209ebd0e..4088f4ba0f291 100644 --- a/paddle/fluid/operators/logspace_op.cc +++ b/paddle/fluid/operators/logspace_op.cc @@ -20,8 +20,7 @@ #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/multiary.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class LogspaceOp : public framework::OperatorWithKernel { public: @@ -63,8 +62,7 @@ class LogspaceOpMaker : public framework::OpProtoAndCheckerMaker { )DOC"); } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; DECLARE_INFER_SHAPE_FUNCTOR(logspace, diff --git a/paddle/fluid/operators/lstm_op.cc b/paddle/fluid/operators/lstm_op.cc deleted file mode 100644 index ac5cb81c060f0..0000000000000 --- a/paddle/fluid/operators/lstm_op.cc +++ /dev/null @@ -1,365 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/lstm_op.h" - -#include -#include - -namespace paddle { -namespace operators { - -class LSTMOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "LSTM"); - OP_INOUT_CHECK(ctx->HasInput("Weight"), "Input", "Weight", "LSTM"); - OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", "LSTM"); - - OP_INOUT_CHECK(ctx->HasOutput("Hidden"), "Output", "Hidden", "LSTM"); - OP_INOUT_CHECK(ctx->HasOutput("Cell"), "Output", "Cell", "LSTM"); - - bool is_test = ctx->Attrs().Get("is_test"); - - if (!is_test) { - OP_INOUT_CHECK( - ctx->HasOutput("BatchGate"), "Output", "BatchGate", "LSTM"); - OP_INOUT_CHECK(ctx->HasOutput("BatchCellPreAct"), - "Output", - "BatchCellPreAct", - "LSTM"); - } - auto in_dims = ctx->GetInputDim("Input"); - PADDLE_ENFORCE_EQ( - in_dims.size(), - 2, - phi::errors::InvalidArgument( - "Input(X)'s rank must be 2, but received %d.", in_dims.size())); - - if (ctx->HasInput("H0")) { - PADDLE_ENFORCE_EQ( - ctx->HasInput("C0"), - true, - phi::errors::NotFound("Input(Cell) and Input(Hidden) of LSTM " - "should not be null at the same time.")); - auto h_dims = ctx->GetInputDim("H0"); - auto c_dims = ctx->GetInputDim("C0"); - PADDLE_ENFORCE_EQ(h_dims, - c_dims, - phi::errors::InvalidArgument( - "The dimension of Input(H0) and Input(C0) should " - "be the same, but received [%s] (H0) vs [%s] (C0).", - h_dims, - c_dims)); - } - - int frame_size = static_cast(in_dims[1] / 4); - auto w_dims = ctx->GetInputDim("Weight"); - PADDLE_ENFORCE_EQ( - w_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(Weight) should be 2, but received %d.", - w_dims.size())); - PADDLE_ENFORCE_EQ(w_dims[0], - frame_size, - phi::errors::InvalidArgument( - "The first dimension of Input(Weight) should be %d, " - "but received %d.", - frame_size, - 
w_dims[0])); - PADDLE_ENFORCE_EQ(w_dims[1], - 4 * frame_size, - phi::errors::InvalidArgument( - "The second dimension of Input(Weight) should be 4 * " - "%d, but received %d.", - frame_size, - w_dims[1])); - - auto b_dims = ctx->GetInputDim("Bias"); - PADDLE_ENFORCE_EQ( - b_dims.size(), - 2, - phi::errors::InvalidArgument( - "The rank of Input(Bias) should be 2, but received %d.", - b_dims.size())); - PADDLE_ENFORCE_EQ( - b_dims[0], - 1, - phi::errors::InvalidArgument( - "The first dimension of Input(Bias) should be 1, but received %d.", - b_dims[0])); - - if (ctx->Attrs().Get("use_peepholes")) { - PADDLE_ENFORCE_EQ( - b_dims[1], - 7 * frame_size, - phi::errors::InvalidArgument( - "The second dimension of Input(Bias) should be 7 * %d if enable " - "peepholes connection, but received %d.", - frame_size, - b_dims[1])); - } else { - PADDLE_ENFORCE_EQ( - b_dims[1], - 4 * frame_size, - phi::errors::InvalidArgument( - "The second dimension of Input(Bias) should be 4 * %d if disable " - "peepholes connection, but received %d.", - frame_size, - b_dims[1])); - } - - phi::DDim out_dims({in_dims[0], frame_size}); - ctx->SetOutputDim("Hidden", out_dims); - ctx->SetOutputDim("Cell", out_dims); - if (!is_test) { - ctx->SetOutputDim("BatchGate", in_dims); - ctx->SetOutputDim("BatchCellPreAct", out_dims); - } - ctx->ShareLoD("Input", "Hidden"); - ctx->ShareLoD("Input", "Cell"); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Input"), - ctx.device_context().GetPlace()); - } -}; - -class LSTMOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "Input", - "(phi::DenseTensor) the first input is a phi::DenseTensor, which " - "support variable-time length input sequence. 
The underlying tensor in " - "this phi::DenseTensor is a matrix with shape (T X 4D), where T is the " - "total time steps in this mini-batch, D is the hidden size."); - AddInput("H0", - "(Tensor, optional) the initial hidden state is an optional " - "input. This is a tensor with shape (N x D), where N is the " - "batch size and D is the hidden size.") - .AsDispensable(); - AddInput("C0", - "(Tensor, optional) the initial cell state is an optional " - "input. This is a tensor with shape (N x D), where N is the " - "batch size. `H0` and `C0` can be NULL but only at the same time.") - .AsDispensable(); - AddInput("Weight", - "(Tensor) the learnable hidden-hidden weights." - " - The shape is (D x 4D), where D is the hidden size. " - " - Weight = {W_ch, W_ih, W_fh, W_oh}"); - AddInput("Bias", - "(Tensor) the learnable weights, which contains two parts: " - "input-hidden bias weight and peephole connections weight if " - "setting `use_peepholes` True. " - "1. `use_peepholes = False` " - " - The shape is (1 x 4D). " - " - Bias = {b_c, b_i, b_f, b_o}." - "2. `use_peepholes = True` " - " - The shape is (1 x 7D). " - " - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."); - AddOutput("Hidden", - "(phi::DenseTensor) the hidden state of LSTM operator. " - "The shape is (T x D), and lod is the same with the `Input`."); - AddOutput("Cell", - "(phi::DenseTensor) the cell state of LSTM operator. " - "The shape is (T x D), and lod is the same with the `Input`."); - AddOutput( - "BatchGate", - "(phi::DenseTensor) This phi::DenseTensor contains input gate, forget " - "gate " - "and output gate after the nonlinear computation. This " - "phi::DenseTensor has the same shape as the reorganized input, which " - "is also be called batch input. The LoD size is 2. 
The first " - "LoD is the batch offsets and the second LoD contains the " - "indexes, which denote the position of reorganized sequence " - "in the raw input.") - .AsIntermediate() - .AsExtra(); - AddOutput("BatchCellPreAct", - "(phi::DenseTensor) This phi::DenseTensor is obtained in the " - "forward and used " - "in the backward.") - .AsIntermediate() - .AsExtra(); - AddAttr("use_peepholes", - "(bool, default: True) " - "whether to enable diagonal/peephole connections.") - .SetDefault(true); - AddAttr("is_reverse", - "(bool, default: False) " - "whether to compute reversed LSTM.") - .SetDefault(false); - AddAttr("is_test", "True if in test phase.").SetDefault(false); - AddAttr( - "gate_activation", - "(string, default: sigmoid)" - "The activation for input gate, forget gate and output " - "gate, `sigmoid` by default.") - .SetDefault("sigmoid") - .InEnum({"sigmoid", "tanh", "relu", "identity"}); - AddAttr("cell_activation", - "(string, default: tanh)" - "The activation for cell output, `tanh` by default.") - .SetDefault("tanh") - .InEnum({"sigmoid", "tanh", "relu", "identity"}); - AddAttr("candidate_activation", - "(string, default: tanh)" - "The activation for candidate hidden state, " - "`tanh` by default.") - .SetDefault("tanh") - .InEnum({"sigmoid", "tanh", "relu", "identity"}); - AddComment(R"DOC( -Long-Short Term Memory (LSTM) Operator. - -The default implementation is diagonal/peephole connection -(https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows: - -$$ i_t = \\sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) $$ - -$$ f_t = \\sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) $$ - -$$ \\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) $$ - -$$ o_t = \\sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) $$ - -$$ c_t = f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t} $$ - -$$ h_t = o_t \\odot act_h(c_t) $$ - -- W terms denote weight matrices (e.g. 
$W_{xi}$ is the matrix - of weights from the input gate to the input), $W_{ic}, W_{fc}, W_{oc}$ - are diagonal weight matrices for peephole connections. In our implementation, - we use vectors to represent these diagonal weight matrices. -- The b terms denote bias vectors ($b_i$ is the input gate bias vector). -- $\sigma$ is the non-line activations, such as logistic sigmoid function. -- $i, f, o$ and $c$ are the input gate, forget gate, output gate, - and cell activation vectors, respectively, all of which have the same size as - the cell output activation vector $h$. -- The $\odot$ is the element-wise product of the vectors. -- $act_g$ and $act_h$ are the cell input and cell output activation functions - and `tanh` is usually used for them. -- $\tilde{c_t}$ is also called candidate hidden state, - which is computed based on the current input and the previous hidden state. - -Set `use_peepholes` False to disable peephole connection. The formula -is omitted here, please refer to the paper -http://www.bioinf.jku.at/publications/older/2604.pdf for details. - -Note that these $W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}$ -operations on the input $x_{t}$ are NOT included in this operator. -Users can choose to use fully-connect operator before LSTM operator. 
- -)DOC"); - } -}; - -class LSTMGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "LSTM@Grad"); - OP_INOUT_CHECK(ctx->HasInput("Hidden"), "Input", "Hidden", "LSTM@Grad"); - OP_INOUT_CHECK(ctx->HasInput("Cell"), "Input", "Cell", "LSTM@Grad"); - OP_INOUT_CHECK(ctx->HasInput("Weight"), "Input", "Weight", "LSTM@Grad"); - OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", "LSTM@Grad"); - - OP_INOUT_CHECK( - ctx->HasInput("BatchGate"), "Input", "BatchGate", "LSTM@Grad"); - OP_INOUT_CHECK(ctx->HasInput("BatchCellPreAct"), - "Input", - "BatchCellPreAct", - "LSTM@Grad"); - - auto SetOutGradDim = [&ctx](const std::string& name) { - auto g_name = framework::GradVarName(name); - if (ctx->HasOutput(g_name)) - ctx->SetOutputDim(g_name, ctx->GetInputDim(name)); - }; - - SetOutGradDim("Input"); - SetOutGradDim("Weight"); - SetOutGradDim("Bias"); - SetOutGradDim("H0"); - SetOutGradDim("C0"); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Input"), - ctx.device_context().GetPlace()); - } -}; - -template -class LSTMGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("lstm_grad"); - op->SetAttrMap(this->Attrs()); - op->SetInput("Input", this->Input("Input")); - op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input")); - - if (this->HasInput("H0")) { - op->SetInput("H0", this->Input("H0")); - op->SetOutput(framework::GradVarName("H0"), this->InputGrad("H0")); - } - - if (this->HasInput("C0")) { - op->SetInput("C0", this->Input("C0")); - op->SetOutput(framework::GradVarName("C0"), 
this->InputGrad("C0")); - } - - op->SetInput("Weight", this->Input("Weight")); - op->SetOutput(framework::GradVarName("Weight"), this->InputGrad("Weight")); - - op->SetInput("Bias", this->Input("Bias")); - op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias")); - - op->SetInput("Cell", this->Output("Cell")); - - op->SetInput("Hidden", this->Output("Hidden")); - op->SetInput(framework::GradVarName("Hidden"), this->OutputGrad("Hidden")); - - op->SetInput("BatchGate", this->Output("BatchGate")); - op->SetInput("BatchCellPreAct", this->Output("BatchCellPreAct")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(lstm, - ops::LSTMOp, - ops::LSTMOpMaker, - ops::LSTMGradOpMaker, - ops::LSTMGradOpMaker); -REGISTER_OPERATOR(lstm_grad, ops::LSTMGradOp); - -PD_REGISTER_STRUCT_KERNEL( - lstm, CPU, ALL_LAYOUT, ops::LSTMKernel, float, double) {} -PD_REGISTER_STRUCT_KERNEL( - lstm_grad, CPU, ALL_LAYOUT, ops::LSTMGradKernel, float, double) {} diff --git a/paddle/fluid/operators/lstm_op.cu.cc b/paddle/fluid/operators/lstm_op.cu.cc deleted file mode 100644 index b06521088a95a..0000000000000 --- a/paddle/fluid/operators/lstm_op.cu.cc +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/lstm_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL( - lstm, GPU, ALL_LAYOUT, ops::LSTMKernel, float, double) {} -PD_REGISTER_STRUCT_KERNEL( - lstm_grad, GPU, ALL_LAYOUT, ops::LSTMGradKernel, float, double) {} diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h deleted file mode 100644 index 9eaba45a2d597..0000000000000 --- a/paddle/fluid/operators/lstm_op.h +++ /dev/null @@ -1,444 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/blas/blas.h" -#include "paddle/phi/kernels/funcs/detail/activation_functions.h" -#include "paddle/phi/kernels/funcs/lstm_compute.h" -#include "paddle/phi/kernels/funcs/sequence2batch.h" - -namespace paddle { -namespace operators { - -template -inline void ReorderInitState(const DeviceContext& ctx, - const phi::DenseTensor& src, - phi::Vector index_lod, - phi::DenseTensor* dst, - bool indexed_src) { - phi::funcs::CopyMatrixRowsFunctor row_shuffle; - dst->mutable_data(src.dims(), ctx.GetPlace()); - row_shuffle(ctx, src, index_lod, dst, indexed_src); -} - -template -class LSTMKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - bool is_test = ctx.Attr("is_test"); - - auto* input = ctx.Input("Input"); - auto* weight = ctx.Input("Weight"); - auto* bias = ctx.Input("Bias"); - - auto* hidden_t0 = ctx.Input("H0"); - auto* cell_t0 = ctx.Input("C0"); - - phi::DenseTensor* batch_gate = nullptr; - phi::DenseTensor batch_gate_temp; - if (is_test) { - batch_gate = &batch_gate_temp; - batch_gate->Resize(input->dims()); - } else { - batch_gate = ctx.Output("BatchGate"); - } - batch_gate->mutable_data(ctx.GetPlace()); - auto* hidden_out = ctx.Output("Hidden"); - hidden_out->mutable_data(ctx.GetPlace()); - auto* cell_out = ctx.Output("Cell"); - cell_out->mutable_data(ctx.GetPlace()); - - bool is_reverse = ctx.Attr("is_reverse"); - phi::funcs::LoDTensor2BatchFunctor to_batch; - auto& device_ctx = ctx.template device_context(); - to_batch(device_ctx, *input, batch_gate, true, is_reverse); - - auto in_dims = input->dims(); - int frame_size = static_cast(in_dims[1] / 4); - phi::DDim dims({in_dims[0], frame_size}); - - if (bias) { - phi::DenseTensor b = *bias; - b.Resize({bias->numel(), 1}); - phi::DenseTensor gate_bias = b.Slice(0, 4 * frame_size); - phi::funcs::RowwiseAdd add_bias; - add_bias(device_ctx, 
*batch_gate, gate_bias, batch_gate); - } - - phi::funcs::LstmMetaValue lstm_value; - if (bias && ctx.Attr("use_peepholes")) { - T* bias_data = const_cast(bias->data()); - // the code style in LstmMetaValue will be updated later. - - lstm_value.check_ig = bias_data + 4 * frame_size; - lstm_value.check_fg = lstm_value.check_ig + frame_size; - lstm_value.check_og = lstm_value.check_fg + frame_size; - } else { - lstm_value.check_ig = nullptr; - lstm_value.check_fg = nullptr; - lstm_value.check_og = nullptr; - } - lstm_value.prev_state_value = nullptr; - phi::DenseTensor ordered_c0; - - phi::Vector order(batch_gate->lod()[2]); - - if (cell_t0) { - // Since the batch computing for LSTM reorders the input sequence - // according to their length. The initialized cell state also needs - // to reorder. - ReorderInitState( - device_ctx, *cell_t0, order, &ordered_c0, true); - lstm_value.prev_state_value = ordered_c0.data(); - } - - // Use the local variable as here. - phi::DenseTensor batch_hidden, batch_cell, batch_cell_pre_act_temp; - phi::DenseTensor* batch_cell_pre_act; - if (is_test) { - batch_cell_pre_act = &batch_cell_pre_act_temp; - } else { - batch_cell_pre_act = ctx.Output("BatchCellPreAct"); - } - batch_hidden.mutable_data(dims, ctx.GetPlace()); - batch_cell.mutable_data(dims, ctx.GetPlace()); - batch_cell_pre_act->mutable_data(dims, ctx.GetPlace()); - - auto batch_starts = batch_gate->lod()[0]; - size_t num_batch = batch_starts.size() - 1; - auto gate_act = phi::funcs::detail::GetActivationType( - ctx.Attr("gate_activation")); - auto cell_act = phi::funcs::detail::GetActivationType( - ctx.Attr("cell_activation")); - auto cand_act = phi::funcs::detail::GetActivationType( - ctx.Attr("candidate_activation")); - - auto blas = phi::funcs::GetBlas(device_ctx); - for (size_t n = 0; n < num_batch; n++) { - int bstart = static_cast(batch_starts[n]); - int bend = static_cast(batch_starts[n + 1]); - - phi::DenseTensor gate_t = batch_gate->Slice(bstart, bend); - 
phi::DenseTensor out_t = batch_hidden.Slice(bstart, bend); - phi::DenseTensor cell_t = batch_cell.Slice(bstart, bend); - phi::DenseTensor cell_pre_act_t = batch_cell_pre_act->Slice(bstart, bend); - - int cur_batch_size = bend - bstart; - - if (n > 0) { - int pre_h_start = static_cast(batch_starts[n - 1]); - int pre_h_end = pre_h_start + cur_batch_size; - auto pre_hidden_t = batch_hidden.Slice(pre_h_start, pre_h_end); - blas.MatMul(pre_hidden_t, - false, - *weight, - false, - static_cast(1.0), - &gate_t, - static_cast(1.0)); - } else if (hidden_t0) { - // If n == 0 and there is no initialized hidden state, that is to say - // the H0 is zeros, the calculation W_h * H0 will be skiped. - // If n == 0 and there is initialized hidden state, calculate W_h * H0. - - // Since the batch computing for LSTM reorders the input sequence - // according to their length. The initialized hidden state also needs - // to reorder. - phi::DenseTensor ordered_h0; - ReorderInitState( - device_ctx, *hidden_t0, order, &ordered_h0, true); - blas.MatMul(ordered_h0, - false, - *weight, - false, - static_cast(1.0), - &gate_t, - static_cast(1.0)); - } - - lstm_value.gate_value = gate_t.data(); - lstm_value.output_value = out_t.data(); - lstm_value.state_value = cell_t.data(); - lstm_value.state_active_value = cell_pre_act_t.data(); - T cell_clip = 0.0; - phi::funcs::LstmUnitFunctor::compute(device_ctx, - lstm_value, - frame_size, - cur_batch_size, - cell_clip, - gate_act, - cell_act, - cand_act); - lstm_value.prev_state_value = lstm_value.state_value; - } - - phi::funcs::Batch2LoDTensorFunctor to_seq; - batch_hidden.set_lod(batch_gate->lod()); - // restore the output hidden in phi::DenseTensor from the batch hidden - to_seq(device_ctx, batch_hidden, hidden_out); - - batch_cell.set_lod(batch_gate->lod()); - // restore the output cell state in phi::DenseTensor from the batch cell - to_seq(device_ctx, batch_cell, cell_out); - } -}; - -template -class LSTMGradKernel : public framework::OpKernel { - 
public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* weight = ctx.Input("Weight"); - auto* bias = ctx.Input("Bias"); - - auto* hidden_out = ctx.Input("Hidden"); - auto* cell_out = ctx.Input("Cell"); - - auto* batch_gate = ctx.Input("BatchGate"); - auto* batch_cell_pre_act = ctx.Input("BatchCellPreAct"); - - auto* hidden_g = - ctx.Input(framework::GradVarName("Hidden")); - - auto* in_g = ctx.Output(framework::GradVarName("Input")); - auto* weight_g = - ctx.Output(framework::GradVarName("Weight")); - auto* bias_g = ctx.Output(framework::GradVarName("Bias")); - - auto* h0 = ctx.Input("H0"); - auto* c0 = ctx.Input("C0"); - - auto* h0_g = ctx.Output(framework::GradVarName("H0")); - auto* c0_g = ctx.Output(framework::GradVarName("C0")); - - auto& device_ctx = ctx.template device_context(); - phi::funcs::SetConstant zero; - if (weight_g) { - weight_g->mutable_data(ctx.GetPlace()); - zero(device_ctx, weight_g, static_cast(0.0)); - } - - // ordered_h0/c0 is the reordered hidden/cell initialization. - // ordered_h0_g/c0_g is the reordered gradient of hidden/cell - // initialization. 
- phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; - phi::Vector order(batch_gate->lod()[2]); - - if (c0) { - ReorderInitState( - device_ctx, *c0, order, &ordered_c0, true); - } - if (c0 && c0_g) { - ordered_c0_g.mutable_data(c0_g->dims(), ctx.GetPlace()); - } - - auto in_dims = input->dims(); - auto out_dims = hidden_g->dims(); - int frame_size = static_cast(in_dims[1] / 4); - PADDLE_ENFORCE_EQ( - frame_size, - out_dims[1], - phi::errors::InvalidArgument( - "The second dimension of Input(" + - framework::GradVarName("Hidden") + - ") should be %d, but received %d in LSTM@Grad operator.", - frame_size, - out_dims[1])); - - phi::funcs::LstmMetaValue lstm_value; - if (bias && ctx.Attr("use_peepholes")) { - T* bias_data = const_cast(bias->data()); - lstm_value.check_ig = bias_data + 4 * frame_size; - lstm_value.check_fg = lstm_value.check_ig + frame_size; - lstm_value.check_og = lstm_value.check_fg + frame_size; - } else { - lstm_value.check_ig = nullptr; - lstm_value.check_fg = nullptr; - lstm_value.check_og = nullptr; - } - - phi::funcs::LstmMetaGrad lstm_grad; - - if (bias && bias_g) { - bias_g->mutable_data(ctx.GetPlace()); - zero(device_ctx, bias_g, static_cast(0.0)); - } - if (bias && bias_g && ctx.Attr("use_peepholes")) { - T* bias_g_data = bias_g->data(); - lstm_grad.check_ig_grad = bias_g_data + 4 * frame_size; - lstm_grad.check_fg_grad = lstm_grad.check_ig_grad + frame_size; - lstm_grad.check_og_grad = lstm_grad.check_fg_grad + frame_size; - } else { - lstm_grad.check_ig_grad = nullptr; - lstm_grad.check_fg_grad = nullptr; - lstm_grad.check_og_grad = nullptr; - } - - phi::funcs::LoDTensor2BatchFunctor to_batch; - - auto ToBatch = [&batch_gate, &to_batch](const DeviceContext& ctx, - const phi::DenseTensor& src, - const phi::DDim& dims, - phi::DenseTensor& dst) { - dst.mutable_data(dims, ctx.GetPlace()); - dst.set_lod(batch_gate->lod()); - to_batch(ctx, src, &dst, false); - }; - - phi::DenseTensor batch_hidden, batch_hidden_g, batch_cell; - 
ToBatch(device_ctx, *hidden_out, out_dims, batch_hidden); - ToBatch(device_ctx, *hidden_g, out_dims, batch_hidden_g); - ToBatch(device_ctx, *cell_out, out_dims, batch_cell); - - phi::DenseTensor batch_cell_g, batch_gate_g; - batch_cell_g.mutable_data(out_dims, ctx.GetPlace()); - // TODO(qingqing) support the case output cell has gradient. - // to_batch(device_ctx, *cell_g, batch_cell_g, false); - zero(device_ctx, &batch_cell_g, static_cast(0.0)); - batch_gate_g.mutable_data(batch_gate->dims(), ctx.GetPlace()); - batch_gate_g.set_lod(batch_gate->lod()); - - auto gate_act = phi::funcs::detail::GetActivationType( - ctx.Attr("gate_activation")); - auto cell_act = phi::funcs::detail::GetActivationType( - ctx.Attr("cell_activation")); - auto cand_act = phi::funcs::detail::GetActivationType( - ctx.Attr("candidate_activation")); - - auto batch_starts = batch_gate->lod()[0]; - size_t num_batch = batch_starts.size() - 1; - auto blas = phi::funcs::GetBlas(device_ctx); - for (int n = static_cast(num_batch) - 1; n >= 0; n--) { - int bstart = static_cast(batch_starts[n]); - int bend = static_cast(batch_starts[n + 1]); - - phi::DenseTensor gate = batch_gate->Slice(bstart, bend); - phi::DenseTensor cell = batch_cell.Slice(bstart, bend); - phi::DenseTensor cell_pre_act = batch_cell_pre_act->Slice(bstart, bend); - lstm_value.gate_value = gate.data(); - lstm_value.state_value = cell.data(); - lstm_value.state_active_value = cell_pre_act.data(); - - phi::DenseTensor out_g = batch_hidden_g.Slice(bstart, bend); - phi::DenseTensor gate_g = batch_gate_g.Slice(bstart, bend); - phi::DenseTensor cell_g = batch_cell_g.Slice(bstart, bend); - lstm_grad.state_grad = cell_g.data(); - lstm_grad.gate_grad = gate_g.data(); - lstm_grad.output_grad = out_g.data(); - - if (n > 0) { - int bstart_pre = static_cast(batch_starts[n - 1]); - phi::DenseTensor cell_pre = batch_cell.Slice(bstart_pre, bstart); - phi::DenseTensor cell_pre_g = batch_cell_g.Slice(bstart_pre, bstart); - lstm_value.prev_state_value = 
cell_pre.data(); - lstm_grad.prev_state_grad = cell_pre_g.data(); - } else { - lstm_value.prev_state_value = c0 ? ordered_c0.data() : nullptr; - lstm_grad.prev_state_grad = c0_g ? ordered_c0_g.data() : nullptr; - } - - // lstm_value.output_value not used in bp, set to nullptr - // lstm_grad.state_active_grad not used in bp, set to nullptr - lstm_value.output_value = nullptr; - lstm_grad.state_active_grad = nullptr; - int cur_batch_size = bend - bstart; - T cell_clip = 0.0; - phi::funcs::LstmUnitGradFunctor::compute(device_ctx, - lstm_value, - lstm_grad, - frame_size, - cur_batch_size, - cell_clip, - gate_act, - cell_act, - cand_act); - - if (n > 0) { - int pre_h_start = static_cast(batch_starts[n - 1]); - int pre_h_end = pre_h_start + cur_batch_size; - auto pre_hidden_g = batch_hidden_g.Slice(pre_h_start, pre_h_end); - blas.MatMul(gate_g, - false, - *weight, - true, - static_cast(1.0), - &pre_hidden_g, - static_cast(1.0)); - if (weight_g) { - /* backward weight */ - auto pre_hidden = batch_hidden.Slice(pre_h_start, pre_h_end); - blas.MatMul(pre_hidden, - true, - gate_g, - false, - static_cast(1.0), - weight_g, - static_cast(1.0)); - } - } else { - if (h0 && weight_g) { - ReorderInitState( - device_ctx, *h0, order, &ordered_h0, true); - blas.MatMul(ordered_h0, - true, - gate_g, - false, - static_cast(1.0), - weight_g, - static_cast(1.0)); - } - if (h0 && h0_g) { - ordered_h0_g.mutable_data(h0_g->dims(), ctx.GetPlace()); - blas.MatMul(gate_g, - false, - *weight, - true, - static_cast(1.0), - &ordered_h0_g, - static_cast(0.0)); - } - } - } - - phi::funcs::Batch2LoDTensorFunctor to_seq; - if (in_g) { - /* backward data */ - in_g->mutable_data(ctx.GetPlace()); - to_seq(device_ctx, batch_gate_g, in_g); - } - if (bias && bias_g) { - /* backward bias */ - phi::DenseTensor b_g = *bias_g; - b_g.Resize({bias_g->numel(), 1}); - phi::DenseTensor gate_bias_g = b_g.Slice(0, 4 * frame_size); - phi::funcs::ColwiseSum col_sum; - col_sum(device_ctx, batch_gate_g, &gate_bias_g); - } - 
- if (h0 && h0_g) { - ReorderInitState( - device_ctx, ordered_h0_g, order, h0_g, false); - } - if (c0 && c0_g) { - ReorderInitState( - device_ctx, ordered_c0_g, order, c0_g, false); - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc index 85cd21831c9b1..b06c7a01a718a 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.cc +++ b/paddle/fluid/operators/memcpy_h2d_op.cc @@ -18,20 +18,17 @@ limitations under the License. */ #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class OpDesc; class InferShapeContext; template class EmptyGradOpMaker; -} // namespace framework -namespace imperative { +} // namespace paddle::framework +namespace paddle::imperative { class OpBase; -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative -namespace paddle { -namespace operators { +namespace paddle::operators { class MemcpyH2DOp : public framework::OperatorWithKernel { public: @@ -104,8 +101,7 @@ raise error if the type is not listed above. } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/nop_op.cc b/paddle/fluid/operators/nop_op.cc index 2c1486636561b..baf6fbbcf8661 100644 --- a/paddle/fluid/operators/nop_op.cc +++ b/paddle/fluid/operators/nop_op.cc @@ -15,8 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class NopOp : public framework::OperatorWithKernel { public: @@ -45,8 +44,7 @@ establish the dependency between input and output tensors. 
} }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/number_count_op.cc b/paddle/fluid/operators/number_count_op.cc deleted file mode 100644 index 7fb293891d3a5..0000000000000 --- a/paddle/fluid/operators/number_count_op.cc +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/number_count_op.h" - -namespace paddle { -namespace operators { - -class NumberCountOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("numbers"), "Input", "numbers", "NumberCount"); - OP_INOUT_CHECK( - ctx->HasOutput("Out"), "Output", "number_count", "NumberCount"); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - // the dtype of the numbers should be same as int64 - auto number_dtype = OperatorWithKernel::IndicateVarDataType(ctx, "numbers"); - - PADDLE_ENFORCE_EQ(number_dtype, - framework::proto::VarType::INT64, - phi::errors::InvalidArgument( - "The dtype of the number_dtype should be int64")); - return phi::KernelKey(number_dtype, ctx.GetPlace()); - } -}; - -class NumberCountOpMaker : public 
framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("numbers", "(Tensor) The input gate index tensor."); - AddOutput("Out", "(Tensor) The output number count tensor."); - AddAttr("upper_range", "(int), The number of different numbers."); - - AddComment(R"DOC(number_count Operator.count numbers.)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(number_count, - ops::NumberCountOp, - ops::NumberCountOpMaker); diff --git a/paddle/fluid/operators/number_count_op.h b/paddle/fluid/operators/number_count_op.h deleted file mode 100644 index 12ad10c3e73cc..0000000000000 --- a/paddle/fluid/operators/number_count_op.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" - -#if defined(PADDLE_WITH_GLOO) -#include "paddle/fluid/framework/fleet/gloo_wrapper.h" -#endif - -namespace paddle { -namespace operators { - -template -class NumberCountOpCPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override {} -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/ops_signature/assign_pos_sig.cc b/paddle/fluid/operators/ops_signature/assign_pos_sig.cc deleted file mode 100644 index 010d164d83dae..0000000000000 --- a/paddle/fluid/operators/ops_signature/assign_pos_sig.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature AssignPosOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature( - "assign_pos", {"X", "cum_count", "eff_num_len"}, {}, {"Out"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(assign_pos, phi::AssignPosOpArgumentMapping); diff --git a/paddle/fluid/operators/ops_signature/decayed_adagrad_sig.cc b/paddle/fluid/operators/ops_signature/decayed_adagrad_sig.cc deleted file mode 100644 index d622a8a342789..0000000000000 --- a/paddle/fluid/operators/ops_signature/decayed_adagrad_sig.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature DecayedAdagradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("decayed_adagrad", - {"Param", "Grad", "Moment", "LearningRate"}, - {"decay", "epsilon"}, - {"ParamOut", "MomentOut"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(decayed_adagrad, - phi::DecayedAdagradOpArgumentMapping); diff --git a/paddle/fluid/operators/ops_signature/fusion_group_sig.cc b/paddle/fluid/operators/ops_signature/fusion_group_sig.cc deleted file mode 100644 index 666e6f77d218f..0000000000000 --- a/paddle/fluid/operators/ops_signature/fusion_group_sig.cc +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature FusionGroupOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("fusion_group", - {"Inputs"}, - {"outs_dtype", "inputs_dtype", "func_name", "type"}, - {"Outs"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(fusion_group, phi::FusionGroupOpArgumentMapping); diff --git a/paddle/fluid/operators/ops_signature/prune_gate_by_capacity_sig.cc b/paddle/fluid/operators/ops_signature/prune_gate_by_capacity_sig.cc deleted file mode 100644 index b8bf7248cd701..0000000000000 --- a/paddle/fluid/operators/ops_signature/prune_gate_by_capacity_sig.cc +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature PruneGateByCapacityOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("prune_gate_by_capacity", - {"GateIdx", "ExpertCount"}, - {"n_expert", "n_worker"}, - {"NewGateIdx"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(prune_gate_by_capacity, - phi::PruneGateByCapacityOpArgumentMapping); diff --git a/paddle/fluid/operators/ops_signature/rrelu_sig.cc b/paddle/fluid/operators/ops_signature/rrelu_sig.cc deleted file mode 100644 index 18bda743e3255..0000000000000 --- a/paddle/fluid/operators/ops_signature/rrelu_sig.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature RReluOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature( - "rrelu", {"X"}, {"lower", "upper", "is_test"}, {"Out", "Noise"}); -} - -KernelSignature RReluGradGradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature( - "rrelu_grad", {"X", "Noise", "Out@GRAD"}, {}, {"X@GRAD"}); -} -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(rrelu, phi::RReluOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(rrelu_grad, phi::RReluGradGradOpArgumentMapping); diff --git a/paddle/fluid/operators/ops_signature/shuffle_batch_sig.cc b/paddle/fluid/operators/ops_signature/shuffle_batch_sig.cc deleted file mode 100644 index 22a9f76d95dd3..0000000000000 --- a/paddle/fluid/operators/ops_signature/shuffle_batch_sig.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature ShuffleBatchOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("shuffle_batch", - {"X", "Seed"}, - {"startup_seed"}, - {"Out", "ShuffleIdx", "SeedOut"}); -} - -KernelSignature ShuffleBatchGradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("shuffle_batch_grad", - {"ShuffleIdx", "Out@GRAD"}, - {"startup_seed"}, - {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(shuffle_batch, phi::ShuffleBatchOpArgumentMapping); - -PD_REGISTER_ARG_MAPPING_FN(shuffle_batch_grad, - phi::ShuffleBatchGradOpArgumentMapping); diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc deleted file mode 100644 index 23441206a55c1..0000000000000 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" - -#include "paddle/phi/infermeta/multiary.h" - -namespace paddle { -namespace operators { - -class DecayedAdagradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Param"), "Input", "Param", "DecayedAdagradOp"); - OP_INOUT_CHECK(ctx->HasInput("Grad"), "Input", "Grad", "DecayedAdagradOp"); - OP_INOUT_CHECK( - ctx->HasInput("Moment"), "Input", "Moment", "DecayedAdagradOp"); - OP_INOUT_CHECK(ctx->HasInput("LearningRate"), - "Input", - "LearningRate", - "DecayedAdagradOp"); - PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(), - framework::proto::VarType::LOD_TENSOR, - phi::errors::InvalidArgument( - "The input var's type should be phi::DenseTensor, " - "but the received is %s", - ctx->Inputs("Param").front(), - ctx->GetInputsVarType("Param").front())); - PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(), - framework::proto::VarType::LOD_TENSOR, - phi::errors::InvalidArgument( - "The input var's type should be phi::DenseTensor, " - "but the received is %s", - ctx->Inputs("Grad").front(), - ctx->GetInputsVarType("Grad").front())); - - OP_INOUT_CHECK( - ctx->HasOutput("ParamOut"), "Output", "ParamOut", "DecayedAdagradOp"); - OP_INOUT_CHECK( - ctx->HasOutput("MomentOut"), "Output", "MomentOut", "DecayedAdagradOp"); - - auto lr_dims = ctx->GetInputDim("LearningRate"); - PADDLE_ENFORCE_NE(common::product(lr_dims), - 0, - phi::errors::InvalidArgument( - "Maybe the Input variable LearningRate has not " - "been initialized. 
You may need to confirm " - "if you put exe.run(startup_program) " - "after optimizer.minimize function.")); - PADDLE_ENFORCE_EQ( - common::product(lr_dims), - 1, - phi::errors::InvalidArgument("LearningRate should have one element")); - auto param_dims = ctx->GetInputDim("Param"); - PADDLE_ENFORCE_EQ( - param_dims, - ctx->GetInputDim("Grad"), - phi::errors::InvalidArgument( - "Param and Grad input of DecayedAdagradOp should have " - "the same dimension.")); - PADDLE_ENFORCE_EQ( - param_dims, - ctx->GetInputDim("Moment"), - phi::errors::InvalidArgument( - "Param and Moment input of DecayedAdagradOp should have " - "the same dimension.")); - - ctx->SetOutputDim("ParamOut", param_dims); - ctx->SetOutputDim("MomentOut", param_dims); - } - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Param"), - ctx.GetPlace()); - } -}; - -class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Param", "(Tensor) Input parameter"); - AddInput("Grad", "(Tensor) Input gradient"); - AddInput("Moment", "(Tensor) Second moment"); - AddInput("LearningRate", "(Tensor) Learning rate"); - - AddOutput("ParamOut", "(Tensor) Output parameter"); - AddOutput("MomentOut", "(Tensor) Output second moment"); - - AddAttr("decay", - "(float, default 0.95) " - "Discounting factor for coming gradient") - .SetDefault(0.95); - AddAttr("epsilon", - "(float, default 1.0e-6) " - "Constant for numerical stability") - .SetDefault(1.0e-6f); - AddComment(R"DOC( -Decayed Adagrad Optimizer. - -The update is done as follows: - -$$ -moment\_out = decay * moment + (1 - decay) * grad * grad \\ -param\_out = param - \frac{learning\_rate * grad}{\sqrt{moment\_out} + epsilon} -$$ - -The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) -does not have an epsilon attribute. 
It is added here for numerical -stability to avoid the division by zero error. - -)DOC"); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -DECLARE_INFER_SHAPE_FUNCTOR(decayed_adagrad, - DecayedAdagradShapeFunctor, - PD_INFER_META(phi::DecayedAdagradInferMeta)); - -REGISTER_OP_WITHOUT_GRADIENT(decayed_adagrad, - ops::DecayedAdagradOp, - ops::DecayedAdagradOpMaker, - DecayedAdagradShapeFunctor); diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc index 7ef426cedad19..1385d039d932b 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc @@ -16,8 +16,7 @@ #include "paddle/fluid/framework/op_version_registry.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class SparseMomentumOpInferVarType : public framework::VarTypeInference { public: @@ -107,8 +106,7 @@ else: \\ )DOC"); } -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; REGISTER_OPERATOR( diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cc b/paddle/fluid/operators/prune_gate_by_capacity_op.cc deleted file mode 100644 index 4e4bc4d291d68..0000000000000 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cc +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { - -class PruneGateByCapacityOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("GateIdx"), "Input", "GateIdx", "prun_gate_by_capacity"); - OP_INOUT_CHECK(ctx->HasInput("ExpertCount"), - "Input", - "ExpertCount", - "prun_gate_by_capacity"); - - OP_INOUT_CHECK(ctx->HasOutput("NewGateIdx"), - "Output", - "NewGateIdx", - "prun_gate_by_capacity"); - // OP_INOUT_CHECK(ctx->HasOutput("ExpertCountOut"), "Output", - // "ExpertCountOut", - // "prun_gate_by_capacity"); - // auto gate_idx_dims = ctx->GetInputDim("GateIdx"); - auto expert_count_dims = ctx->GetInputDim("ExpertCount"); - - int64_t n_expert = ctx->Attrs().Get("n_expert"); - int64_t n_worker = ctx->Attrs().Get("n_worker"); - - int64_t expert_count_num_ele = 1; - for (int i = 0; i < static_cast(expert_count_dims.size()); i++) { - expert_count_num_ele *= expert_count_dims[i]; - } - - PADDLE_ENFORCE_EQ( - expert_count_num_ele, - n_expert * n_worker, - phi::errors::Unavailable( - "The number of elements for expert_count is ( %ld ) incorrect. " - "Because the number of expert_count must equal the " - "product of n_worker ( %ld ) and n_expert ( %ld ). 
" - "Please input appropriate expert_count again!", - expert_count_num_ele, - n_worker, - n_expert)); - - auto gate_idx_in_dims = ctx->GetInputDim("GateIdx"); - // auto expert_count_in_dims = ctx->GetInputDim("ExpertCount"); - ctx->SetOutputDim("NewGateIdx", gate_idx_in_dims); - // ctx->SetOutputDim("ExpertCountOut", expert_count_in_dims); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto gate_idx_data_type = - OperatorWithKernel::IndicateVarDataType(ctx, "GateIdx"); - auto expert_count_data_type = - OperatorWithKernel::IndicateVarDataType(ctx, "ExpertCount"); - PADDLE_ENFORCE_EQ( - gate_idx_data_type, - expert_count_data_type, - phi::errors::InvalidArgument( - "The dtype of the gate_idx and expert_count should be same")); - PADDLE_ENFORCE_EQ(gate_idx_data_type, - framework::proto::VarType::INT64, - phi::errors::InvalidArgument( - "The dtype of the gate_idx and expert_count should " - "be same as int64")); - return phi::KernelKey(gate_idx_data_type, ctx.GetPlace()); - } -}; - -class PruneGateByCapacityOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("GateIdx", - "(Tensor), The gate_id sequence corresponding to the input data."); - AddInput("ExpertCount", - "(Tensor), The quantity value counted on the gate_id sequence of " - "the input data."); - AddAttr("n_expert", "The number of Experts on each worker") - .SetDefault(0); - AddAttr("n_worker", "The number of workers on the trainer") - .SetDefault(0); - - AddOutput("NewGateIdx", - "(Tensor), The gate_id sequence corresponding to the new input " - "data after passing through prune."); - // AddOutput( - // "ExpertCountOut", - // "(Tensor), The copy quantity value counted on the gate_id sequence of - // " - // "the input data."); - - AddComment(R"DOC( -prune_gate_by_capacity Operator. - -This operator is used to prune gate by capacity(CUDA). 
- -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(prune_gate_by_capacity, - ops::PruneGateByCapacityOp, - ops::PruneGateByCapacityOpMaker); diff --git a/paddle/fluid/operators/pscore/fetch_barrier_op.cc b/paddle/fluid/operators/pscore/fetch_barrier_op.cc index 1928464acb9df..22c75971c7cb8 100644 --- a/paddle/fluid/operators/pscore/fetch_barrier_op.cc +++ b/paddle/fluid/operators/pscore/fetch_barrier_op.cc @@ -13,22 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class InferShapeContext; class OpDesc; class Scope; template class EmptyGradOpMaker; -} // namespace framework -namespace imperative { +} // namespace paddle::framework +namespace paddle::imperative { class OpBase; -} // namespace imperative +} // namespace paddle::imperative -} // namespace paddle - -namespace paddle { -namespace operators { +namespace paddle::operators { class FetchBarrierOp : public framework::OperatorBase { public: @@ -72,8 +68,7 @@ class FetchBarrierOpShapeInference : public framework::InferShapeBase { void operator()(framework::InferShapeContext* ctx) const override {} }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/pscore/send_op.cc b/paddle/fluid/operators/pscore/send_op.cc index e8908758c1875..ccecf85b4a495 100644 --- a/paddle/fluid/operators/pscore/send_op.cc +++ b/paddle/fluid/operators/pscore/send_op.cc @@ -16,21 +16,18 @@ limitations under the License. 
*/ #include "paddle/fluid/distributed/ps/wrapper/fleet.h" #include "paddle/fluid/framework/op_registry.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class InferShapeContext; class OpDesc; class Scope; template class EmptyGradOpMaker; -} // namespace framework -namespace imperative { +} // namespace paddle::framework +namespace paddle::imperative { class OpBase; -} // namespace imperative -} // namespace paddle +} // namespace paddle::imperative -namespace paddle { -namespace operators { +namespace paddle::operators { class SendOp : public framework::OperatorBase { public: @@ -102,8 +99,7 @@ class SendOpShapeInference : public framework::InferShapeBase { void operator()(framework::InferShapeContext* ctx) const override {} }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/read_file_op.cc b/paddle/fluid/operators/read_file_op.cc index 0f17ca063d8e6..ed0eb9f786503 100644 --- a/paddle/fluid/operators/read_file_op.cc +++ b/paddle/fluid/operators/read_file_op.cc @@ -24,8 +24,7 @@ #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/nullary.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class ReadFileOp : public framework::OperatorWithKernel { public: @@ -50,8 +49,7 @@ This operator read a file. 
} }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc index f96a0c2679c25..6973b03f56853 100644 --- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc +++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc @@ -15,9 +15,7 @@ #include "paddle/fluid/operators/reader/buffered_reader.h" #include "paddle/fluid/operators/reader/reader_op_registry.h" -namespace paddle { -namespace operators { -namespace reader { +namespace paddle::operators::reader { class CreateDoubleBufferReaderOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; @@ -89,9 +87,7 @@ class CreateDoubleBufferReaderOpMaker : public DecoratedReaderMakerBase { } }; -} // namespace reader -} // namespace operators -} // namespace paddle +} // namespace paddle::operators::reader namespace ops = paddle::operators::reader; REGISTER_DECORATED_READER_OPERATOR(create_double_buffer_reader, diff --git a/paddle/fluid/operators/reader/py_reader.cc b/paddle/fluid/operators/reader/py_reader.cc index d863d759333b6..35f5949169b1d 100644 --- a/paddle/fluid/operators/reader/py_reader.cc +++ b/paddle/fluid/operators/reader/py_reader.cc @@ -14,9 +14,7 @@ #include "paddle/fluid/operators/reader/py_reader.h" -namespace paddle { -namespace operators { -namespace reader { +namespace paddle::operators::reader { PyReader::PyReader( const std::shared_ptr& queue, @@ -44,6 +42,4 @@ void PyReader::Shutdown() { queue_->Close(); } void PyReader::Start() { queue_->ReOpen(); } -} // namespace reader -} // namespace operators -} // namespace paddle +} // namespace paddle::operators::reader diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index 66e6ceeb4fd1a..9b92e31e0af7c 100644 --- 
a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -14,15 +14,11 @@ #include "paddle/fluid/operators/reader/reader_op_registry.h" -namespace paddle { -namespace framework { +namespace paddle::framework { class VarDesc; -} // namespace framework -} // namespace paddle +} // namespace paddle::framework -namespace paddle { -namespace operators { -namespace reader { +namespace paddle::operators::reader { std::vector RestoreShapes(const std::vector& shape_concat, const std::vector& ranks) { @@ -161,7 +157,4 @@ void DecoratedReaderMakerBase::Make() { Apply(); } -} // namespace reader - -} // namespace operators -} // namespace paddle +} // namespace paddle::operators::reader diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op.cc index 1997d1fb99fd2..73ad94c0a5c6a 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.cc @@ -17,7 +17,7 @@ #include #include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/operators/reduce_ops/reduce_op_function.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc index 2ed2e3278acad..fce12ae865173 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc @@ -14,7 +14,7 @@ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/reduce_ops/reduce_op_function.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/phi/backends/xpu/xpu_header.h" diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc index ba4f188274d18..464a8e547e508 100644 --- 
a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc @@ -12,19 +12,215 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h" - #include #include #include #include #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" +namespace ops = paddle::operators; namespace paddle { namespace operators { +class ReduceBaseOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ReduceBaseOp"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "ReduceBaseOp"); + auto x_dims = ctx->GetInputDim("X"); + auto x_rank = x_dims.size(); + auto dims = ctx->Attrs().Get>("dim"); + PADDLE_ENFORCE_GT(dims.size(), + 0, + phi::errors::InvalidArgument( + "The input dim dimensions of ReduceBaseOp " + "should be greater than 0. But received the dim " + "dimensions of Reduce = %d.", + dims.size())); + + for (size_t i = 0; i < dims.size(); ++i) { + PADDLE_ENFORCE_LT( + dims[i], + x_rank, + phi::errors::InvalidArgument( + "The reduce dim index %d should be in the " + "range [-dimension(X), dimension(X)] " + "which dimension = %d. But received dim index = %d.", + i, + x_rank, + dims[i])); + PADDLE_ENFORCE_GE( + dims[i], + -x_rank, + phi::errors::InvalidArgument( + "The reduce dim index %d should be in the " + "range [-dimension(X), dimension(X)] " + "which dimension = %d. 
But received dim index = %d.", + i, + x_rank, + dims[i])); + if (dims[i] < 0) dims[i] = x_rank + dims[i]; + } + sort(dims.begin(), dims.end()); + bool reduce_all = ctx->Attrs().Get("reduce_all"); + bool keep_dim = ctx->Attrs().Get("keep_dim"); + if (reduce_all) { + if (keep_dim) + ctx->SetOutputDim("Out", + common::make_ddim(std::vector(x_rank, 1))); + else + ctx->SetOutputDim("Out", {1}); + } else { + auto dims_vector = common::vectorize(x_dims); + if (keep_dim) { + for (size_t i = 0; i < dims.size(); ++i) { + dims_vector[dims[i]] = 1; + } + } else { + const int kDelFlag = -2; + for (size_t i = 0; i < dims.size(); ++i) { + dims_vector[dims[i]] = kDelFlag; + } + dims_vector.erase( + remove(dims_vector.begin(), dims_vector.end(), kDelFlag), + dims_vector.end()); + } + if (!keep_dim && dims_vector.size() == 0) { + dims_vector.push_back(1); + } + auto out_dims = common::make_ddim(dims_vector); + ctx->SetOutputDim("Out", out_dims); + if (dims.size() > 0 && dims[0] != 0) { + // Only pass LoD when not reducing on the first dim. 
+ ctx->ShareLoD("X", /*->*/ "Out"); + } + } + } + + // oneDNN's reduction kernel is optimized only for reducing throughout the + // most outer dims, so in case of another type of reduction, it would be + // better to fallback to native implementation + static bool HasOptimizedOneDNNKernel(const framework::ExecutionContext& ctx) { + // native reduce kernels don't support bf16 + // so oneDNN kernel is enforced in that case + if (ctx.Input("X")->dtype() == phi::DataType::BFLOAT16) + return true; + + if (!ctx.HasAttr("dim") || !ctx.HasAttr("reduce_all")) { + return false; + } + + auto reduce_dims = ctx.Attr>("dim"); + const bool reduce_all = ctx.Attr("reduce_all"); + int ndims = ctx.Input("X")->dims().size(); + + if (reduce_all) { + return true; + } + + for (size_t i = 0; i < reduce_dims.size(); ++i) { + if (reduce_dims[i] < 0) reduce_dims[i] = ndims + reduce_dims[i]; + } + sort(reduce_dims.begin(), reduce_dims.end()); + for (size_t i = 0; i < reduce_dims.size(); ++i) { + if (reduce_dims[reduce_dims.size() - i - 1] != + static_cast(ndims - i - 1)) { + return false; + } + } + + return true; + } + + phi::KernelKey GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + // choose cudnn kernel if the runtime supported. 
+ auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); + + // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_DNNL + if (ctx.Input("X")->dims().size() > 5 || + !HasOptimizedOneDNNKernel(ctx)) { + this->SetDnnFallback(true); + } + // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_DNNL + + if (input_data_type == framework::proto::VarType::FP16) { + PADDLE_ENFORCE_EQ( + ctx.GetPlace().GetType() == phi::AllocationType::GPU || + ctx.GetPlace().GetType() == phi::AllocationType::XPU || + ctx.GetPlace().GetType() == phi::AllocationType::CUSTOM, + true, + phi::errors::InvalidArgument( + "float16 can only be used on GPU or XPU place")); + } + return phi::KernelKey(input_data_type, ctx.GetPlace()); + } +}; + +class ReduceGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ReduceBaseOp"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), + "Input", + "Out@GRAD", + "ReduceBaseOp"); + auto x_dims = ctx->GetInputDim("X"); + auto x_rank = x_dims.size(); + // TODO(dev): We should delete Infershape and migrate it into + // UnchangeInferMeta.In case of 'dim' is Variable, it will + // not exist in Attrs but in Inputs. + if (ctx->HasAttr("dim")) { + auto dims = ctx->Attrs().Get>("dim"); + for (size_t i = 0; i < dims.size(); ++i) { + PADDLE_ENFORCE_LT( + dims[i], + x_rank, + phi::errors::InvalidArgument( + "The reduce dim index %d should be in the " + "range [-dimension(X), dimension(X)], " + "which dimension = %d. 
But received dim index = %d.", + i, + x_rank, + dims[i])); + if (dims[i] < 0) dims[i] = x_rank + dims[i]; + } + } + + auto x_grad_name = framework::GradVarName("X"); + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + ctx->ShareLoD("X", /*->*/ x_grad_name); + } + } + + protected: + phi::KernelKey GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + int out_dtype = ctx.Attr("out_dtype"); + auto input_data_type = + (out_dtype >= 0) + ? static_cast(out_dtype) + : OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")); + + // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_DNNL + // max 5D tensor is supported + if (ctx.Input("X")->dims().size() > 5) { + dnn_fallback_ = true; + } + // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_DNNL + + return phi::KernelKey(input_data_type, ctx.GetPlace()); + } +}; // NOTE(dengkaipeng): Input(Out) is unnecessary in reduce_mean_grad // calcualtion, but will incur a reduce_mean_grad op after @@ -65,6 +261,7 @@ class ReduceMeanDoubleGradDescMaker : public framework::GradOpDescMakerBase { return ops; } }; + class ReduceMeanDoubleGradOpBaseMaker : public imperative::GradOpBaseMakerBase { public: using imperative::GradOpBaseMakerBase::GradOpBaseMakerBase; @@ -89,6 +286,56 @@ class ReduceMeanDoubleGradOpBaseMaker : public imperative::GradOpBaseMakerBase { } }; DECLARE_NO_NEED_BUFFER_VARS_INFERER(ReduceMeanGradNoNeedBufferVarInferer, "X"); + +class ReduceBaseOpMaker : public paddle::framework::OpProtoAndCheckerMaker { + public: + void Make() final { + AddInput("X", + "(Tensor) The input tensor. Tensors with rank at most 6 are " + "supported."); + AddOutput("Out", "(Tensor) The result tensor."); + AddAttr>( + "dim", + "(list, default {0}) The dimensions to reduce. " + "Must be in the range [-rank(input), rank(input)). " + "If `dim[i] < 0`, the dims[i] to reduce is `rank + dims[i]`. 
" + "Note that reducing on the first dim will make the LoD info lost.") + .SetDefault({0}) + .SupportTensor(); + AddAttr("keep_dim", + "(bool, default false) " + "If true, retain the reduced dimension with length 1.") + .SetDefault(false); + AddAttr("reduce_all", + "(bool, default false) " + "If true, output a scalar reduced along all dimensions.") + .SetDefault(false); + AddAttr("in_dtype", + "(int, default -1)" + "The dtype of input, default value is -1, the user could not " + "set this value.") + .SetDefault(-1); + AddAttr( + "out_dtype", + "(int, default -1)" + "The dtype of output, default value is -1, the dtype is same as intput") + .SetDefault(-1); + AddComment(string::Sprintf(R"DOC( +%s Operator. + +This operator computes the %s of input tensor along the given dimension. +The result tensor has 1 fewer dimension than the input unless keep_dim is true. +If reduce_all is true, just reduce along all dimensions and output a scalar. + +)DOC", + GetOpType(), + GetName())); + } + + protected: + virtual std::string GetName() const = 0; + virtual std::string GetOpType() const = 0; +}; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.h b/paddle/fluid/operators/reduce_ops/reduce_mean_op.h deleted file mode 100644 index eb82be83ba517..0000000000000 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/fluid/operators/reduce_ops/reduce_op.h" - -namespace paddle { -namespace operators { - -struct MeanFunctor { - template - void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) { - y->device(place) = x->mean(dim); - } -}; - -struct MeanGradFunctor { - template - void operator()(const DeviceContext& place, - X* x, - Y* y, - DX* dx, - DY* dy, - const Dim& dim, - int size) { - dx->device(place) = dy->broadcast(dim) / dx->constant(size); - } -}; - -// TODO(zengjinle): Should refine the numeric stability of FP16 reduce_mean -// and reduce_mean_grad later. -struct FP16MeanGradFunctor { - template - void operator()(const DeviceContext& place, - X* x, - Y* y, - DX* dx, - DY* dy, - const Dim& dim, - int size) { - dx->device(place) = (dy->template cast().broadcast(dim) / - dx->template cast().constant(size)) - .template cast(); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h deleted file mode 100644 index 44a82397dcc07..0000000000000 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ /dev/null @@ -1,895 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include -#include - -#include "paddle/fluid/framework/data_type_transform.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/reduce_ops/reduce_op_function.h" -#include "paddle/phi/kernels/funcs/math_function.h" -// only can include the headers in paddle/phi/api dirs -#include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/framework/phi_utils.h" -#include "paddle/phi/kernels/cpu/reduce.h" - -#if defined(__HIPCC__) || defined(__NVCC__) || defined(__xpu__) -#include "paddle/phi/kernels/gpu/reduce.h" -#include "paddle/phi/kernels/gpu/reduce_grad.h" -#endif - -namespace paddle { -namespace operators { - -#define HANDLE_DIM(NDIM, RDIM) \ - if (ndim == NDIM && rdim == RDIM) { \ - paddle::operators:: \ - ReduceFunctor( \ - context.template device_context(), \ - *input, \ - output, \ - dims, \ - keep_dim); \ - } - -using DDim = phi::DDim; - -inline void GetShuffledDim(const DDim& src_dims, - DDim* dst_dims, - const std::vector& reduced_dims, - std::vector* perm_axis) { - // check if it's a reduced dim - std::vector src_dims_check(src_dims.size(), false); - size_t src_size = src_dims.size(); - size_t reduce_size = reduced_dims.size(); - for (size_t i = 0; i < reduce_size; ++i) { - dst_dims->at(src_size - reduce_size + i) = src_dims[reduced_dims[i]]; - (*perm_axis)[src_size - reduce_size + i] = reduced_dims[i]; - src_dims_check[reduced_dims[i]] = true; - } - - size_t offset = 0; - for (size_t i = 0; i < src_dims_check.size(); ++i) { - bool is_reduced = src_dims_check[i]; - if (!is_reduced) { - (*perm_axis)[offset] = i; - dst_dims->at(offset++) = src_dims[i]; - } - } -} - -static inline std::vector GetReduceDim(const std::vector& dims, - int dim_size, - bool reduce_all) { - std::vector reduce_dims; - if (reduce_all) { - reduce_dims.resize(dim_size); - int reduce_size = reduce_dims.size(); - for (int i = 0; i < reduce_size; ++i) { - reduce_dims[i] = i; - } - } else { - for (auto e : 
dims) { - PADDLE_ENFORCE_LT(e, - dim_size, - phi::errors::InvalidArgument( - "ReduceBaseOp: invalid axis, when x_dims is %d, " - "axis[i] should less than x_dims, but got %d.", - dim_size, - e)); - reduce_dims.push_back(e >= 0 ? e : e + dim_size); - } - } - return reduce_dims; -} -template -void GetShuffledInput(const framework::ExecutionContext& context, - const phi::DenseTensor* input, - phi::DenseTensor* shuffled_input, - const std::vector& dims) { - DDim shuffled_dims(input->dims()); - std::vector perm_axis(input->dims().size()); - GetShuffledDim(input->dims(), &shuffled_dims, dims, &perm_axis); - - shuffled_input->Resize(shuffled_dims); - shuffled_input->mutable_data(context.GetPlace()); - - phi::funcs::TransposeNormal trans; - trans(context.template device_context(), - *input, - shuffled_input, - perm_axis); -} - -inline void GetOriginDimFromShuffled(const DDim& src_dim, - const std::vector& dims, - std::vector* origin_dim) { - DDim shuffled_dims(src_dim); - size_t n = src_dim.size(); - std::vector perm_axis(n); - GetShuffledDim(src_dim, &shuffled_dims, dims, &perm_axis); - for (size_t i = 0; i < n; ++i) { - (*origin_dim)[perm_axis[i]] = i; - } -} - -template -void HandleLargeDim(const framework::ExecutionContext& context, - const phi::DenseTensor* input, - phi::DenseTensor* output, - const std::vector& dims, - bool keep_dim) { - // shuffle the reduced dim to the end - phi::DenseTensor shuffled_input; - GetShuffledInput(context, input, &shuffled_input, dims); - - // transpose to 2D tensor whose shape is {unreduced, reduced}. - const int64_t unreduced = output->numel(); - const int64_t input_numel = shuffled_input.numel(); - // assume: 0 / 0 == 0, which allow process 0 dim tensor - const int64_t reduced = (unreduced != 0) ? 
(input_numel / unreduced) : 0; - - PADDLE_ENFORCE_EQ( - unreduced * reduced, - input_numel, - phi::errors::InvalidArgument( - "Reducing failed in HandleLargeDim, when try to transpose (%d) " - "operands into 2D tensor with shape (%d, %d).", - input_numel, - unreduced, - reduced)); - - shuffled_input.Resize({unreduced, reduced}); - - DDim output_dim = output->dims(); - output->Resize({unreduced}); - paddle::operators::ReduceFunctor( - context.template device_context(), - shuffled_input, - output, - {1}, - keep_dim); - output->Resize(output_dim); -} - -template -void HandleLargeDimGrad(const framework::ExecutionContext& context, - const phi::DenseTensor* x, - const phi::DenseTensor* out, - const phi::DenseTensor* dout, - phi::DenseTensor* dx, - Functor functor, - const std::vector& dims) { - const int64_t unreduced = out->numel(); - const int64_t x_numel = x->numel(); - // assume: 0 / 0 == 0, which allow process 0 dim tensor - const int64_t reduced = (unreduced != 0) ? (x_numel / unreduced) : 0; - - PADDLE_ENFORCE_EQ( - unreduced * reduced, - x_numel, - phi::errors::InvalidArgument( - "Reducing failed in HandleLargeDimGrad, when try to transpose (%d) " - "operands into 2D tensor with shape (%d, %d).", - x_numel, - unreduced, - reduced)); - - DDim out_dim(out->dims()); - DDim x_dim(x->dims()); - // transpose and reshape X - phi::DenseTensor shuffled_x; - GetShuffledInput(context, x, &shuffled_x, dims); - DDim shuffled_dim = shuffled_x.dims(); - shuffled_x.Resize({unreduced, reduced}); - // reshape dX {unreduced, reduced} - dx->Resize({unreduced, reduced}); - ReduceGradFunctor( - context.template device_context(), - shuffled_x, - *out, - *dout, - dx, - functor, - {1}); - // transpose dX - std::vector origin_axis(x_dim.size()); - GetOriginDimFromShuffled(x_dim, dims, &origin_axis); - phi::DenseTensor dx_tmp; - framework::TensorCopy(*dx, context.GetPlace(), &dx_tmp); - dx_tmp.Resize(shuffled_dim); - dx->Resize(x_dim); - phi::funcs::TransposeNormal trans; - 
trans(context.template device_context(), - dx_tmp, - dx, - origin_axis); -} - -template -struct ReduceKernelFunctor { - const phi::DenseTensor* input; - phi::DenseTensor* output; - std::vector dims; - bool keep_dim; - bool reduce_all; - const framework::ExecutionContext& context; - ReduceKernelFunctor(const phi::DenseTensor* input, - phi::DenseTensor* output, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - const framework::ExecutionContext& context) - : input(input), - output(output), - dims(dims), - keep_dim(keep_dim), - reduce_all(reduce_all), - context(context) {} - - template - void apply() const { - output->mutable_data(context.GetPlace()); - if (reduce_all) { - // Flatten and reduce 1-D tensor - auto x = EigenVector::Flatten(*input); - auto out = EigenScalar::From(*output); - auto& place = - *context.template device_context().eigen_device(); - auto reduce_dim = Eigen::array({{0}}); - Functor functor; - functor(place, &x, &out, reduce_dim); - } else { - int ndim = input->dims().size(); - int rdim = dims.size(); - if (ndim > 6) { - HandleLargeDim( - context, input, output, dims, keep_dim); - } else { - HANDLE_DIM(6, 5); - HANDLE_DIM(6, 4); - HANDLE_DIM(6, 3); - HANDLE_DIM(6, 2); - HANDLE_DIM(6, 1); - HANDLE_DIM(5, 4); - HANDLE_DIM(5, 3); - HANDLE_DIM(5, 2); - HANDLE_DIM(5, 1); - HANDLE_DIM(4, 3); - HANDLE_DIM(4, 2); - HANDLE_DIM(4, 1); - HANDLE_DIM(3, 2); - HANDLE_DIM(3, 1); - HANDLE_DIM(2, 1); - HANDLE_DIM(1, 1); - } - } - } -}; -template -class ReduceKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - bool reduce_all = context.Attr("reduce_all"); - auto* output = context.Output("Out"); - auto dims = context.Attr>("dim"); - bool keep_dim = context.Attr("keep_dim"); - int out_dtype = context.Attr("out_dtype"); - framework::proto::VarType::Type cast_out_dtype; - auto* input = context.Input("X"); - - if (out_dtype < 0) { - cast_out_dtype = static_cast( - 
framework::TransToProtoVarType(input->dtype())); - } else { - cast_out_dtype = static_cast(out_dtype); - } - - auto& dev_ctx = context.device_context(); - output->mutable_data( - dev_ctx.GetPlace(), - static_cast(cast_out_dtype)); - - std::vector tmp_dims(dims.begin(), dims.end()); - - // call new kernel - phi::Reduce::TYPE, - T, - Functor>( - static_cast::TYPE&>(dev_ctx), - *input, - reduce_all, - tmp_dims, - keep_dim, - framework::TransToPhiDataType(cast_out_dtype), - output); - } -}; - -template -void LaunchReduceGradKernel(const framework::ExecutionContext& context, - const phi::DenseTensor* input0, - const phi::DenseTensor* input1, - const phi::DenseTensor* input2, - phi::DenseTensor* output, - Functor functor, - const std::vector& dims, - bool reduce_all = false) { - if (reduce_all) { - auto x = EigenVector::Flatten(*input0); - auto x_reduce = EigenVector::Flatten(*input1); - auto x_reduce_grad = EigenVector::Flatten(*input2); - auto x_grad = EigenVector::Flatten(*output); - auto& place = - *context.template device_context().eigen_device(); - auto broadcast_dim = - Eigen::array({{static_cast(input0->numel())}}); - functor(place, - &x, - &x_reduce, - &x_grad, - &x_reduce_grad, - broadcast_dim, - broadcast_dim[0]); - } else { - int rank = input0->dims().size(); - switch (rank) { - case 1: - ReduceGradFunctor( - context.template device_context(), - *input0, - *input1, - *input2, - output, - functor, - dims); - break; - case 2: - ReduceGradFunctor( - context.template device_context(), - *input0, - *input1, - *input2, - output, - functor, - dims); - break; - case 3: - ReduceGradFunctor( - context.template device_context(), - *input0, - *input1, - *input2, - output, - functor, - dims); - break; - case 4: - ReduceGradFunctor( - context.template device_context(), - *input0, - *input1, - *input2, - output, - functor, - dims); - break; - case 5: - ReduceGradFunctor( - context.template device_context(), - *input0, - *input1, - *input2, - output, - functor, - dims); - 
break; - case 6: - ReduceGradFunctor( - context.template device_context(), - *input0, - *input1, - *input2, - output, - functor, - dims); - break; - default: - HandleLargeDimGrad( - context, input0, input1, input2, output, functor, dims); - break; - } - } -} - -template -class ReduceGradKernel : public framework::OpKernel { - public: - void ComputeFromInput(const phi::DenseTensor* input2, - const framework::ExecutionContext& context) const { - bool reduce_all = context.Attr("reduce_all"); - auto dims = context.Attr>("dim"); - auto* input0 = context.Input("X"); - auto* input1 = context.Input("Out"); - - auto* output = - context.Output(framework::GradVarName("X")); - output->mutable_data(context.GetPlace()); - - // The dims has full dim, set the reduce_all is True - const auto& input_dim_size = - context.Input("X")->dims().size(); - std::set dims_set(dims.begin(), dims.end()); - bool full_dim = true; - for (auto i = 0; i < input_dim_size; i++) { - if (dims_set.find(i) == dims_set.end()) { - full_dim = false; - break; - } - } - reduce_all = (reduce_all || full_dim); - // NOTE: EigenTensor::From() uses tensor->data() - // if op has NoNeedBufferVarsInferer, the corresponding kNoNeedBufferX or - // kNoNeedBufferY should set true - // and use fake var that has same dims. 
- if (kNoNeedBufferX) { - input0 = output; - } - if (kNoNeedBufferY) { - input1 = input2; - } - - const std::vector const_dims = dims; - - // NOTE(dengkaipeng): Out is unnecessary in some reduce kernel and - // not be set as Input in grad Maker, use Out_grad to replace here - if (!input1) input1 = input2; - Functor functor; - LaunchReduceGradKernel(context, - input0, - input1, - input2, - output, - functor, - const_dims, - reduce_all); - } - - void Compute(const framework::ExecutionContext& context) const override { - int in_dtype = context.Attr("in_dtype"); - if (in_dtype >= 0) { - phi::DenseTensor tmp_tensor; - auto* pre_input = - context.Input(framework::GradVarName("Out")); - auto in_kernel_type = - phi::KernelKey(framework::TransToProtoVarType(pre_input->dtype()), - context.GetPlace()); - auto out_kernel_type = - phi::KernelKey(static_cast(in_dtype), - context.GetPlace()); - framework::TransDataType( - in_kernel_type, out_kernel_type, *pre_input, &tmp_tensor); - ComputeFromInput(&tmp_tensor, context); - - } else { - auto* input2 = - context.Input(framework::GradVarName("Out")); - ComputeFromInput(input2, context); - } - } -}; - -class ReduceBaseOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ReduceBaseOp"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "ReduceBaseOp"); - auto x_dims = ctx->GetInputDim("X"); - auto x_rank = x_dims.size(); - auto dims = ctx->Attrs().Get>("dim"); - PADDLE_ENFORCE_GT(dims.size(), - 0, - phi::errors::InvalidArgument( - "The input dim dimensions of ReduceBaseOp " - "should be greater than 0. 
But received the dim " - "dimensions of Reduce = %d.", - dims.size())); - - for (size_t i = 0; i < dims.size(); ++i) { - PADDLE_ENFORCE_LT( - dims[i], - x_rank, - phi::errors::InvalidArgument( - "The reduce dim index %d should be in the " - "range [-dimension(X), dimension(X)] " - "which dimension = %d. But received dim index = %d.", - i, - x_rank, - dims[i])); - PADDLE_ENFORCE_GE( - dims[i], - -x_rank, - phi::errors::InvalidArgument( - "The reduce dim index %d should be in the " - "range [-dimension(X), dimension(X)] " - "which dimension = %d. But received dim index = %d.", - i, - x_rank, - dims[i])); - if (dims[i] < 0) dims[i] = x_rank + dims[i]; - } - sort(dims.begin(), dims.end()); - bool reduce_all = ctx->Attrs().Get("reduce_all"); - bool keep_dim = ctx->Attrs().Get("keep_dim"); - if (reduce_all) { - if (keep_dim) - ctx->SetOutputDim("Out", - common::make_ddim(std::vector(x_rank, 1))); - else - ctx->SetOutputDim("Out", {1}); - } else { - auto dims_vector = common::vectorize(x_dims); - if (keep_dim) { - for (size_t i = 0; i < dims.size(); ++i) { - dims_vector[dims[i]] = 1; - } - } else { - const int kDelFlag = -2; - for (size_t i = 0; i < dims.size(); ++i) { - dims_vector[dims[i]] = kDelFlag; - } - dims_vector.erase( - remove(dims_vector.begin(), dims_vector.end(), kDelFlag), - dims_vector.end()); - } - if (!keep_dim && dims_vector.size() == 0) { - dims_vector.push_back(1); - } - auto out_dims = common::make_ddim(dims_vector); - ctx->SetOutputDim("Out", out_dims); - if (dims.size() > 0 && dims[0] != 0) { - // Only pass LoD when not reducing on the first dim. 
- ctx->ShareLoD("X", /*->*/ "Out"); - } - } - } - - // oneDNN's reduction kernel is optimized only for reducing throughout the - // most outer dims, so in case of another type of reduction, it would be - // better to fallback to native implementation - static bool HasOptimizedOneDNNKernel(const framework::ExecutionContext& ctx) { - // native reduce kernels don't support bf16 - // so oneDNN kernel is enforced in that case - if (ctx.Input("X")->dtype() == phi::DataType::BFLOAT16) - return true; - - if (!ctx.HasAttr("dim") || !ctx.HasAttr("reduce_all")) { - return false; - } - - auto reduce_dims = ctx.Attr>("dim"); - const bool reduce_all = ctx.Attr("reduce_all"); - int ndims = ctx.Input("X")->dims().size(); - - if (reduce_all) { - return true; - } - - for (size_t i = 0; i < reduce_dims.size(); ++i) { - if (reduce_dims[i] < 0) reduce_dims[i] = ndims + reduce_dims[i]; - } - sort(reduce_dims.begin(), reduce_dims.end()); - for (size_t i = 0; i < reduce_dims.size(); ++i) { - if (reduce_dims[reduce_dims.size() - i - 1] != - static_cast(ndims - i - 1)) { - return false; - } - } - - return true; - } - - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - // choose cudnn kernel if the runtime supported. 
- auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - - // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_DNNL - if (ctx.Input("X")->dims().size() > 5 || - !HasOptimizedOneDNNKernel(ctx)) { - this->SetDnnFallback(true); - } - // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_DNNL - - if (input_data_type == framework::proto::VarType::FP16) { - PADDLE_ENFORCE_EQ( - ctx.GetPlace().GetType() == phi::AllocationType::GPU || - ctx.GetPlace().GetType() == phi::AllocationType::XPU || - ctx.GetPlace().GetType() == phi::AllocationType::CUSTOM, - true, - phi::errors::InvalidArgument( - "float16 can only be used on GPU or XPU place")); - } - return phi::KernelKey(input_data_type, ctx.GetPlace()); - } -}; - -class ReduceOpUseInputPlace : public ReduceBaseOp { - public: - using ReduceBaseOp::ReduceBaseOp; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - phi::KernelKey kt = OperatorWithKernel::GetExpectedKernelType(ctx); - kt.set_backend( - phi::TransToPhiBackend(ctx.Input("X")->place())); - return kt; - } -}; - -class ReduceGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ReduceBaseOp"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), - "Input", - "Out@GRAD", - "ReduceBaseOp"); - auto x_dims = ctx->GetInputDim("X"); - auto x_rank = x_dims.size(); - // TODO(dev): We should delete Infershape and migrate it into - // UnchangeInferMeta.In case of 'dim' is Variable, it will - // not exist in Attrs but in Inputs. 
- if (ctx->HasAttr("dim")) { - auto dims = ctx->Attrs().Get>("dim"); - for (size_t i = 0; i < dims.size(); ++i) { - PADDLE_ENFORCE_LT( - dims[i], - x_rank, - phi::errors::InvalidArgument( - "The reduce dim index %d should be in the " - "range [-dimension(X), dimension(X)], " - "which dimension = %d. But received dim index = %d.", - i, - x_rank, - dims[i])); - if (dims[i] < 0) dims[i] = x_rank + dims[i]; - } - } - - auto x_grad_name = framework::GradVarName("X"); - if (ctx->HasOutput(x_grad_name)) { - ctx->SetOutputDim(x_grad_name, x_dims); - ctx->ShareLoD("X", /*->*/ x_grad_name); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - int out_dtype = ctx.Attr("out_dtype"); - auto input_data_type = - (out_dtype >= 0) - ? static_cast(out_dtype) - : OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")); - - // NOTE(jiahongyu): Below codes originally enclosed by PADDLE_WITH_DNNL - // max 5D tensor is supported - if (ctx.Input("X")->dims().size() > 5) { - dnn_fallback_ = true; - } - // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_DNNL - - return phi::KernelKey(input_data_type, ctx.GetPlace()); - } -}; - -class ReduceBaseOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() final { - AddInput("X", - "(Tensor) The input tensor. Tensors with rank at most 6 are " - "supported."); - AddOutput("Out", "(Tensor) The result tensor."); - AddAttr>( - "dim", - "(list, default {0}) The dimensions to reduce. " - "Must be in the range [-rank(input), rank(input)). " - "If `dim[i] < 0`, the dims[i] to reduce is `rank + dims[i]`. 
" - "Note that reducing on the first dim will make the LoD info lost.") - .SetDefault({0}) - .SupportTensor(); - AddAttr("keep_dim", - "(bool, default false) " - "If true, retain the reduced dimension with length 1.") - .SetDefault(false); - AddAttr("reduce_all", - "(bool, default false) " - "If true, output a scalar reduced along all dimensions.") - .SetDefault(false); - AddAttr("in_dtype", - "(int, default -1)" - "The dtype of input, default value is -1, the user could not " - "set this value.") - .SetDefault(-1); - AddAttr( - "out_dtype", - "(int, default -1)" - "The dtype of output, default value is -1, the dtype is same as intput") - .SetDefault(-1); - AddComment(string::Sprintf(R"DOC( -%s Operator. - -This operator computes the %s of input tensor along the given dimension. -The result tensor has 1 fewer dimension than the input unless keep_dim is true. -If reduce_all is true, just reduce along all dimensions and output a scalar. - -)DOC", - GetOpType(), - GetName())); - } - - protected: - virtual std::string GetName() const = 0; - virtual std::string GetOpType() const = 0; -}; - -#if defined(__HIPCC__) || defined(__NVCC__) || defined(__xpu__) -template - class ReduceBaseOp, - template - class TransformOp> -class ReduceCudaKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - bool reduce_all = context.Attr("reduce_all"); - const phi::DenseTensor* input = context.Input("X"); - phi::DenseTensor* output = context.Output("Out"); - auto out_dtype = context.Attr("out_dtype"); - auto pt_out_dtype = paddle::framework::TransToPhiDataType( - static_cast(out_dtype)); - std::vector dims = context.Attr>("dim"); -#ifdef PADDLE_WITH_XPU_KP - auto& dev_ctx = context.template device_context(); -#else - auto& dev_ctx = context.cuda_device_context(); -#endif - if (out_dtype >= 0) { - output->mutable_data(dev_ctx.GetPlace(), pt_out_dtype); - } else { - output->mutable_data(dev_ctx.GetPlace(), input->dtype()); 
- } - - std::vector dims_int64{dims.begin(), dims.end()}; - - phi::Reduce( - dev_ctx, *input, reduce_all, dims_int64, false, pt_out_dtype, output); - } -}; - -#ifndef PADDLE_WITH_XPU_KP -template class TransformOp> -class ReduceCudaGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - bool reduce_all = context.Attr("reduce_all"); - std::vector dims = context.Attr>("dim"); - auto* in_x = context.Input("X"); - - auto* d_out = - context.Input(framework::GradVarName("Out")); - auto* d_x = context.Output(framework::GradVarName("X")); - auto out_dtype = context.Attr("in_dtype"); - auto pt_out_dtype = framework::TransToPhiDataType( - static_cast(out_dtype)); - // get reduce_dim and reduce_num for reduce_mean_grad - int dim_size = in_x->dims().size(); - std::vector reduce_dims = GetReduceDim(dims, dim_size, reduce_all); - auto update_dims = common::vectorize(d_x->dims()); - int reduce_num = 1; - for (auto i : reduce_dims) { - reduce_num *= (in_x->dims())[i]; - update_dims[i] = 1; - } - // make new tensor - phi::DenseTensor new_d_out(d_out->type()); - new_d_out.ShareDataWith(*d_out); - new_d_out.Resize(common::make_ddim(update_dims)); - auto& dev_ctx = context.cuda_device_context(); - if (out_dtype > 0) { - d_x->mutable_data(dev_ctx.GetPlace(), pt_out_dtype); - } else { - d_x->mutable_data(dev_ctx.GetPlace(), d_out->dtype()); - } - auto pt_d_out = std::make_unique(new_d_out); - auto pt_d_x = std::make_unique(*d_x); - if (out_dtype <= 0) { - pt_out_dtype = d_out->dtype(); - } - - using MPType = typename phi::dtype::MPTypeTrait::Type; - phi::ReduceGrad>(dev_ctx, - pt_d_out.get(), - pt_d_x.get(), - pt_out_dtype, - TransformOp(reduce_num)); - } -}; - -template -struct EqualFunctor { - inline T initial() { return static_cast(0.0f); } - - inline HOSTDEVICE T operator()(const T a, const T b) const { - return static_cast(a == b); - } -}; - -template -struct DivideFunctor { - inline T initial() { return 
static_cast(1.0f); } - - inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; } -}; -#endif -#endif - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -#define REGISTER_REDUCE_OP(op_name) \ - class __##op_name##Maker__ : public ops::ReduceBaseOpMaker { \ - protected: \ - virtual std::string GetName() const { return #op_name; } \ - virtual std::string GetOpType() const { return "Reduce " #op_name; } \ - }; \ - REGISTER_OPERATOR( \ - op_name, \ - ops::ReduceBaseOp, \ - __##op_name##Maker__, \ - paddle::framework::DefaultGradOpMaker, \ - paddle::framework::DefaultGradOpMaker); \ - REGISTER_OPERATOR(op_name##_grad, ops::ReduceGradOp) - -#define REGISTER_REDUCE_OP_WITHOUT_GRAD(op_name, ...) \ - class __##op_name##Maker__ : public ops::ReduceBaseOpMaker { \ - protected: \ - virtual std::string GetName() const { return #op_name; } \ - virtual std::string GetOpType() const { return "Reduce " #op_name; } \ - }; \ - REGISTER_OPERATOR( \ - op_name, \ - ops::ReduceBaseOp##__VA_ARGS__, \ - __##op_name##Maker__, \ - paddle::framework::EmptyGradOpMaker, \ - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_function.h b/paddle/fluid/operators/reduce_ops/reduce_op_function.h deleted file mode 100644 index b8043dcd94ba0..0000000000000 --- a/paddle/fluid/operators/reduce_ops/reduce_op_function.h +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" - -namespace paddle { -namespace operators { - -using DDim = phi::DDim; -template -using EigenTensor = phi::EigenTensor; -template -using EigenScalar = phi::EigenScalar; -template -using EigenVector = phi::EigenVector; - -template -void ReduceFunctor(const DeviceContext& context, - const phi::DenseTensor& input, - phi::DenseTensor* output, - const std::vector& dims, - bool keep_dim) { - auto x = EigenTensor::From(input); - auto x_rank = static_cast(x.dimensions().size()); - auto reduce_dim = Eigen::array(); - std::vector dims_ref = dims; - for (size_t i = 0; i < dims_ref.size(); ++i) { - if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i]; - reduce_dim[i] = dims_ref[i]; - } - // construct the squeezed output tensor - DDim out_dims = output->dims(); - if (keep_dim && x_rank > 1) { - const int kDelFlag = -2; - auto dims_vector = common::vectorize(out_dims); - for (size_t i = 0; i < dims_ref.size(); ++i) { - dims_vector[dims_ref[i]] = kDelFlag; - } - dims_vector.erase(remove(dims_vector.begin(), dims_vector.end(), kDelFlag), - dims_vector.end()); - out_dims = common::make_ddim(dims_vector); - } - auto& place = *context.eigen_device(); - Functor functor; - - if (D == 1) { - auto out = EigenScalar::From(*output); - functor(place, &x, &out, reduce_dim); - } else { - auto out = EigenTensor::From(*output, out_dims); - functor(place, &x, &out, reduce_dim); - } -} - -template -void ReduceGradFunctor(const DeviceContext& context, - const phi::DenseTensor& input0, - const phi::DenseTensor& input1, - const phi::DenseTensor& input2, - phi::DenseTensor* output, - Functor functor, - const std::vector& dims) { - auto x = EigenTensor::From(input0); - auto x_grad = EigenTensor::From(*output); - auto x_rank = static_cast(x.dimensions().size()); - 
auto x_dims = input0.dims(); - auto reduced_dims_v = common::vectorize(x_dims); - std::vector dims_ref = dims; - Eigen::array broadcast_dim; - for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1; - - int broad_cats_times = 1; - for (size_t i = 0; i < dims_ref.size(); ++i) { - if (dims_ref[i] < 0) { - dims_ref[i] = x_rank + dims_ref[i]; - } - reduced_dims_v[dims_ref[i]] = 1; - broadcast_dim[dims_ref[i]] = x_dims[dims_ref[i]]; - broad_cats_times *= x_dims[dims_ref[i]]; - } - auto reduced_dims = common::make_ddim(reduced_dims_v); - auto x_reduce = EigenTensor::From(input1, reduced_dims); - auto x_reduce_grad = EigenTensor::From(input2, reduced_dims); - - auto& place = *context.eigen_device(); - - functor(place, - &x, - &x_reduce, - &x_grad, - &x_reduce_grad, - broadcast_dim, - broad_cats_times); -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/repeat_interleave_op.cc b/paddle/fluid/operators/repeat_interleave_op.cc index e276ef2082fb6..2ebdd3efa5346 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cc +++ b/paddle/fluid/operators/repeat_interleave_op.cc @@ -18,8 +18,7 @@ #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class RepeatInterleaveOp : public framework::OperatorWithKernel { public: @@ -160,8 +159,7 @@ class RepeatInterleaveGradMaker : public framework::SingleGradOpMaker { DECLARE_NO_NEED_BUFFER_VARS_INFERER(RepeatInterleaveGradNoNeedBufferVarsInferer, "X"); -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/rrelu_op.cc b/paddle/fluid/operators/rrelu_op.cc deleted file mode 100644 index 3111ad4e5015d..0000000000000 --- a/paddle/fluid/operators/rrelu_op.cc +++ /dev/null @@ -1,131 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class RReluOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class RReluOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The input of RReLU op."); - AddOutput("Out", "The output of RReLU op."); - AddOutput("Noise", "The random sampled RReLU noise.") - .AsIntermediate() - .AsExtra(); - AddAttr("is_test", - "(bool, default false) Set to true for inference only, false " - "for training. Some layers may run faster when this is true.") - .SetDefault(false); - float default_lower = 1. / 8.; - AddAttr("lower", "Lower bound of the uniform distribution.") - .SetDefault(default_lower) - .AddCustomChecker([](const float& lower) { - PADDLE_ENFORCE_EQ(lower >= 0.0f && lower < 1.0f, - true, - phi::errors::InvalidArgument( - "'RRelu_lower' must be between 0.0 and 1.0.")); - }); - float defalut_upper = 1. 
/ 3.; - AddAttr("upper", "Upper bound of the uniform distribution.") - .SetDefault(defalut_upper) - .AddCustomChecker([](const float& upper) { - PADDLE_ENFORCE_EQ(upper > 0.0f && upper <= 1.0f, - true, - phi::errors::InvalidArgument( - "'RRelu_upper' must be between 0.0 and 1.0.")); - }); - AddComment(R"DOC( -RReLU Operator. - -Applies the randomized leaky rectified liner unit function, element-wise, -as described in the paper: - -`Empirical Evaluation of Rectified Activations in Convolutional Network`_. - -The function is defined as: - -.. math:: - \text{RReLU}(x) = - \begin{cases} - x & \text{if } x \geq 0 \\ - ax & \text{ otherwise } - \end{cases} - -where :math:`a` is randomly sampled from uniform distribution -:math:`\mathcal{U}(\text{lower}, \text{upper})`. - - See: https://arxiv.org/pdf/1505.00853.pdf - -)DOC"); - } -}; - -class RReluGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -template -class RReluGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("rrelu_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Noise", this->Output("Noise")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(rrelu, - RReluInferShapeFunctor, - PD_INFER_META(phi::RReluInferMeta)); - -REGISTER_OPERATOR(rrelu, - ops::RReluOp, - ops::RReluOpMaker, - ops::RReluGradOpMaker, - ops::RReluGradOpMaker, - RReluInferShapeFunctor); - -DECLARE_INFER_SHAPE_FUNCTOR(rrelu_grad, - RReluGradInferShapeFunctor, - PD_INFER_META(phi::RReluGradInferMeta)); -REGISTER_OPERATOR(rrelu_grad, ops::RReluGradOp, RReluGradInferShapeFunctor); diff --git 
a/paddle/fluid/operators/share_data_op.cc b/paddle/fluid/operators/share_data_op.cc index 074ca142c9567..39bb37907f841 100644 --- a/paddle/fluid/operators/share_data_op.cc +++ b/paddle/fluid/operators/share_data_op.cc @@ -16,8 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -namespace paddle { -namespace operators { +namespace paddle::operators { class ShareDataOp : public framework::OperatorWithKernel { public: @@ -58,8 +57,7 @@ Return a tensor $Out$ that shares data with the input tensor $X$ and without ten } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/shuffle_batch_op.cc b/paddle/fluid/operators/shuffle_batch_op.cc deleted file mode 100644 index 014cf8157d8ea..0000000000000 --- a/paddle/fluid/operators/shuffle_batch_op.cc +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include -#include -#include -#include -#include -#include - -#include "glog/logging.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/var_type_inference.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/platform/timer.h" -#include "paddle/phi/core/mixed_vector.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" - -namespace paddle { -namespace operators { -class ShuffleBatchOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - phi::errors::NotFound("Input(X) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Seed"), - true, - phi::errors::NotFound("Input(Seed) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - phi::errors::NotFound("Output(Out) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("ShuffleIdx"), - true, - phi::errors::NotFound("Output(ShuffleIdx) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("SeedOut"), - true, - phi::errors::NotFound("Output(SeedOut) should not be null.")); - - ctx->ShareDim("X", "Out"); - ctx->ShareLoD("X", "Out"); - ctx->ShareDim("Seed", "SeedOut"); - ctx->ShareLoD("Seed", "SeedOut"); - ctx->SetOutputDim("ShuffleIdx", common::make_ddim({-1})); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return phi::KernelKey(data_type, ctx.GetPlace()); - } - - phi::KernelKey GetKernelTypeForVar( - const std::string &var_name, - const phi::DenseTensor &tensor, - const phi::KernelKey &expected_kernel_type) const override { - if (var_name == "Seed") { - return 
phi::KernelKey(phi::Backend::ALL_BACKEND, - expected_kernel_type.layout(), - expected_kernel_type.dtype()); - } - return framework::OperatorWithKernel::GetKernelTypeForVar( - var_name, tensor, expected_kernel_type); - } -}; - -class ShuffleBatchOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(phi::DenseTensor) The input tensor of shuffle_batch op."); - AddInput("Seed", "(phi::DenseTensor) The input seed tensor."); - AddAttr( - "startup_seed", - "If input tensor 'Seed' is not initialized, the 'startup_seed' " - "will be used to replace it. The seed after shuffle batch will " - "be saved in 'SeedOut'. ") - .SetDefault(0); - AddOutput("Out", - "(phi::DenseTensor) The output tensor of shuffle_batch op."); - AddOutput("ShuffleIdx", "(Tensor) Record forword shuffle order"); - AddOutput("SeedOut", "(phi::DenseTensor) Saved new generated seed."); - AddComment(R"DOC( -Shuffle Batch Operator. - -This operator is used to shuffle input $X$'s elements. - -There is 2 input. The product of input dims (except last dim) numbers of elements will be shuffled. $Seed$ is tensor of seed. - -There are 3 outputs. $Out$ is shuffled tensor of input. $ShuffleIdx$ is the tensor used to record shuffle order. $SeedOut$ is same tensor of $Seed$. 
-)DOC"); - } -}; - -class ShuffleBatchOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("ShuffleIdx"), - true, - phi::errors::NotFound("Input(ShuffleIdx) should not be null")); - PADDLE_ENFORCE_EQ( - ctx->HasInput(framework::GradVarName("Out")), - true, - phi::errors::NotFound("Grad Input(Out) should not be null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput(framework::GradVarName("X")), - true, - phi::errors::NotFound("Grad Output(X) should not be null")); - - ctx->ShareDim(framework::GradVarName("Out"), framework::GradVarName("X")); - ctx->ShareLoD(framework::GradVarName("Out"), framework::GradVarName("X")); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; - -template -class ShuffleBatchGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("shuffle_batch_grad"); - op->SetInput("ShuffleIdx", this->Output("ShuffleIdx")); - op->SetAttrMap(this->Attrs()); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(shuffle_batch, - ops::ShuffleBatchOp, - ops::ShuffleBatchOpMaker, - ops::ShuffleBatchGradOpMaker, - ops::ShuffleBatchGradOpMaker); -REGISTER_OPERATOR(shuffle_batch_grad, ops::ShuffleBatchOpGrad); diff --git a/paddle/fluid/operators/sync_batch_norm_op.cc b/paddle/fluid/operators/sync_batch_norm_op.cc index 2fc8268f71086..103b9d550f4c5 
100644 --- a/paddle/fluid/operators/sync_batch_norm_op.cc +++ b/paddle/fluid/operators/sync_batch_norm_op.cc @@ -14,8 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/batch_norm_op.h" -namespace paddle { -namespace operators { +namespace paddle::operators { template class SyncBatchNormGradMaker : public framework::SingleGradOpMaker { public: @@ -46,8 +45,7 @@ class SyncBatchNormGradMaker : public framework::SingleGradOpMaker { } }; -} // namespace operators -} // namespace paddle +} // namespace paddle::operators namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/tdm_child_op.cc b/paddle/fluid/operators/tdm_child_op.cc deleted file mode 100644 index 6e3804fcb0a92..0000000000000 --- a/paddle/fluid/operators/tdm_child_op.cc +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include "paddle/fluid/operators/tdm_child_op.h" - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace operators { -class TDMChildOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "X(Tensor), dtype support int32/int64, X variable is the " - "node id of TDM-Tree"); - AddInput( - "TreeInfo", - "TreeInfo(Tensor), dtype support int32/int64, it stores the node " - "information in the following format: item_id(shape=1), " - "layer_id(shape=1), parent_id(shape=1), child_id(shape=child_nums)"); - AddAttr("child_nums", - "child_nums(int)" - "The child nums of one node, if the node hasn't enough child, " - "it should padding 0 until child nums equal to child_nums"); - AddOutput("Child", - "Return the children's node_id of input node, " - "if input don't have child, return 0"); - AddOutput("LeafMask", - "LeafMask has the same shape with Child" - "If child is leaf node, LeafMask value = 1, else = 0"); - AddAttr("dtype", - "(int, default INT32) " - "Output data type.") - .SetDefault(2); - AddComment(R"DOC(" - **Tdm Child** - According to the input node_id on the given tree, return the corresponding child node_id and - whether child is a leaf node by LeafMask.")DOC"); - } -}; - -class TDMChildOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - phi::errors::InvalidArgument( - "Inputs(X) of TdmChild should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("TreeInfo"), - true, - phi::errors::InvalidArgument( - "Inputs(TreeInfo) of TdmChild should not be null.")); - - int child_nums = ctx->Attrs().Get("child_nums"); - PADDLE_ENFORCE_GT( - child_nums, - 0, - phi::errors::InvalidArgument( - "ValueError: The value of the 'child_nums' must greater than 0. 
" - "But received child_nums value = %d, ", - child_nums)); - - auto info_dims = ctx->GetInputDim("TreeInfo"); - auto input_dims = ctx->GetInputDim("X"); - - PADDLE_ENFORCE_EQ( - info_dims.size(), - 2, - phi::errors::InvalidArgument( - "ShapeError: The dimensions of the 'tree info' must be 2. " - "But received tree info's dimensions = %d, " - "tree info's shape = [%s].", - info_dims.size(), - info_dims)); - - auto output_dims = common::vectorize(input_dims); - output_dims.push_back(child_nums); - ctx->SetOutputDim("Child", common::make_ddim(output_dims)); - ctx->SetOutputDim("LeafMask", common::make_ddim(output_dims)); - - if (ctx->GetOutputsVarType("Child")[0] == - framework::proto::VarType::LOD_TENSOR) { - ctx->ShareLoD("X", /*->*/ "Child"); - ctx->ShareLoD("X", /*->*/ "LeafMask"); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - tdm_child, - ops::TDMChildOp, - ops::TDMChildOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h deleted file mode 100644 index b645566736a9d..0000000000000 --- a/paddle/fluid/operators/tdm_child_op.h +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "paddle/common/flags.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/mixed_vector.h" - -namespace paddle { -namespace operators {} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/tdm_sampler_op.cc b/paddle/fluid/operators/tdm_sampler_op.cc deleted file mode 100644 index db2dd6b4ced37..0000000000000 --- a/paddle/fluid/operators/tdm_sampler_op.cc +++ /dev/null @@ -1,136 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace operators { - -class TDMSamplerOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "X(Tensor), Input variable which" - "mapping the leaf node idx of tdm tree," - "dtype support int32/int64"); - AddInput("Travel", - "Travel(Tensor), must has the same dtype with Layer" - "Contains path information of all leaf nodes to root node," - " dtype support int32/64"); - AddInput("Layer", - "Layer(Tensor), must has the same dtype with Travel " - "Indicates which nodes are in each layer"); - AddAttr("output_positive", - "output_positive(bool)" - "Whether positive samples are included in the output") - .SetDefault(true); - AddAttr>( - "neg_samples_num_list", - "neg_samples_num_list(python:list[int], C++:vector)" - "The num of negative samples in each layer") - .SetDefault({}); - AddAttr>("layer_offset_lod", - "offset lod information of Layer") - .SetDefault({}); - AddAttr("seed", - "(int) The seed used in sampler. If it is 0, " - "the sampler will generate a seed randomly.") - .SetDefault(0); - AddAttr("dtype", - "(int, default INT32) " - "Output data type.") - .SetDefault(2); - AddOutput("Out", - "Sampling result lodTensor, with shape [batch_size, layer_num, " - "neg_num_of_layer]"); - AddOutput("Labels", - "Labels of sampling result, has the same shape with Out." 
- "pos samples mapping value 1, neg sample mapping value 0") - .AsDispensable(); - AddOutput( - "Mask", - "Padding flag of Sampling result, if sampling res comes from padding," - "it will be 0, else 1, lodTensor, with shape [batch_size, " - "layer_num, neg_num_of_layer]"); - AddComment(R"DOC(" - **TDM Sampler** - According to the input positive samples at leaf node, do negative sampling layer by layer on the given tree.")DOC"); - } -}; - -class TDMSamplerOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - phi::errors::InvalidArgument( - "Inputs(Input) of TdmSampler should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Travel"), - true, - phi::errors::InvalidArgument( - "Inputs(Travel) of TdmSampler should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Layer"), - true, - phi::errors::InvalidArgument( - "Inputs(Layer) of TdmSampler should not be null.")); - auto neg_samples_num_vec = - ctx->Attrs().Get>("neg_samples_num_list"); - auto output_positive_flag = ctx->Attrs().Get("output_positive"); - - int64_t sample_res_length = 0; - for (auto sample_nums : neg_samples_num_vec) { - sample_res_length += sample_nums + (int64_t)output_positive_flag; - } - - auto input_dims = ctx->GetInputDim("X"); - auto ddim = common::make_ddim({-1, sample_res_length}); - if (ctx->IsRuntime()) { - auto output_dims = common::vectorize(input_dims); - auto batch_size = output_dims[0]; - ctx->SetOutputDim("Out", - common::make_ddim({batch_size, sample_res_length})); - ctx->SetOutputDim("Labels", - common::make_ddim({batch_size, sample_res_length})); - ctx->SetOutputDim("Mask", - common::make_ddim({batch_size, sample_res_length})); - } else { - ctx->SetOutputDim("Out", ddim); - ctx->SetOutputDim("Labels", ddim); - ctx->SetOutputDim("Mask", ddim); - } - } - - protected: - phi::KernelKey 
GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - tdm_sampler, - ops::TDMSamplerOp, - ops::TDMSamplerOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/transfer_layout_op.cc b/paddle/fluid/operators/transfer_layout_op.cc deleted file mode 100644 index 19334ca2dad6a..0000000000000 --- a/paddle/fluid/operators/transfer_layout_op.cc +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/operators/transfer_layout_op.h" - -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace framework { -class OpDesc; -class InferShapeContext; -template -class EmptyGradOpMaker; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -namespace paddle { -namespace operators { - -class TransferLayoutOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - // kernel's device type is decided by input tensor place - auto *in = ctx.InputVar("X"); - auto *in_tensor = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in); - // NOTE(zhiqiu): hot fix, allow empty tensor of kMKLDNN layout to run this - // op - if (in_tensor->layout() != DataLayout::ONEDNN) { - PADDLE_ENFORCE_EQ(in_tensor->IsInitialized(), - true, - phi::errors::PreconditionNotMet( - "The tensor of Input(X) is not initialized.")); - } - auto place = - in_tensor->IsInitialized() ? in_tensor->place() : phi::CPUPlace(); - phi::DataType dtype = in_tensor->IsInitialized() ? 
in_tensor->dtype() - : phi::DataType::FLOAT32; - return phi::KernelKey(phi::TransToProtoVarType(dtype), place); - } - - phi::KernelKey GetKernelTypeForVar( - const std::string &var_name, - const phi::DenseTensor &tensor, - const phi::KernelKey &expected_kernel_type) const override { - return phi::KernelKey(phi::Backend::ALL_BACKEND, - expected_kernel_type.layout(), - expected_kernel_type.dtype()); - } -}; - -class TransferLayoutInferVarType : public framework::VarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - ctx->SyncTypeAndDataType("X", "Out"); - } -}; - -class TransferLayoutKernel { - public: - void operator()(const framework::ExecutionContext &ctx) const { - auto *x = ctx.InputVar("X"); - auto *out = ctx.OutputVar("Out"); - auto &dev_ctx = ctx.device_context(); - auto src_layout = ctx.Attr("src_layout"); - auto dst_layout = ctx.Attr("dst_layout"); - auto input_name = ctx.InputName("X"); - TransferLayoutFunctor( - x, out, dev_ctx, src_layout, dst_layout, input_name)(); - } -}; - -class TransferLayoutOpProtoMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(phi::DenseTensor) The input Tensor"); - AddOutput("Out", - "(phi::DenseTensor) The Output Tensor with desired layout"); - // NOTE(zhiqiu): in most case, the src_layout is not needed, the op can use - // the layout - // of input X. However, in some mkldnn kernel, the src layout computed by - // GetKernelTypeForVar is different with the layout of tensor X. 
- AddAttr("src_layout", - "kAnyLayout = 0, kNHWC = 1, kNCHW = 2, kMKLDNN = 3, default " - "-1 means unspecified and use the tensor's layout.") - .SetDefault(-1); - AddAttr("dst_layout", - "kAnyLayout = 0, kNHWC = 1, kNCHW = 2, kMKLDNN = 3"); - AddComment(R"DOC( - TransferLayout Operator)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(transfer_layout, - TransferLayoutInferShapeFunctor, - PD_INFER_META(phi::TransferLayoutInferMeta)); -REGISTER_OPERATOR( - transfer_layout, - ops::TransferLayoutOp, - ops::TransferLayoutOpProtoMaker, - ops::TransferLayoutInferVarType, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - TransferLayoutInferShapeFunctor); - -REGISTER_OP_VERSION(transfer_layout) - .AddCheckpoint(R"ROC(refine transfer_layout, add src_layout attribute)ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "src_layout", - "(int, the layout of the input tensor", - -1)); diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h deleted file mode 100644 index 1b4ef2d1b5abb..0000000000000 --- a/paddle/fluid/operators/transfer_layout_op.h +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "paddle/fluid/framework/data_layout_transform.h" -#include "paddle/fluid/framework/data_transform.h" -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/platform/device_context.h" - -namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace framework { -class Variable; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -using DataLayout = phi::DataLayout; - -class TransferLayoutFunctor { - public: - TransferLayoutFunctor(const framework::Variable *in, - framework::Variable *out, - const platform::DeviceContext &dev_ctx, - const int src_layout, - const int dst_layout, - std::string in_name) - : in_(in), - out_(out), - dev_ctx_(dev_ctx), - src_layout_(src_layout), - dst_layout_(dst_layout), - in_name_(in_name) {} - - void operator()() const { - auto &in_tensor = *framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_); - phi::DenseTensor out_tensor; - - auto out_layout = static_cast(dst_layout_); - out_tensor.set_layout(out_layout); - -#ifdef PADDLE_WITH_DNNL - // NOTE(zhiqiu): to handle the special case in ApplyDataTransform() in - // data_transfer.cc - auto in_layout = static_cast(src_layout_); - auto *tensor_out = out_->GetMutable(); - VLOG(4) << in_layout << "->" << out_layout << " " << in_tensor.layout(); - if (!in_tensor.IsInitialized() && in_layout == DataLayout::ONEDNN && - out_layout == DataLayout::kNHWC) { - tensor_out->Resize(in_tensor.dims()); - tensor_out->set_layout(out_layout); - phi::funcs::MatchShapeToLayout(tensor_out, in_layout, out_layout); - return; - } - if (in_layout == DataLayout::ONEDNN || out_layout == DataLayout::ONEDNN) { - PADDLE_ENFORCE_NE( - in_layout, - out_layout, - phi::errors::PreconditionNotMet( - "No layout transform needed between two oneDNN OPKernels.")); - - if (in_layout != DataLayout::ONEDNN && out_layout == 
DataLayout::ONEDNN) { - // Case1 - transform from Non-ONEDNN OPKernel to ONEDNN OPKernel - // Just set layout/format. No real transform occur - - auto out_format = phi::funcs::OneDNNFormatForSize( - in_tensor.dims().size(), phi::funcs::ToOneDNNFormat(in_layout)); - out_tensor.ShareDataWith(in_tensor); - // For NHWC data we need reshape of tensors as MKL-DNN - // is expecting NHWC dims description order - if (in_layout == DataLayout::kNHWC) { - VLOG(4) << "kNHWC"; - phi::funcs::MatchShapeToLayout(&out_tensor, in_layout, out_layout); - phi::OneDNNContext::tls().set_cur_paddle_data_layout(in_layout); - } - auto out_tz = out_tensor.dims().size() == 0 - ? std::vector{1} - : common::vectorize(out_tensor.dims()); - dnnl::memory::data_type in_type = - phi::funcs::ToOneDNNDataType(in_tensor.dtype()); - - dnnl::memory::desc out_mem_desc(out_tz, in_type, out_format); - out_tensor.set_mem_desc(out_mem_desc); - } else { - auto target_layout = - phi::OneDNNContext::tls().get_cur_paddle_data_layout(); - // NOTE(zhiqiu): hot fix, follow the same logic in DataCopy() in - // fetch_op.cc - if (out_layout == DataLayout::kNCHW && - in_name_ == framework::GradVarName("Filter")) { - target_layout = out_layout; - } - VLOG(4) << "TransDataLayoutFromOneDNN: " << in_layout << "->" - << target_layout; - // Case2 - transform from ONEDNN OPKernel to Non-ONEDNN OPKernel - // Do transform via ONEDNN lib - phi::funcs::TransDataLayoutFromOneDNN(in_layout, - target_layout, - in_tensor, - &out_tensor, - dev_ctx_.GetPlace()); - } - } else { - // Case3 - transform between Non-ONEDNN OPKernels - TransDataLayout(dev_ctx_, in_tensor, &out_tensor); - } -#else - // Case3 - transform between Non-ONEDNN OPKernels - TransDataLayout(dev_ctx_, in_tensor, &out_tensor); -#endif - framework::SetTensorToVariable(*in_, out_tensor, out_); - } - - private: - void TransDataLayout(const platform::DeviceContext &dev_ctx, - const phi::DenseTensor &in, - phi::DenseTensor *out) const { - PADDLE_ENFORCE_EQ( - 
common::arity(in.dims()), - 4, - phi::errors::InvalidArgument( - "Input dimension arity only can be 4, the input dimension is %s.", - in.dims())); - - auto src_dim = in.dims(); - std::vector dst_dim; - - auto axis = framework::GetAxis(in.layout(), out->layout()); - dst_dim.resize(axis.size()); - for (size_t i = 0; i < axis.size(); i++) { - dst_dim[i] = src_dim[axis[i]]; - } - - out->Resize(common::make_ddim(dst_dim)); - out->mutable_data(in.place(), in.type()); - - framework::VisitDataType( - framework::TransToProtoVarType(in.dtype()), - framework::CastDataLayout(&dev_ctx, axis, in, out)); - } - - const framework::Variable *in_; - framework::Variable *out_; - const platform::DeviceContext &dev_ctx_; - const int src_layout_; - const int dst_layout_; - std::string in_name_; -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt index f9b7948de3329..32dda6620d26a 100644 --- a/paddle/fluid/pir/dialect/CMakeLists.txt +++ b/paddle/fluid/pir/dialect/CMakeLists.txt @@ -79,6 +79,9 @@ set(op_src_files_tmp set(op_vjp_src_file_tmp ${op_vjp_source_file_tmp}) +set(op_cc_split_num 4) +set(bwd_op_cc_split_num 2) + # Auto code gen execute_process( COMMAND ${PYTHON_EXECUTABLE} ${op_parse_file} --op_yaml_path @@ -95,15 +98,22 @@ execute_process( --op_compat_yaml_file ${op_compat_yaml_file} --namespaces ${op_namespace} --dialect_name ${dialect_name} --op_def_h_file ${op_header_file_tmp} --op_info_file ${op_info_file_tmp} --op_def_cc_file ${op_src_files_tmp} - --op_vjp_cc_file ${op_vjp_src_file_tmp} --with_distributed - ${WITH_DISTRIBUTE}) + --op_vjp_cc_file ${op_vjp_src_file_tmp} --op_cc_split_num + ${op_cc_split_num} --bwd_op_cc_split_num ${bwd_op_cc_split_num} + --with_distributed ${WITH_DISTRIBUTE}) + +set(split_op_source_files + ${PIR_DIALECT_BINARY_DIR}/pd_op1.cc ${PIR_DIALECT_BINARY_DIR}/pd_op2.cc + ${PIR_DIALECT_BINARY_DIR}/pd_op3.cc ${PIR_DIALECT_BINARY_DIR}/pd_op4.cc) 
+set(split_bwd_op_source_files ${PIR_DIALECT_BINARY_DIR}/pd_op_bwd1.cc + ${PIR_DIALECT_BINARY_DIR}/pd_op_bwd2.cc) set(generated_files_pd_op "${op_header_file}" "${op_info_file}" - "${op_source_file}" + "${split_op_source_files}" + "${split_bwd_op_source_files}" "${op_vjp_source_file}" - "${bwd_op_source_file}" "${fused_op_source_file}" "${bwd_fused_op_source_file}" "${pir_op_source_file}" @@ -177,7 +187,7 @@ set(python_c_header_file_tmp ${python_c_header_file}.tmp) set(python_c_source_file_tmp ${python_c_source_file}.tmp) set(trimmed_op_yaml_files - ${op_fwd_yaml},${op_bwd_yaml},${fused_op_fwd_yaml},${fused_op_bwd_yaml},${pir_op_fwd_yaml},${pir_op_bwd_yaml},${pir_update_op_fwd_yaml} + ${op_fwd_yaml},${op_bwd_yaml},${fused_op_fwd_yaml},${fused_op_bwd_yaml},${pir_op_fwd_yaml},${pir_op_bwd_yaml},${pir_update_op_fwd_yaml},${pir_op_fwd_sparse_yaml},${pir_op_bfd_sparse_yaml} ) execute_process( @@ -247,8 +257,8 @@ set(op_dialect_srcs ${CMAKE_CURRENT_SOURCE_DIR}/operator/ir/op_attribute.cc ${CMAKE_CURRENT_SOURCE_DIR}/operator/ir/op_type.cc ${op_info_file} - ${op_source_file} - ${bwd_op_source_file} + ${split_op_source_files} + ${split_bwd_op_source_files} ${fused_op_source_file} ${bwd_fused_op_source_file} ${pir_op_source_file} diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc index 4d921bed45f4b..195813f87243e 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc @@ -25,8 +25,7 @@ #include "paddle/pir/include/core/value.h" #include "paddle/utils/flat_hash_map.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { pir::Value shard_tensor( const pir::Value& x, @@ -64,5 +63,4 @@ pir::Value reshard(const pir::Value& x, return reshard_op.result(0); } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc 
b/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc index fc261efe9e04c..73dcc128d1fb8 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc @@ -25,8 +25,7 @@ #include "paddle/pir/include/core/builtin_op.h" #include "paddle/pir/include/core/ir_context.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { const char* ShardTensorOp::attributes_name[1] = {"op_dist_attr"}; // NOLINT const char* ReshardOp::attributes_name[1] = {"op_dist_attr"}; // NOLINT @@ -346,8 +345,7 @@ void ReshardOp::Build(pir::Builder& builder, ::pir::PassStopGradientsDefaultly(argument); } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ShardTensorOp) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ReshardOp) diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_tools.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_tools.cc index 505b178a452b0..3ec0fd959cc38 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_tools.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_tools.cc @@ -17,8 +17,7 @@ #include "paddle/common/enforce.h" #include "paddle/pir/include/core/operation.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { bool AllInputAreDist(const std::vector& inputs) { for (auto value : inputs) { @@ -133,6 +132,7 @@ pir::Attribute CvtToPirAttr(const phi::distributed::ArgDistAttr& dist_attr) { auto& vec = PADDLE_GET_CONST(std::vector, dist_attr); std::vector array; + array.reserve(vec.size()); for (auto& attr : vec) { array.push_back(TensorDistAttribute::get(ctx, attr.process_mesh(), @@ -159,10 +159,10 @@ pir::Attribute CreateReplicatedDistAttr(pir::Type prim_type, } return nullptr; } -pir::Type CvtToPirDistType(pir::Type prim_type, pir::Attribute dist_attr) { - if (!prim_type) return nullptr; +pir::Type CvtToPirDistType(pir::Type global_type, pir::Attribute dist_attr) { + if (!global_type) return nullptr; 
auto ctx = pir::IrContext::Instance(); - if (auto dense_tensor_type = prim_type.dyn_cast()) { + if (auto dense_tensor_type = global_type.dyn_cast()) { auto tensor_dist_attr = dist_attr.dyn_cast(); if (!tensor_dist_attr) { VLOG(0) << "Convert dense tensor type to dist type with attribute {" @@ -172,7 +172,7 @@ pir::Type CvtToPirDistType(pir::Type prim_type, pir::Attribute dist_attr) { "with non-empty TensorDistAttr")); } return DistDenseTensorType::get(ctx, dense_tensor_type, tensor_dist_attr); - } else if (auto vec_type = prim_type.dyn_cast()) { + } else if (auto vec_type = global_type.dyn_cast()) { auto array_attr = dist_attr.dyn_cast(); if (!array_attr) { VLOG(0) << "Convert vector type to dist type with attribute {" @@ -192,8 +192,8 @@ pir::Type CvtToPirDistType(pir::Type prim_type, pir::Attribute dist_attr) { } return pir::VectorType::get(ctx, dist_vec_type); } else { - VLOG(0) << "Convert type{" << prim_type << "} to dist type with attribute {" - << dist_attr << "}."; + VLOG(0) << "Convert type{" << global_type + << "} to dist type with attribute {" << dist_attr << "}."; PADDLE_THROW(common::errors::InvalidArgument( "Currently only support convert dense_tensor_type r vector type to " "dist.")); @@ -225,5 +225,4 @@ void CopyLeafOpToMesh(pir::Value value, ProcessMeshAttribute mesh_attr) { } } } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_tools.h b/paddle/fluid/pir/dialect/distributed/ir/dist_tools.h index a50331a8ea395..10f76a86e600d 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_tools.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_tools.h @@ -37,7 +37,7 @@ pir::Attribute CvtToPirAttr(const phi::distributed::ArgDistAttr& dist_attr); pir::Attribute CreateReplicatedDistAttr(pir::Type prim_type, ProcessMeshAttribute mesh); -pir::Type CvtToPirDistType(pir::Type prim_type, pir::Attribute dist_attr); +pir::Type CvtToPirDistType(pir::Type global_type, 
pir::Attribute dist_attr); /// /// When the following conditions are met: diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_type.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_type.cc index d1b70c24a1c56..5d03f093175cf 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_type.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_type.cc @@ -16,8 +16,7 @@ #include "paddle/fluid/pir/dialect/distributed/ir/type_storage.h" #include "paddle/pir/include/core/ir_context.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { pir::DenseTensorType DistDenseTensorType::dense_tensor_type() const { return storage()->dense_tensor_type; @@ -69,7 +68,6 @@ pir::DenseTensorType DistDenseTensorType::local_type() const { offset()); } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::DistDenseTensorType) diff --git a/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.cc b/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.cc index caccbe69ed091..bbfe95ea4ffa6 100644 --- a/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.cc +++ b/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.cc @@ -38,8 +38,7 @@ using paddle::dialect::DistDenseTensorType; COMMON_DECLARE_bool(print_ir); -namespace paddle { -namespace dialect { +namespace paddle::dialect { pir::Type CastToLocalType(pir::Type type) { if (auto dist_type = type.dyn_cast()) { @@ -164,5 +163,4 @@ void DistToDensePass(pir::Program* prog) { } } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect diff --git a/paddle/fluid/pir/dialect/distributed/transforms/fuse_allreduce_split_to_reducescatter_pass.cc b/paddle/fluid/pir/dialect/distributed/transforms/fuse_allreduce_split_to_reducescatter_pass.cc index 4191eaa4bce50..5d1a9b87431f1 100644 --- a/paddle/fluid/pir/dialect/distributed/transforms/fuse_allreduce_split_to_reducescatter_pass.cc +++ 
b/paddle/fluid/pir/dialect/distributed/transforms/fuse_allreduce_split_to_reducescatter_pass.cc @@ -35,7 +35,11 @@ class FusedAllReduceSplitPattern : public paddle::drr::DrrPatternBase { const auto &c_allreduce_sum_ = pat.Op(paddle::dialect::CAllreduceSum_Op::name(), {{"ring_id", pat.Attr("ring_id")}, - {"use_calc_stream", pat.Attr("use_calc_stream")}}); + {"use_calc_stream", pat.Attr("use_calc_stream")}, + {"execution_stream", pat.Attr("execution_stream")}, + {"force_record_event", pat.Attr("force_record_event")}, + {"event_to_record", pat.Attr("event_to_record")}, + {"events_to_wait", pat.Attr("events_to_wait")}}); const auto &assign = pat.Op(paddle::dialect::AssignOp::name()); const auto &full = pat.Op(paddle::dialect::FullOp::name()); const auto &split_with_num = pat.Op(paddle::dialect::SplitWithNumOp::name(), @@ -74,7 +78,11 @@ class FusedAllReduceSplitPattern : public paddle::drr::DrrPatternBase { res.Op(paddle::dialect::CReducescatterOp::name(), {{"ring_id", pat.Attr("ring_id")}, {"nranks", pat.Attr("num")}, - {"use_calc_stream", pat.Attr("use_calc_stream")}}); + {"use_calc_stream", pat.Attr("use_calc_stream")}}, + {{"execution_stream", pat.Attr("execution_stream")}, + {"force_record_event", pat.Attr("force_record_event")}, + {"event_to_record", pat.Attr("event_to_record")}, + {"events_to_wait", pat.Attr("events_to_wait")}}); c_reducescatter({&res.Tensor("input_grad_partial")}, {&res.Tensor("out")}); } diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc index f1d5b85e357d1..01687ca360257 100644 --- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc +++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc @@ -19,8 +19,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/pir/include/core/builtin_attribute.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { const char* PhiKernelOp::attributes_name[attributes_num] = { // NOLINT "op_name", @@ -260,8 +259,7 @@ phi::KernelKey 
OneDNNLegacyKernelOp::kernel_key() { } #endif -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp) diff --git a/paddle/fluid/pir/dialect/op_generator/api_gen.py b/paddle/fluid/pir/dialect/op_generator/api_gen.py index f7cbf42580b9d..521e9ea90bbf0 100644 --- a/paddle/fluid/pir/dialect/op_generator/api_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/api_gen.py @@ -114,7 +114,7 @@ }} """ -AMP_OPTIONAL_INPUTS_TEMPLATE = """if ({optional_input}) amp_values_vector.push_back({vec_optional_input}); +AMP_OPTIONAL_INPUTS_TEMPLATE = """if ({optional_input}) {{ amp_values_vector.push_back({vec_optional_input}); }} """ AMP_NEW_INPUTS_TEMPLATE = """auto new_{input} = paddle::imperative::{cast_func}("{input}", {input}, amp_dst_dtype, op_name); diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py index 36d3a26f680a0..ed4b1bae54650 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py @@ -14,6 +14,7 @@ import argparse import logging +import math import os import pathlib import sys @@ -1130,6 +1131,21 @@ def get_mutable_attribute_grad_semantic(op_info, op_info_items): return mutable_attribute_grad_semantics +def split_ops(op_info_items: dict, cc_file, split_nums): + op_list = list(op_info_items.keys()) + ops_max_size = math.ceil(len(op_list) / split_nums) + split_op_info_items = [] + for i in range(split_nums): + split_op_info_items.append({}) + for i, op_name in enumerate(op_list): + list_idx = math.ceil((i + 1) / ops_max_size) - 1 + split_op_info_items[list_idx][op_name] = op_info_items[op_name] + split_cc_files = [] + for i in range(split_nums): + split_cc_files.append(cc_file.replace(".cc", f"{i + 1}.cc")) + return split_op_info_items, split_cc_files + + def GenOneDnnExtraAttrsDefaultValue(onednn_extra_args): 
INTARRAY_STR_TEMPLATE = """ pir::Attribute attr_{attr_name} = {op_attribute_type}::get(pir::IrContext::Instance(), phi::IntArray({attr})); """ @@ -2080,6 +2096,8 @@ def OpGenerator( op_info_file, op_def_cc_file, op_vjp_cc_file, + op_cc_split_num, + bwd_op_cc_split_num, onednn_yaml_file, ops_onednn_extra_yaml_file, ): @@ -2126,9 +2144,11 @@ def OpGenerator( op_infos = [] all_op_info_items = {} + new_op_def_cc_file = [] first_file = True onednn_only_op_list = [] - for yaml_file in op_yaml_files: + for idx in range(len(op_yaml_files)): + yaml_file = op_yaml_files[idx] op_yaml_items = [] with open(yaml_file, "r") as f: ops = yaml.safe_load(f) @@ -2194,13 +2214,37 @@ def OpGenerator( key_suffix = '_sp' if item.is_sparse_op else '' op_info_items[op['name'] + key_suffix] = item all_op_info_items[op['name'] + key_suffix] = item - op_infos.append(op_info_items) + + if dialect_name != "onednn_op": + cc_file = op_def_cc_file[idx] + if ( + yaml_file.split('/')[-1] == "ops.parsed.yaml" + and op_cc_split_num is not None + ): + split_op_info_items, split_cc_files = split_ops( + op_info_items, cc_file, op_cc_split_num + ) + op_infos.extend(split_op_info_items) + new_op_def_cc_file.extend(split_cc_files) + elif ( + yaml_file.split('/')[-1] == "backward.parsed.yaml" + and bwd_op_cc_split_num is not None + ): + split_op_info_items, split_cc_files = split_ops( + op_info_items, cc_file, bwd_op_cc_split_num + ) + op_infos.extend(split_op_info_items) + new_op_def_cc_file.extend(split_cc_files) + else: + op_infos.append(op_info_items) + new_op_def_cc_file.append(cc_file) if first_file: first_file = False if dialect_name == "onednn_op": op_infos = [all_op_info_items] + new_op_def_cc_file = op_def_cc_file # (3) auto code gen op_list_strs = [] declare_type_id_strs = [] @@ -2329,7 +2373,7 @@ def OpGenerator( f.write(op_info_str) # (6) write to files for xx_op.cc.tmp - for id in range(len(op_def_cc_file)): + for id in range(len(new_op_def_cc_file)): source_file_str = source_file_strs[id] for 
name in reversed(namespaces): source_file_str = NAMESPACE_GARD_TEMPLATE.format( @@ -2349,7 +2393,7 @@ def OpGenerator( input=source_file_str, define_type_id=define_type_id_strs[id], ) - with open(op_def_cc_file[id], 'w') as f: + with open(new_op_def_cc_file[id], 'w') as f: f.write(source_file_str) # (6) write to files for xx_vjp_op.cc.tmp @@ -2381,6 +2425,8 @@ def ParseArguments(): parser.add_argument('--op_info_file', type=str) parser.add_argument('--op_def_cc_file', type=str) parser.add_argument('--op_vjp_cc_file', type=str) + parser.add_argument('--op_cc_split_num', type=int) + parser.add_argument('--bwd_op_cc_split_num', type=int) parser.add_argument('--onednn_yaml_file', type=str) parser.add_argument('--ops_onednn_extra_yaml_file', type=str) parser.add_argument('--with_distributed', type=strtobool) @@ -2403,6 +2449,8 @@ def ParseArguments(): op_info_file = args.op_info_file op_def_cc_files = args.op_def_cc_file.split(",") op_vjp_cc_file = args.op_vjp_cc_file + op_cc_split_num = args.op_cc_split_num + bwd_op_cc_split_num = args.bwd_op_cc_split_num onednn_yaml_file = args.onednn_yaml_file ops_onednn_extra_yaml_file = args.ops_onednn_extra_yaml_file @@ -2417,6 +2465,8 @@ def ParseArguments(): op_info_file, op_def_cc_files, op_vjp_cc_file, + op_cc_split_num, + bwd_op_cc_split_num, onednn_yaml_file, ops_onednn_extra_yaml_file, ) diff --git a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py index 090aab4e3c4ed..1eb784ed8c0e9 100644 --- a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py @@ -59,15 +59,36 @@ }} }}""" + STATIC_ONLY_FUNCTION_IMPL_TEMPLATE = """ static PyObject *{name}(PyObject *self, PyObject *args, PyObject *kwargs) {{ VLOG(6) << "Call static_api_{name}"; return static_api_{name}(self, args, kwargs); }}""" +SPARSE_FUNCTION_IMPL_TEMPLATE = """ +static PyObject *sparse_{name}(PyObject *self, PyObject *args, PyObject *kwargs) {{ + if 
(egr::Controller::Instance().GetCurrentTracer() == nullptr) {{ + VLOG(6) << "Call static_api_{name}"; + return static_api_{name}{name_suffix}(self, args, kwargs); + }} else {{ + VLOG(6) << "Call eager_api_{name}"; + return sparse::eager_api_{name}(self, args, kwargs); + }} +}}""" + +SPARSE_STATIC_ONLY_FUNCTION_IMPL_TEMPLATE = """ +static PyObject *sparse_{name}(PyObject *self, PyObject *args, PyObject *kwargs) {{ + VLOG(6) << "Call static_api_{name}"; + return static_api_{name}{name_suffix}(self, args, kwargs); +}}""" + OPS_API_TEMPLATE = """ {{"{name}", (PyCFunction)(void (*)(void)){name}, METH_VARARGS | METH_KEYWORDS, "C++ interface function for {name}."}},""" +SPARSE_OPS_API_TEMPLATE = """ +{{"sparse_{name}", (PyCFunction)(void (*)(void))sparse_{name}, METH_VARARGS | METH_KEYWORDS, "C++ interface function for sparse_{name}."}},""" + NEED_GEN_STATIC_ONLY_APIS = [ 'c_allreduce_avg_', 'c_reduce_avg', @@ -158,6 +179,7 @@ 'fused_adam_', 'fused_batch_norm_act_', 'fused_bn_add_activation_', + 'fused_elemwise_activation', 'fused_elemwise_add_activation', 'fused_scale_bias_relu_conv_bn', 'fused_scale_bias_add_relu', @@ -168,6 +190,8 @@ 'fused_elementwise_div', 'fused_elementwise_mul', 'fused_elementwise_sub', + 'fusion_group', + 'fusion_lstm', 'fusion_seqpool_cvm_concat', 'nce', 'lars_momentum', @@ -240,9 +264,17 @@ def _gen_one_function_impl(self, name): else: return FUNCTION_IMPL_TEMPLATE.format(name=name) + def _gen_sparse_one_function_impl(self, name, name_suffix): + return SPARSE_FUNCTION_IMPL_TEMPLATE.format( + name=name, name_suffix=name_suffix + ) + def _gen_one_ops_api(self, name): return OPS_API_TEMPLATE.format(name=name) + def _gen_sparse_one_ops_api(self, name): + return SPARSE_OPS_API_TEMPLATE.format(name=name) + def gen_cpp_file( self, op_yaml_files, op_compat_yaml_file, namespaces, cpp_file_path ): @@ -255,22 +287,15 @@ def gen_cpp_file( for op_name in op_info.op_phi_name: if self._need_skip(op_info, op_name): continue - sparse_op_inplace_name_suffix = '' 
- sparse_op_name_suffix = '' - if op_name[-1] == "_": - function_impl_str += self._gen_one_function_impl( - op_name + sparse_op_inplace_name_suffix - ) - ops_api_str += self._gen_one_ops_api( - op_name + sparse_op_inplace_name_suffix + if op_info.is_sparse_op: + op_name_suffix = "sp_" if op_name[-1] == "_" else "_sp" + function_impl_str += self._gen_sparse_one_function_impl( + op_name, op_name_suffix ) + ops_api_str += self._gen_sparse_one_ops_api(op_name) else: - function_impl_str += self._gen_one_function_impl( - op_name + sparse_op_name_suffix - ) - ops_api_str += self._gen_one_ops_api( - op_name + sparse_op_name_suffix - ) + function_impl_str += self._gen_one_function_impl(op_name) + ops_api_str += self._gen_one_ops_api(op_name) inner_body = NAMESPACE_INNER_TEMPLATE.format( function_impl=function_impl_str, ops_api=ops_api_str diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/cinn_op_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/cinn_op_infer_sym.cc index 0aec58d385311..1dcb931607f13 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/cinn_op_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/cinn_op_infer_sym.cc @@ -17,6 +17,7 @@ #include "paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h" namespace cinn::dialect { +using paddle::dialect::details::CreateShapeOrDataForXShape; bool BroadcastOpInferSymbolicShape( pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { @@ -192,6 +193,11 @@ bool ReshapeOpInferSymbolicShape( }(); infer_context->SetShapeOrDataForValue(op->result(0), shape_data); + // NOTE(Aureliue84): Parse XShape symbolic expression which is used for + // backward process. It will be removed after normolizing ReshapeGrad(out, + // xshape) into ReshapeGrad(out, x). 
+ infer_context->SetShapeOrDataForValue(op->result(1), + CreateShapeOrDataForXShape(x_dim_expr)); return true; } diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.cc index 7b6cc088a9c3b..61dbdff08e064 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.cc @@ -99,6 +99,18 @@ bool ReduceInferDim(pir::Operation *op, return true; } +symbol::ShapeOrDataDimExprs CreateShapeOrDataForXShape( + const symbol::ShapeOrDataDimExprs &x_dim_exprs) { + const auto InsertZeros = + [](const std::vector &dims) -> decltype(auto) { + auto out_dims = dims; + out_dims.insert(out_dims.begin(), 0); + return out_dims; + }; + const auto &x_dims = x_dim_exprs.shape(); + return symbol::TensorShapeOrDataDimExprs(InsertZeros(x_dims)); +} + void BuildCstrEqForTensorListAlongAxis( pir::InferSymbolicShapeContext *infer_context, const symbol::TensorListShapeOrDataDimExprs &shape_data_list, diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h index c6e348140981f..a510c828cdf9e 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h @@ -118,6 +118,9 @@ inline ExprVec GetExprVecFromShape(const ShapeOrData &shapeordata) { } } +symbol::ShapeOrDataDimExprs CreateShapeOrDataForXShape( + const symbol::ShapeOrDataDimExprs &x_dim_exprs); + std::optional> VecExpr2Int64(const ExprVec &expr_vec); ExprVec VecInt642Expr(const std::vector &int_vec); diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc 
b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc index 5c7f01606c2df..777868c691c74 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc @@ -274,7 +274,7 @@ bool ConcatOpInferSymbolicShape(pir::Operation *op, SetShapeOrDataForAxis(axis); } else { pir::Value res = op->result(0); - infer_context->SetStaticShapeForValue(res); + infer_context->SetSymbolForValueByStaticShape(res); // update axis value auto res_shape = infer_context->GetShapeOrDataForValue(res); for (size_t i = 0; i < rank; ++i) { diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc index 3909b64651c40..03f48884cf165 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc @@ -178,10 +178,6 @@ bool ScaleOpInferSymbolicShape(pir::Operation *op, } // namespace paddle::dialect -namespace cinn::dialect { -using paddle::dialect::ReverseOpInferSymbolicShape; -using paddle::dialect::ScaleOpInferSymbolicShape; -using paddle::dialect::SelectOpInferSymbolicShape; -} // namespace cinn::dialect +namespace cinn::dialect {} // namespace cinn::dialect #undef OP_SAME_OPERANDS_AND_RESULT diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index 083918ebcd6bc..a98fa1ce7c9b5 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -17,6 +17,7 @@ #include 
"paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h" namespace paddle::dialect { +using paddle::dialect::details::CreateShapeOrDataForXShape; bool ArgmaxOpInferSymbolicShape(pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { @@ -586,19 +587,6 @@ bool RepeatInterleaveOpInferSymbolicShape( return true; } -symbol::ShapeOrDataDimExprs CreateShapeOrDataForXShape( - const symbol::ShapeOrDataDimExprs &x_shape) { - const std::vector result = [&] { - std::vector new_x_dims; - new_x_dims.reserve(x_shape.shape().size() + 1); - new_x_dims.push_back(symbol::DimExpr{0}); - new_x_dims.insert( - new_x_dims.end(), x_shape.shape().begin(), x_shape.shape().end()); - return new_x_dims; - }(); - return symbol::ShapeOrDataDimExprs{symbol::TensorShapeOrDataDimExprs(result)}; -} - bool ReshapeOpInferSymbolicShape( pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { const symbol::ShapeOrDataDimExprs &x_dim_expr = diff --git a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc index 783e56a3c505e..50f5e9f622ac6 100644 --- a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc +++ b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc @@ -22,8 +22,7 @@ #include "paddle/pir/include/core/ir_context.h" #include "paddle/pir/include/pass/utils.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { template void RewriteByInfermeta(pir::Operation* op, common::DataLayout new_layout) { @@ -38,6 +37,45 @@ void RewriteByInfermeta(pir::Operation* op, common::DataLayout new_layout) { } } +template <> +std::vector RelevantInputsImpl( + pir::Operation* op) { + auto concrete_op = op->dyn_cast(); + return {concrete_op.x(), concrete_op.residual()}; +} + +template <> +std::vector RelevantOutputsImpl( + pir::Operation* op) { + auto concrete_op = op->dyn_cast(); + return {concrete_op.y(), 
concrete_op.residual_out()}; +} + +template <> +common::DataLayout PreferLayoutImpl(pir::Operation* op) { + // Note(bukejiyu): add_group_norm_silu only supports NHWC layout now. + return common::DataLayout::NHWC; +} + +template <> +void RewriteByLayoutImpl(pir::Operation* op, + common::DataLayout new_layout) { + op->set_attribute( + "data_format", + pir::StrAttribute::get(pir::IrContext::Instance(), + common::DataLayoutToString(new_layout))); + + std::vector new_outputs = AddGroupNormSiluOp::InferMeta( + op->operands_source(), const_cast(&op->attributes())); + for (size_t i = 0; i < new_outputs.size(); ++i) { + op->result(i).set_type(new_outputs[i]); + } + + for (auto value : RelevantOutputsImpl(op)) { + SetNewLayoutForValue(value, new_layout); + } +} + template <> common::DataLayout PreferLayoutImpl(pir::Operation* op) { auto data_format_attr = op->attribute("data_format"); @@ -48,13 +86,27 @@ common::DataLayout PreferLayoutImpl(pir::Operation* op) { data_format_attr)); } - // Note(lyk): We exhibit the layout transformation for conv2d - // due to issues with its infermeta and kernel not functioning - // properly in NHWC layout. However, if the FLAGS_manually_trans_conv_filter - // is enabled, the transfer_layout_pass can also operate correctly. 
+ auto concrete_op = op->dyn_cast(); + if (auto in = concrete_op.input()) { + if (auto in_type = in.type()) { + if (in_type.isa()) { + if (auto tensor_type = in_type.dyn_cast()) { + if (tensor_type.dtype().isa()) { + return common::DataLayout::NHWC; + } + } + } + } + } + return common::StringToDataLayout(data_format_attr.AsString()); } +template <> +bool CanBeModifiedImpl(pir::Operation* op) { + return false; +} + template <> void RewriteByLayoutImpl(pir::Operation* op, common::DataLayout new_layout) { @@ -78,6 +130,14 @@ common::DataLayout PreferLayoutImpl(pir::Operation* op) { auto original_layout = common::StringToDataLayout(data_format_attr.AsString()); + if (op->HasAttribute(kForceBackendAttr) && + op->attributes() + .at(kForceBackendAttr) + .dyn_cast() + .AsString() == "gpu") { + return common::DataLayout::NHWC; + } + auto concrete_op = op->dyn_cast(); if (auto in = concrete_op.input()) { if (auto in_type = in.type()) { @@ -124,6 +184,31 @@ void RewriteByLayoutImpl(pir::Operation* op, RewriteByInfermeta(op, new_layout); } +template <> +bool CanBeModifiedImpl(pir::Operation* op) { + auto data_format_attr = op->attribute("data_format"); + if (!data_format_attr) { + PADDLE_THROW(phi::errors::InvalidArgument( + "op (%s) should have attribute `data_format`, but got %s", + op, + data_format_attr)); + } + auto cur_layout = common::StringToDataLayout(data_format_attr.AsString()); + auto prefer_layout = PreferLayoutImpl(op); + auto can_be_modified = cur_layout != prefer_layout; + + for (auto value : RelevantOutputsImpl(op)) { + // TODO(lyk) if value was used in another block, we cannot rewrite this op + for (auto it = value.use_begin(); it != value.use_end(); ++it) { + if (it->owner()->GetParent() != op->GetParent()) { + return false; + } + } + } + + return can_be_modified; +} + template <> void RewriteByLayoutImpl(pir::Operation* op, common::DataLayout new_layout) { @@ -319,6 +404,5 @@ void RewriteByLayoutImpl(pir::Operation* op, RewriteByInfermeta(op, new_layout); } 
-} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::LayoutTransformationInterface) diff --git a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp index 05719bc1dfb2f..cb4767498bf23 100644 --- a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp +++ b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp @@ -105,9 +105,11 @@ bool CanBeModifiedImpl(pir::Operation* op) { class FusedConv2dAddActOp; OVERLOAD_PREFER_LAYOUT(FusedConv2dAddActOp); OVERLOAD_REWRITE_BY_LAYOUT(FusedConv2dAddActOp); +OVERLOAD_CAN_BE_MODIFIED(FusedConv2dAddActOp); class Conv2dOp; OVERLOAD_PREFER_LAYOUT(Conv2dOp); +OVERLOAD_CAN_BE_MODIFIED(Conv2dOp); OVERLOAD_REWRITE_BY_LAYOUT(Conv2dOp); class GroupNormOp; @@ -115,6 +117,12 @@ OVERLOAD_REWRITE_BY_LAYOUT(GroupNormOp); OVERLOAD_RELEVANT_INPUTS(GroupNormOp); OVERLOAD_RELEVANT_OUTPUTS(GroupNormOp); +class AddGroupNormSiluOp; +OVERLOAD_REWRITE_BY_LAYOUT(AddGroupNormSiluOp); +OVERLOAD_PREFER_LAYOUT(AddGroupNormSiluOp); +OVERLOAD_RELEVANT_INPUTS(AddGroupNormSiluOp); +OVERLOAD_RELEVANT_OUTPUTS(AddGroupNormSiluOp); + class ReshapeOp; OVERLOAD_RELEVANT_INPUTS(ReshapeOp); OVERLOAD_RELEVANT_OUTPUTS(ReshapeOp); diff --git a/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc b/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc index 269bc4f115b13..f2b6702233c7d 100644 --- a/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc @@ -40,8 +40,7 @@ paddle::dialect::IfOp, paddle::dialect::WhileOp, paddle::dialect::HasElementsOp, using pir::TuplePopOp; using pir::TuplePushOp; constexpr char kStopGradientAttrName[] = "stop_gradient"; // NOLINT -namespace paddle { -namespace dialect { +namespace paddle::dialect { void IfOp::Build(pir::Builder &builder, // NOLINT pir::OperationArgument &argument, // 
NOLINT @@ -1198,8 +1197,7 @@ void SelectOutputOp::VerifySig() { VLOG(4) << "End Verifying for: AssignArray_Op."; } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::IfOp) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::WhileOp) diff --git a/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc b/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc index fed0c6b224097..70b0f72bee55f 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc +++ b/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc @@ -17,8 +17,7 @@ #include #include "paddle/common/enforce.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { IrSelectedRows::IrSelectedRows(phi::DataType dtype, const phi::DDim& dims, phi::DataLayout layout, @@ -71,5 +70,4 @@ void* IrSelectedRows::AllocateFrom(phi::Allocator* allocator, IR_THROW("Don't use IrSelectedRows::AllocateFrom method."); } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect diff --git a/paddle/fluid/pir/dialect/operator/ir/ir_tensor.cc b/paddle/fluid/pir/dialect/operator/ir/ir_tensor.cc index 020fafabb89e9..ea9a9d8b4b20f 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ir_tensor.cc +++ b/paddle/fluid/pir/dialect/operator/ir/ir_tensor.cc @@ -18,8 +18,7 @@ #include "paddle/common/enforce.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { IrTensor::IrTensor(phi::DataType dtype, const phi::DDim& dims, phi::DataLayout layout, @@ -70,5 +69,4 @@ void* IrTensor::AllocateFrom(phi::Allocator* allocator, IR_THROW("Don't use IrTensor::AllocateFrom method."); } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_api.cc b/paddle/fluid/pir/dialect/operator/ir/manual_api.cc index 7fb835dd01c90..2d705364b970f 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_api.cc +++ 
b/paddle/fluid/pir/dialect/operator/ir/manual_api.cc @@ -71,11 +71,11 @@ void set_parameter(const pir::Value& parameter, const std::string& name) { } } -void updata_parameter(const pir::Value& parameter, const std::string& name) { +void update_parameter(const pir::Value& parameter, const std::string& name) { pir::Parameter* param = ApiBuilder::Instance().GetParameter(name); PADDLE_ENFORCE_NOT_NULL(param, phi::errors::InvalidArgument( - "Parameter %s not exist, can not updata.", name)); + "Parameter %s not exist, can not update.", name)); std::unique_ptr param_new( new pir::Parameter(nullptr, 0, parameter.type())); ApiBuilder::Instance().SetParameter(name, std::move(param_new)); diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_api.h b/paddle/fluid/pir/dialect/operator/ir/manual_api.h index 86d9b9a8245cc..7a89ae9eafaa8 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_api.h +++ b/paddle/fluid/pir/dialect/operator/ir/manual_api.h @@ -36,7 +36,7 @@ pir::Value parameter(const std::string& name); void set_parameter(const pir::Value& parameter, const std::string& name); -void updata_parameter(const pir::Value& parameter, const std::string& name); +void update_parameter(const pir::Value& parameter, const std::string& name); void shadow_output(const pir::Value& persist_value, const std::string& name); diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc index f266d6d172d35..96e24a6d10490 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc @@ -56,8 +56,7 @@ paddle::dialect::AddNOp, paddle::dialect::AddN_Op, paddle::dialect::AddNArrayOp, #include "paddle/phi/infermeta/spmd_rules/rules.h" #endif -namespace paddle { -namespace dialect { +namespace paddle::dialect { OpInfoTuple AddNOp::GetOpInfo() { std::vector inputs = { @@ -4513,8 +4512,7 @@ phi::DataType ArrayPopOp::GetKernelTypeForVar( return expected_kernel_dtype; } -} // namespace 
dialect -} // namespace paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::AddNOp) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::SplitGradOp) diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op_decomp.cc b/paddle/fluid/pir/dialect/operator/ir/manual_op_decomp.cc index 05e30459029f5..246a08910308c 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op_decomp.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op_decomp.cc @@ -98,12 +98,22 @@ std::vector> BatchNormOp::Decomp(pir::Operation* op) { res[2].push_back(std::static_pointer_cast( std::get<2>(op_res).impl()) ->value()); - res[3].push_back(std::static_pointer_cast( - std::get<3>(op_res).impl()) - ->value()); - res[4].push_back(std::static_pointer_cast( - std::get<4>(op_res).impl()) - ->value()); + if (std::get<3>(op_res).initialized()) { + res[3].push_back(std::static_pointer_cast( + std::get<3>(op_res).impl()) + ->value()); + } else { + pir::Value saved_mean; + res[3].push_back(saved_mean); + } + if (std::get<4>(op_res).initialized()) { + res[4].push_back(std::static_pointer_cast( + std::get<4>(op_res).impl()) + ->value()); + } else { + pir::Value saved_var; + res[4].push_back(saved_var); + } pir::Value reserve_space; res[5].push_back(reserve_space); @@ -180,12 +190,23 @@ std::vector> BatchNorm_Op::Decomp(pir::Operation* op) { res[2].push_back(std::static_pointer_cast( std::get<2>(op_res).impl()) ->value()); - res[3].push_back(std::static_pointer_cast( - std::get<3>(op_res).impl()) - ->value()); - res[4].push_back(std::static_pointer_cast( - std::get<4>(op_res).impl()) - ->value()); + if (std::get<3>(op_res).initialized()) { + res[3].push_back(std::static_pointer_cast( + std::get<3>(op_res).impl()) + ->value()); + } else { + pir::Value saved_mean; + res[3].push_back(saved_mean); + } + if (std::get<4>(op_res).initialized()) { + res[4].push_back(std::static_pointer_cast( + std::get<4>(op_res).impl()) + ->value()); + } else { + pir::Value saved_var; + 
res[4].push_back(saved_var); + } + pir::Value reserve_space; res[5].push_back(reserve_space); diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op_decomp_vjp.cc b/paddle/fluid/pir/dialect/operator/ir/manual_op_decomp_vjp.cc index c1aa3d776b67e..7b15459837fd9 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op_decomp_vjp.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op_decomp_vjp.cc @@ -61,7 +61,7 @@ std::vector> StackGradOp::DecompVjp( auto stop_gradients_attr = op->attribute(kAttrStopGradients) .dyn_cast() .AsVector(); - for (size_t i = 0; i < stop_gradients[0].size(); ++i) { + for (size_t i = 0; i < stop_gradients_attr.size(); ++i) { stop_gradients[0].push_back( stop_gradients_attr[i].dyn_cast().data()); } @@ -144,24 +144,31 @@ std::vector> ConcatGradOp::DecompVjp( .dyn_cast() .data(); - VLOG(6) << "Decomp call concat_grad's backward composite rule prepare"; + VLOG(4) << "Decomp call concat_grad's backward composite rule prepare"; std::vector> stop_gradients(op->results().size()); - if (combine_op_obj_x->HasAttribute(kAttrStopGradients)) { - auto stop_gradients_attr = op->attribute(kAttrStopGradients) - .dyn_cast() - .AsVector(); - for (size_t i = 0; i < stop_gradients[0].size(); ++i) { - stop_gradients[0].push_back( - stop_gradients_attr[i].dyn_cast().data()); + auto splitop = op->results()[0].first_use().owner(); + + if (splitop->HasAttribute("current_bwd_op_stop_gradients")) { + auto stop_gradients_attr = + splitop->attribute("current_bwd_op_stop_gradients") + .dyn_cast() + .AsVector(); + for (size_t i = 0; i < stop_gradients_attr.size(); ++i) { + auto stop_gradients_attr_j = + stop_gradients_attr[i].dyn_cast().AsVector(); + for (size_t j = 0; j < stop_gradients_attr_j.size(); ++j) { + stop_gradients[0].push_back( + stop_gradients_attr_j[j].dyn_cast().data()); + } } - VLOG(4) << " stop_gradients is set "; + VLOG(4) << " op stop_gradients is set "; } else { std::vector x_grad_stop_gradient(combine_op_obj_x.inputs().size(), false); 
stop_gradients[0] = x_grad_stop_gradient; - VLOG(4) << " stop_gradients is not set "; + VLOG(4) << " op stop_gradients is not set "; } std::vector> tensor_res; @@ -179,6 +186,7 @@ std::vector> ConcatGradOp::DecompVjp( paddle::primitive::details::concat_grad( x, out_grad, axis, x_grad); + VLOG(4) << "Call Pir Decomposed backward op concat_grad end"; std::vector> res(tensor_res.size()); for (size_t i = 0; i < tensor_res.size(); ++i) { diff --git a/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc b/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc index 8a843a8881734..4eb8190eaa111 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc +++ b/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc @@ -16,8 +16,7 @@ #include "paddle/common/enforce.h" #include "paddle/common/errors.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { const phi::IntArray &IntArrayAttribute::data() const { return storage()->GetAsKey(); } @@ -130,8 +129,7 @@ DataLayoutAttribute DataLayoutAttribute::Parse( parser.ctx, StringToDataLayoutMap().at(datalayout_token_val)); } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::IntArrayAttribute) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ScalarAttribute) diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc index 481742a807ac6..275667c1edc27 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc +++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc @@ -38,8 +38,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.h" #endif -namespace paddle { -namespace dialect { +namespace paddle::dialect { struct CombineOpInferSymbolicShapeInterfaceModel : public InferSymbolicShapeInterface::Concept { @@ -1064,8 +1063,7 @@ void CustomOpDialect::RegisterCustomOp(const paddle::OpMetaInfo& op_meta) { verify_func, verify_func); } -} // namespace dialect -} // namespace 
paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OperatorDialect) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomOpDialect) diff --git a/paddle/fluid/pir/dialect/operator/transforms/param_to_variable.cc b/paddle/fluid/pir/dialect/operator/transforms/param_to_variable.cc index 1d93e27c59b0b..78cb8e6460769 100644 --- a/paddle/fluid/pir/dialect/operator/transforms/param_to_variable.cc +++ b/paddle/fluid/pir/dialect/operator/transforms/param_to_variable.cc @@ -21,8 +21,7 @@ #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/dense_tensor.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { std::shared_ptr ParameterConvertInterface::ParameterToVariable(pir::Parameter *parameter) { if (parameter->type().isa()) { @@ -79,7 +78,6 @@ std::unique_ptr ParameterConvertInterface::VariableToParameter( } } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ParameterConvertInterface) diff --git a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.cc b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.cc index 32c45d20f8d25..7ba06ac2944ad 100644 --- a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.cc +++ b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.cc @@ -17,8 +17,7 @@ #include #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace dialect { +namespace paddle::dialect { OpYamlInfoParser::OpYamlInfoParser(OpInfoTuple op_info_tuple, bool is_legacy_op) : op_info_tuple_(std::move(op_info_tuple)), is_legacy_op_(is_legacy_op) { @@ -239,5 +238,4 @@ int OpYamlInfoParser::GetTensorParamIndexByArgsName( } } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.cc b/paddle/fluid/pir/dialect/operator/utils/utils.cc index 4f752932c6ba6..3552cf88a0765 100644 --- a/paddle/fluid/pir/dialect/operator/utils/utils.cc 
+++ b/paddle/fluid/pir/dialect/operator/utils/utils.cc @@ -45,8 +45,6 @@ const std::unordered_set LegacyOpList = { CSyncCommStream_Op::name(), DistributedPushSparseOp::name(), FtrlOp::name(), - FusedElemwiseAddActivationOp::name(), - FusedElemwiseAddActivationGradOp::name(), DpsgdOp::name(), SendV2Op::name(), RecvV2Op::name(), @@ -85,7 +83,6 @@ const std::unordered_set LegacyOpList = { paddle::onednn::dialect::LrnOp::name(), paddle::onednn::dialect::LrnGradOp::name(), paddle::onednn::dialect::MultiGruOp::name(), - paddle::onednn::dialect::FusionLstmOp::name(), #endif CReduceAvgOp::name(), CReduceAvg_Op::name(), diff --git a/paddle/fluid/pir/drr/src/match_context_impl.h b/paddle/fluid/pir/drr/src/match_context_impl.h index a9acb5f6ed8df..ce6911fb36ecb 100644 --- a/paddle/fluid/pir/drr/src/match_context_impl.h +++ b/paddle/fluid/pir/drr/src/match_context_impl.h @@ -17,6 +17,7 @@ #include #include +#include "glog/logging.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/fluid/pir/drr/include/drr_pattern_context.h" #include "paddle/fluid/pir/drr/src/attr_type_uilts.h" @@ -100,27 +101,32 @@ class MatchContextImpl final { tensor_map_.emplace(value_name, value); } - void BindIrOperation(const OpCall* op_call, pir::Operation* op) { + bool BindIrOperation(const OpCall* op_call, pir::Operation* op) { operation_map_.emplace(op_call, op); const auto& attrs = op_call->attributes(); for (const auto& kv : attrs) { - std::visit( + bool bind_success = std::visit( [&](auto&& arg) { if constexpr (std::is_same_v, NormalAttribute>) { - PADDLE_ENFORCE( - op->HasAttribute(kv.first), - phi::errors::NotFound( - "Not found attribute [%s] in Op [%s], please check the " - "validity of the attribute name[%s].", - kv.first, - op->name(), - kv.first)); - BindIrAttr(arg.name(), op->attribute(kv.first)); + if (op->HasAttribute(kv.first)) { + BindIrAttr(arg.name(), op->attribute(kv.first)); + return true; + } } + return false; }, kv.second); + if (!bind_success) { + 
LOG(WARNING) << "Not found attribute [" << kv.first << "] in Op [" + << op->name() + << "], please check the " + "validity of the attribute name[" + << kv.first << "]."; + return false; + } } + return true; } private: diff --git a/paddle/fluid/pir/drr/src/pattern_graph.cc b/paddle/fluid/pir/drr/src/pattern_graph.cc index 632f41d2adbb9..b0c2dbcd58ae2 100644 --- a/paddle/fluid/pir/drr/src/pattern_graph.cc +++ b/paddle/fluid/pir/drr/src/pattern_graph.cc @@ -20,8 +20,7 @@ #include "paddle/fluid/pir/drr/include/drr_pattern_context.h" #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace drr { +namespace paddle::drr { const drr::OpCall &PatternGraph::AddOpCall( const std::shared_ptr &op_call) { @@ -228,5 +227,4 @@ std::ostream &operator<<(std::ostream &os, const PatternGraph &pattern_graph) { return os; } -} // namespace drr -} // namespace paddle +} // namespace paddle::drr diff --git a/paddle/fluid/pir/drr/src/rewrite_pattern.cc b/paddle/fluid/pir/drr/src/rewrite_pattern.cc index 53b7ec0c919e9..93095af050afe 100644 --- a/paddle/fluid/pir/drr/src/rewrite_pattern.cc +++ b/paddle/fluid/pir/drr/src/rewrite_pattern.cc @@ -356,7 +356,10 @@ bool DrrRewritePattern::MatchFromOutputToInput( break; } // Step 1: Bind Operation of current op to match_ctx. - source_pattern_match_ctx->BindIrOperation(drr_node, ir_node); + if (!source_pattern_match_ctx->BindIrOperation(drr_node, ir_node)) { + matched = false; + break; + } // Step 2: Bind input_tensor of current op to match_ctx. 
const auto& drr_input_tensors = drr_node->inputs(); @@ -391,7 +394,7 @@ bool DrrRewritePattern::MatchFromOutputToInput( ir_input_values[i].use_count()) { matched = false; VLOG(8) << drr_node->name() << " Match failed: consumers of drr intput[" - << i << "] { " << drr_node->outputs().size() + << i << "] { " << drr_input_tensors[i]->consumers().size() << " } != consumers of pir intput[" << i << "] { " << ir_input_values[i].use_count() << " }."; break; @@ -495,8 +498,9 @@ MatchContextImpl DrrRewritePattern::CreateOperations( } // set insert point - size_t max_input_op_index = 0UL; - pir::Operation* max_index_op = nullptr; + // 1. get result pattern max-idx of input op + size_t max_res_idx = 0UL; + pir::Operation* max_res_idx_op = nullptr; for (const Tensor* input : op_call.inputs()) { if (input->is_none()) { continue; @@ -506,18 +510,16 @@ MatchContextImpl DrrRewritePattern::CreateOperations( pir::Operation* ir_input_op = ir_val.defining_op(); if (op_2_temp_program_index.count(ir_input_op) == 0) { // do nothing - } else if (max_input_op_index < - op_2_temp_program_index.at(ir_input_op)) { - max_input_op_index = op_2_temp_program_index.at(ir_input_op); - max_index_op = ir_input_op; - } else if (max_input_op_index == - op_2_temp_program_index.at(ir_input_op)) { - const auto& ops_vec = temp_program[max_input_op_index]; + } else if (max_res_idx < op_2_temp_program_index.at(ir_input_op)) { + max_res_idx = op_2_temp_program_index.at(ir_input_op); + max_res_idx_op = ir_input_op; + } else if (max_res_idx == op_2_temp_program_index.at(ir_input_op)) { + const auto& ops_vec = temp_program[max_res_idx]; for (auto it = ops_vec.begin(); it != ops_vec.end(); it++) { - if (*it == max_index_op) { + if (*it == max_res_idx_op) { break; } else if (*it == ir_input_op) { - max_index_op = ir_input_op; + max_res_idx_op = ir_input_op; break; } else { // do nothing @@ -528,25 +530,29 @@ MatchContextImpl DrrRewritePattern::CreateOperations( } } } - if (max_input_op_index == 0UL) { - VLOG(6) << 
"Not found producer op for (" << op_call.name() << ")"; - pir::Operation* source_pattern_first_op = src_match_ctx.IrOperation( - source_pattern_graph.owned_op_call()[0].get()); - max_input_op_index = op_2_temp_program_index[source_pattern_first_op]; - rewriter.set_insertion_point(source_pattern_first_op); - } else { - rewriter.SetInsertionPointAfter(max_index_op); - } - pir::Operation* new_op = - CreateOperation(op_call, src_match_ctx, rewriter, &res_match_ctx); + // 2. get source pattern min-idx op + pir::Operation* min_src_idx_op = src_match_ctx.IrOperation( + source_pattern_graph.owned_op_call()[0].get()); + size_t min_src_idx = op_2_temp_program_index[min_src_idx_op]; + for (const auto& src_owned_op_call : source_pattern_graph.owned_op_call()) { + pir::Operation* src_owned_op = + src_match_ctx.IrOperation(src_owned_op_call.get()); + size_t src_owned_op_idx = op_2_temp_program_index[src_owned_op]; + if (min_src_idx > src_owned_op_idx) { + min_src_idx = src_owned_op_idx; + min_src_idx_op = src_owned_op; + } + } - size_t new_max_input_op_index = max_input_op_index + 1; - op_2_temp_program_index[new_op] = new_max_input_op_index; - if (new_max_input_op_index >= temp_program.size()) { - temp_program.emplace_back(); + // 3. 
insert new op at point max(max_res_idx+1, min_src_idx) + if (min_src_idx > max_res_idx) { + rewriter.set_insertion_point(min_src_idx_op); + } else { + rewriter.SetInsertionPointAfter(max_res_idx_op); } - temp_program[new_max_input_op_index].push_back(new_op); + + CreateOperation(op_call, src_match_ctx, rewriter, &res_match_ctx); }); return res_match_ctx; diff --git a/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h b/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h index d4aaefe81c983..8ad7a0e13d3f2 100644 --- a/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h +++ b/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h @@ -17,17 +17,43 @@ #include #include -#include "glog/logging.h" #include "paddle/common/layout.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/fluid/pir/serialize_deserialize/include/schema.h" -#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_party.h" #include "paddle/phi/common/data_type.h" #include "paddle/pir/include/core/builtin_attribute.h" #include "paddle/pir/include/core/builtin_type.h" namespace pir { +#define DECOMPRESS_DIALECT_ID(name) \ + pir::DialectIdMap::Instance()->GetDecompressDialectId(name) + +void GetDecompressOpName(std::string* op_name) { + std::pair name = getContentSplitByDot(*op_name); + *op_name = DECOMPRESS_DIALECT_ID(name.first) + "." 
+ name.second; + return; +} + +class AttrTypeReader { + public: + static pir::Attribute ReadBuiltInAttr(const std::string attr_name, + Json* attr_json, + pir::IrContext* ctx); + + static pir::Type ReadBuiltInType(const std::string type_name, + Json* type_json, + pir::IrContext* ctx); + + static pir::Attribute ReadPaddleOperatorAttr(const std::string attr_name, + Json* attr_json, + pir::IrContext* ctx); + + static pir::Type ReadPaddleOperatorType(const std::string type_name, + Json* type_json, + pir::IrContext* ctx); +}; template T deserializeTypeFromJson(Json* type_json, pir::IrContext* ctx) { @@ -147,78 +173,26 @@ deserializeAttrFromJson( pir::Type parseType(Json* type_json) { auto type_name = type_json->at(ID).template get(); - pir::IrContext* ctx = pir::IrContext::Instance(); - - if (type_name == pir::BoolType::name()) { - VLOG(8) << "Parse BoolType ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::BFloat16Type::name()) { - VLOG(8) << "Parse BFloat16Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Float16Type::name()) { - VLOG(8) << "Parse Float16Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Float32Type::name()) { - VLOG(8) << "Parse Float32Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Float64Type::name()) { - VLOG(8) << "Parse Float64Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Int8Type::name()) { - VLOG(8) << "Parse Int8Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::UInt8Type::name()) { - VLOG(8) << "Parse UInt8Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Int16Type::name()) { - VLOG(8) << "Parse Int16Type ... 
"; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Int32Type::name()) { - VLOG(8) << "Parse Int32Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Int64Type::name()) { - VLOG(8) << "Parse Int64Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::IndexType::name()) { - VLOG(8) << "Parse IndexType ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Complex64Type::name()) { - VLOG(8) << "Parse Complex64Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::Complex128Type::name()) { - VLOG(8) << "Parse Complex128Type ... "; - return pir::deserializeTypeFromJson(type_json, ctx); - } else if (type_name == pir::VectorType::name()) { - VLOG(8) << "Parse VectorType ... "; - std::vector content; - for (auto& type_x : type_json->at(DATA)) { - content.push_back(parseType(&type_x)); - } - return pir::VectorType::get(ctx, content); - } else if (type_name == pir::DenseTensorType::name()) { - VLOG(8) << "Parse DenseTensorType ... 
"; - Json data_json = type_json->at(DATA); - pir::Type dtype = parseType(&(data_json.at(0))); - std::vector dims = - data_json.at(1).template get>(); - phi::DDim ddim = phi::make_ddim(dims); - pir::DataLayout data_layout = - common::StringToDataLayout(data_json.at(2).template get()); + if (type_name == NULL_TYPE) { + return pir::Type(); + } - std::vector> lod = - data_json.at(3).template get>>(); + pir::IrContext* ctx = pir::IrContext::Instance(); + std::pair name = getContentSplitByDot(type_name); - size_t offset = data_json.at(4).get(); - return pir::DenseTensorType::get( - ctx, dtype, ddim, data_layout, lod, offset); - } else if (type_name == NULL_TYPE) { - return pir::Type(); + if (DECOMPRESS_DIALECT_ID(name.first) == pir::BuiltinDialect::name()) { + return AttrTypeReader::ReadBuiltInType(name.second, type_json, ctx); + } else if (DECOMPRESS_DIALECT_ID(name.first) == + paddle::dialect::OperatorDialect::name()) { + return AttrTypeReader::ReadPaddleOperatorType(name.second, type_json, ctx); } else { - PADDLE_ENFORCE(false, - phi::errors::InvalidArgument( - "Unknown Type %s for parse type", type_name)); + PADDLE_ENFORCE( + false, + phi::errors::InvalidArgument( + "Unknown Attr %s for parse builtin dialect attr", type_name)); } + VLOG(8) << "Finish Parse Type ... 
"; return pir::Type(); @@ -234,7 +208,28 @@ pir::TypeAttribute deserializeAttrFromJson( pir::Attribute parseAttr(Json* attr_json) { std::string attr_name = attr_json->at(ID).template get(); pir::IrContext* ctx = pir::IrContext::Instance(); + std::pair name = getContentSplitByDot(attr_name); + + if (DECOMPRESS_DIALECT_ID(name.first) == pir::BuiltinDialect::name()) { + return AttrTypeReader::ReadBuiltInAttr(name.second, attr_json, ctx); + } else if (DECOMPRESS_DIALECT_ID(name.first) == + paddle::dialect::OperatorDialect::name()) { + return AttrTypeReader::ReadPaddleOperatorAttr(name.second, attr_json, ctx); + } else { + PADDLE_ENFORCE( + false, + phi::errors::InvalidArgument( + "Unknown Attr %s for parse builtin dialect attr", attr_name)); + } + + VLOG(8) << "Finish Parse Attr ... "; + return pir::Attribute(); +} + +pir::Attribute AttrTypeReader::ReadBuiltInAttr(const std::string attr_name, + Json* attr_json, + pir::IrContext* ctx) { if (attr_name == pir::BoolAttribute::name()) { VLOG(8) << "Parse BoolAttribute ."; return pir::deserializeAttrFromJson(attr_json, @@ -286,7 +281,18 @@ pir::Attribute parseAttr(Json* attr_json) { VLOG(8) << "Parse StrAttribute ."; return pir::deserializeAttrFromJson( attr_json, ctx); - } else if (attr_name == paddle::dialect::IntArrayAttribute::name()) { + } else { + PADDLE_ENFORCE( + false, + phi::errors::InvalidArgument( + "Unknown Attr %s for parse builtin dialect attr", attr_name)); + } + return pir::Attribute(); +} + +pir::Attribute AttrTypeReader::ReadPaddleOperatorAttr( + const std::string attr_name, Json* attr_json, pir::IrContext* ctx) { + if (attr_name == paddle::dialect::IntArrayAttribute::name()) { VLOG(8) << "Parse IntArrayAttribute ."; return pir::deserializeAttrFromJson>(attr_json, ctx); @@ -306,11 +312,94 @@ pir::Attribute parseAttr(Json* attr_json) { } else { PADDLE_ENFORCE(false, phi::errors::InvalidArgument( - "Unknown Attr %s for parse attr", attr_name)); + "Unknown Attr %s for parse paddleoperator dialect attr", + 
attr_name)); } - VLOG(8) << "Finish Parse Attr ... "; - return pir::Attribute(); } +pir::Type AttrTypeReader::ReadBuiltInType(const std::string type_name, + Json* type_json, + pir::IrContext* ctx) { + if (type_name == pir::BoolType::name()) { + VLOG(8) << "Parse BoolType ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::BFloat16Type::name()) { + VLOG(8) << "Parse BFloat16Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float16Type::name()) { + VLOG(8) << "Parse Float16Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float32Type::name()) { + VLOG(8) << "Parse Float32Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float64Type::name()) { + VLOG(8) << "Parse Float64Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int8Type::name()) { + VLOG(8) << "Parse Int8Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::UInt8Type::name()) { + VLOG(8) << "Parse UInt8Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int16Type::name()) { + VLOG(8) << "Parse Int16Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int32Type::name()) { + VLOG(8) << "Parse Int32Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int64Type::name()) { + VLOG(8) << "Parse Int64Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::IndexType::name()) { + VLOG(8) << "Parse IndexType ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Complex64Type::name()) { + VLOG(8) << "Parse Complex64Type ... 
"; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Complex128Type::name()) { + VLOG(8) << "Parse Complex128Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::VectorType::name()) { + VLOG(8) << "Parse VectorType ... "; + std::vector content; + for (auto& type_x : type_json->at(DATA)) { + content.push_back(parseType(&type_x)); + } + return pir::VectorType::get(ctx, content); + } else if (type_name == pir::DenseTensorType::name()) { + VLOG(8) << "Parse DenseTensorType ... "; + Json data_json = type_json->at(DATA); + pir::Type dtype = parseType(&(data_json.at(0))); + + std::vector dims = + data_json.at(1).template get>(); + phi::DDim ddim = phi::make_ddim(dims); + pir::DataLayout data_layout = + common::StringToDataLayout(data_json.at(2).template get()); + + std::vector> lod = + data_json.at(3).template get>>(); + + size_t offset = data_json.at(4).get(); + return pir::DenseTensorType::get( + ctx, dtype, ddim, data_layout, lod, offset); + } else { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Unknown Type %s for parse builtintype", type_name)); + } + return pir::Type(); +} + +pir::Type AttrTypeReader::ReadPaddleOperatorType(const std::string type_name, + Json* type_json, + pir::IrContext* ctx) { + PADDLE_ENFORCE( + false, + phi::errors::InvalidArgument( + "Unknown Type %s for parse paddleoperator dialect type", type_name)); + return pir::Type(); +} + } // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h b/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h index 2ae9f22d21a9c..ed8364bacc229 100644 --- a/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h +++ b/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h @@ -15,7 +15,7 @@ #include #include "paddle/common/enforce.h" -#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include 
"paddle/fluid/pir/serialize_deserialize/include/third_party.h" #include "paddle/pir/include/core/operation.h" #include "paddle/pir/include/core/program.h" @@ -47,6 +47,8 @@ class ProgramReader { Json* operesult_attrs_json); pir::Attribute ReadAttribute(Json* attr_json); pir::Type ReadType(Json* type_json); + + pir::Operation* ReadParameterOp(Json* op_json); }; } // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h b/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h index 96baf995d5aeb..ea58e51fbed0d 100644 --- a/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h +++ b/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_party.h" #include "paddle/pir/include/core/program.h" namespace pir { @@ -76,6 +76,9 @@ class ProgramWriter { Json WriteAttribute(const std::string& op_attr_name, const pir::Attribute& attr); Json WriteType(const pir::Type& type); + + // special op for optimize json file size + Json WriteParameterOP(const pir::Operation& op); }; } // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/schema.h b/paddle/fluid/pir/serialize_deserialize/include/schema.h index dcfdfc09114a5..19e40053e36af 100644 --- a/paddle/fluid/pir/serialize_deserialize/include/schema.h +++ b/paddle/fluid/pir/serialize_deserialize/include/schema.h @@ -12,7 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once - +#include "glog/logging.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h" +#include "paddle/pir/include/core/builtin_dialect.h" +#include "paddle/pir/include/dialect/control_flow/ir/cf_dialect.h" namespace pir { /** * IMPORTANT!!! 
@@ -23,8 +26,8 @@ namespace pir { // all IR structure's identifier (region, block, op, attr, type value etc) // which can be string , int64_t etc. -#define ID "id" - +#define ID "#" +#define VALUE_ID "%" // program's key: #define REGIONS "regions" @@ -43,9 +46,11 @@ namespace pir { #define BLOCKOPS "ops" // operation's key: +// input // which is json array with opoperand json object(ID) #define OPOPERANDS "I" +// output // which is json array with value json object(ID and TYPE_TYPE) #define OPRESULTS "O" @@ -68,4 +73,27 @@ namespace pir { // NULL_TYPE #define NULL_TYPE "NULL" + +// special op compress + +#define PARAMETEROP "p" + +std::pair getContentSplitByDot( + const std::string& str); + +class DialectIdMap { + public: + static DialectIdMap* Instance(); + DialectIdMap(); + void insert(const std::string& key, const std::string& value); + + std::string GetCompressDialectId(const std::string& name); + + std::string GetDecompressDialectId(const std::string& id); + + private: + std::unordered_map CompressDialect; + std::unordered_map DecompressDialect; +}; + } // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h b/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h index a6cae97f135d9..ddda2f5863f0c 100644 --- a/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h +++ b/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h @@ -18,16 +18,36 @@ #include #include -#include "glog/logging.h" - +#include "paddle/common/layout.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/fluid/pir/serialize_deserialize/include/schema.h" -#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_party.h" #include "paddle/phi/common/data_type.h" #include "paddle/pir/include/core/builtin_attribute.h" #include "paddle/pir/include/core/builtin_type.h" namespace pir { +#define 
COMPRESS_DIALECT_NAME(attr_template) \ + pir::DialectIdMap::Instance()->GetCompressDialectId( \ + (attr_template).dialect().name()) + +void GetCompressOpName(std::string* op_name) { + std::pair name = getContentSplitByDot(*op_name); + *op_name = pir::DialectIdMap::Instance()->GetCompressDialectId(name.first) + + "." + name.second; + return; +} + +class AttrTypeWriter { + public: + static Json WriteBuiltInAttr(const pir::Attribute& attr); + + static Json WriteBuiltInType(const pir::Type& type); + + static Json WritePaddleOperatorAttr(const pir::Attribute& attr); + + static Json WritePaddleOperatorType(const pir::Type& type); +}; /** serializeTypeToJson is a template function to serialize * a pir type to a json object. a pir type may have value or no value * Value free types only have ID, while value based types have @@ -49,7 +69,7 @@ namespace pir { template Json serializeTypeToJson(const T& type) { Json json_obj; - json_obj[ID] = type.name(); + json_obj[ID] = COMPRESS_DIALECT_NAME(type) + "." + type.name(); return json_obj; } @@ -73,18 +93,18 @@ Json serializeTypeToJson(const T& type) { template Json serializeAttrToJson(const T& attr) { Json json_obj; - json_obj[ID] = attr.name(); + json_obj[ID] = COMPRESS_DIALECT_NAME(attr) + "." + attr.name(); json_obj[DATA] = attr.data(); return json_obj; } -#define SERIALIZE_ATTR_TO_JSON(type, data) \ - template <> \ - Json serializeAttrToJson(const type& attr) { \ - Json json_obj; \ - json_obj[ID] = attr.name(); \ - json_obj[DATA] = data; \ - return json_obj; \ +#define SERIALIZE_ATTR_TO_JSON(type, data) \ + template <> \ + Json serializeAttrToJson(const type& attr) { \ + Json json_obj; \ + json_obj[ID] = COMPRESS_DIALECT_NAME(attr) + "." 
+ attr.name(); \ + json_obj[DATA] = data; \ + return json_obj; \ } SERIALIZE_ATTR_TO_JSON(pir::StrAttribute, attr.AsString()); @@ -97,12 +117,13 @@ SERIALIZE_ATTR_TO_JSON(paddle::dialect::IntArrayAttribute, attr.data().GetData()); SERIALIZE_ATTR_TO_JSON(paddle::dialect::DataTypeAttribute, phi::DataTypeToString(attr.data())); - +SERIALIZE_ATTR_TO_JSON(paddle::dialect::DataLayoutAttribute, + common::DataLayoutToString(attr.data())); template <> Json serializeAttrToJson( const paddle::dialect::ScalarAttribute& attr) { Json json_obj; - json_obj[ID] = attr.name(); + json_obj[ID] = COMPRESS_DIALECT_NAME(attr) + "." + attr.name(); Json content = Json::array(); auto scalar = attr.data(); @@ -151,7 +172,7 @@ template <> Json serializeAttrToJson( const paddle::dialect::PlaceAttribute& attr) { Json json_obj; - json_obj[ID] = attr.name(); + json_obj[ID] = COMPRESS_DIALECT_NAME(attr) + "." + attr.name(); Json content = Json::array(); auto place = attr.data(); content.push_back(static_cast(place.GetType())); @@ -162,6 +183,112 @@ Json serializeAttrToJson( } Json writeType(const pir::Type& type) { + Json type_json = Json::object(); + if (!type) { + type_json[ID] = NULL_TYPE; + return type_json; + } + if (type.dialect().name() == pir::BuiltinDialect::name()) { + VLOG(6) << "write BuiltinType ... "; + return AttrTypeWriter::WriteBuiltInType(type); + } else if (type.dialect().name() == + paddle::dialect::OperatorDialect::name()) { + VLOG(6) << "write PaddleOperatorType ... "; + return AttrTypeWriter::WritePaddleOperatorType(type); + } else { + PADDLE_ENFORCE( + false, phi::errors::InvalidArgument("Unknown Type %s when write type")); + } + VLOG(8) << "Finish write Type ... "; + + return type_json; +} + +SERIALIZE_ATTR_TO_JSON(pir::TypeAttribute, writeType(attr.data())); + +Json writeAttr(const pir::Attribute& attr) { + if (attr.dialect().name() == pir::BuiltinDialect::name()) { + VLOG(8) << "write BuiltinAttr ... 
"; + return AttrTypeWriter::WriteBuiltInAttr(attr); + } else if (attr.dialect().name() == + paddle::dialect::OperatorDialect::name()) { + VLOG(8) << "write PaddleOperatorAttr ... "; + return AttrTypeWriter::WritePaddleOperatorAttr(attr); + } else { + PADDLE_ENFORCE( + false, phi::errors::InvalidArgument("Unknown Attr %s when write attr")); + } + + VLOG(8) << "Finish write attr ... "; + + return Json::object(); +} + +Json AttrTypeWriter::WriteBuiltInAttr(const pir::Attribute& attr) { + Json attr_json = Json::object(); + if (attr.isa()) { + VLOG(8) << "write BoolAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write FloatAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write DoubleAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Int32Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Int64Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write IndexAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write ArrayAttribute ."; + auto attr_ = attr.dyn_cast(); + Json val = Json::array(); + for (size_t i = 0; i < attr_.size(); i++) { + val.push_back(writeAttr(attr_.at(i))); + } + attr_json[ID] = COMPRESS_DIALECT_NAME(attr_) + "." 
+ attr_.name(); + attr_json[DATA] = val; + return attr_json; + } else if (attr.isa()) { + VLOG(8) << "write TypeAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write TensorNameAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Complex64Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Complex128Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write StrAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Unknown Attr %s when write Buitin dialect attr")); + } + return attr_json; +} + +Json AttrTypeWriter::WriteBuiltInType(const pir::Type& type) { Json type_json = Json::object(); if (type.isa()) { VLOG(8) << "Write BoolType ... "; @@ -220,7 +347,7 @@ Json writeType(const pir::Type& type) { } else if (type.isa()) { VLOG(8) << "Write VectorType ... "; auto type_ = type.dyn_cast(); - type_json[ID] = type_.name(); + type_json[ID] = COMPRESS_DIALECT_NAME(type_) + "." + type_.name(); Json content = Json::array(); for (auto type_x : type_.data()) { content.push_back(writeType(type_x)); @@ -231,7 +358,7 @@ Json writeType(const pir::Type& type) { VLOG(8) << "Write DenseTensorType ... "; auto type_ = type.dyn_cast(); - type_json[ID] = type_.name(); + type_json[ID] = COMPRESS_DIALECT_NAME(type_) + "." 
+ type_.name(); Json content = Json::array(); content.push_back(writeType(type_.dtype())); @@ -248,77 +375,16 @@ Json writeType(const pir::Type& type) { content.push_back(type_.offset()); type_json[DATA] = content; return type_json; - } else if (!type) { - type_json[ID] = NULL_TYPE; - return type_json; } else { - PADDLE_ENFORCE( - false, phi::errors::InvalidArgument("Unknown Type when write type")); + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Unknown Type when write builtin dialect type")); } - VLOG(8) << "Finish write Type ... "; - return type_json; } -SERIALIZE_ATTR_TO_JSON(pir::TypeAttribute, writeType(attr.data())); - -Json writeAttr(const pir::Attribute& attr) { - Json attr_json = Json::object(); - if (attr.isa()) { - VLOG(8) << "write BoolAttribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write FloatAttribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write DoubleAttribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write Int32Attribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write Int64Attribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write IndexAttribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write ArrayAttribute ."; - auto attr_ = attr.dyn_cast(); - Json val = Json::array(); - for (size_t i = 0; i < attr_.size(); i++) { - val.push_back(writeAttr(attr_.at(i))); - } - attr_json[ID] = attr_.name(); - attr_json[DATA] = val; - return attr_json; - } else if (attr.isa()) { - VLOG(8) << "write TypeAttribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write TensorNameAttribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if 
(attr.isa()) { - VLOG(8) << "write Complex64Attribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write Complex128Attribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { - VLOG(8) << "write StrAttribute ."; - return pir::serializeAttrToJson( - attr.dyn_cast()); - } else if (attr.isa()) { +Json AttrTypeWriter::WritePaddleOperatorAttr(const pir::Attribute& attr) { + if (attr.isa()) { VLOG(8) << "write IntArrayAttribute ."; return pir::serializeAttrToJson( attr.dyn_cast()); @@ -334,13 +400,25 @@ Json writeAttr(const pir::Attribute& attr) { VLOG(8) << "write PlaceAttribute ."; return pir::serializeAttrToJson( attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write DataLayoutAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); } else { PADDLE_ENFORCE( - false, phi::errors::InvalidArgument("Unknown Attr %s when write attr")); + false, + phi::errors::InvalidArgument( + "Unknown Attr %s when write paddle.operatordialect attr")); } - VLOG(8) << "Finish write& attr ... 
"; + return Json::object(); +} - return attr_json; +Json AttrTypeWriter::WritePaddleOperatorType(const pir::Type& type) { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Unknown Type when write paddle.operatordialect type")); + + return Json::object(); } } // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/third_part.h b/paddle/fluid/pir/serialize_deserialize/include/third_party.h similarity index 100% rename from paddle/fluid/pir/serialize_deserialize/include/third_part.h rename to paddle/fluid/pir/serialize_deserialize/include/third_party.h diff --git a/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc b/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc index 44c68051bf908..9b94498a13f79 100644 --- a/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc +++ b/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc @@ -88,15 +88,72 @@ void ProgramReader::ReadBlock(Json* block_json, pir::Block* block) { VLOG(4) << "Finish Read " << block_name << "."; return; } +pir::ArrayAttribute GetOneBoolArrayAttribute(pir::IrContext* ctx, + Json* attr_json) { + std::vector val; + bool bool_value = attr_json->template get() != 0; + val.push_back(pir::BoolAttribute::get(ctx, bool_value)); + return pir::ArrayAttribute::get(ctx, val); +} + +pir::Operation* ProgramReader::ReadParameterOp(Json* op_json) { + // attr is_distributed; is_parameter; need_clip; parameter_name; persistable; + // stop_gradient; trainable; + std::vector inputs; + Json& opresult_json = op_json->at(OPRESULTS); + std::vector output_types; + + int64_t value_id_ = opresult_json.at(VALUE_ID).template get(); + output_types.push_back(ReadType(&(opresult_json.at(TYPE_TYPE)))); + VLOG(6) << "Finish Read value " << value_id_ << "."; + + Json& attrs_json = op_json->at(ATTRS); + pir::AttributeMap attributes; + pir::IrContext* ctx = pir::IrContext::Instance(); + attributes.insert( + {"is_distributed", GetOneBoolArrayAttribute(ctx, &attrs_json.at(0))}); 
+ attributes.insert( + {"is_parameter", GetOneBoolArrayAttribute(ctx, &attrs_json.at(1))}); + attributes.insert( + {"need_clip", GetOneBoolArrayAttribute(ctx, &attrs_json.at(2))}); + attributes.insert({"parameter_name", + pir::StrAttribute::get( + ctx, attrs_json.at(3).template get())}); + + if (op_json->contains(OPRESULTS_ATTRS)) { + Json& other_attrs_json = op_json->at(OPRESULTS_ATTRS); + attributes.insert({"persistable", + GetOneBoolArrayAttribute(ctx, &other_attrs_json.at(0))}); + attributes.insert({"stop_gradient", + GetOneBoolArrayAttribute(ctx, &other_attrs_json.at(1))}); + attributes.insert( + {"trainable", GetOneBoolArrayAttribute(ctx, &other_attrs_json.at(2))}); + } + + pir::IrContext* ctx_ = pir::IrContext::Instance(); + // prepare opinfo + pir::OpInfo op_info = ctx_->GetRegisteredOpInfo(pir::ParameterOp::name()); + // deserialize op + pir::Operation* op = + Operation::Create(inputs, attributes, output_types, op_info); + + id_value_map[value_id_] = op->result(0); + VLOG(4) << "Finish Read Operation " << op->name() << "."; + return op; +} pir::Operation* ProgramReader::ReadOp(Json* op_json) { auto op_name = op_json->at(ID).template get(); + if (op_name == PARAMETEROP) { + return ReadParameterOp(op_json); + } + GetDecompressOpName(&op_name); VLOG(4) << "Read op_name = " << op_name << "."; // deserialize opoperands (find value) Json& operands_json = op_json->at(OPOPERANDS); std::vector inputs; for (auto& operand_json : operands_json) { - int64_t id = operand_json.at(ID).template get(); + int64_t id = operand_json.at(VALUE_ID).template get(); inputs.push_back(id_value_map[id]); } VLOG(6) << "Finish Read OP's OpOperand."; @@ -105,7 +162,7 @@ pir::Operation* ProgramReader::ReadOp(Json* op_json) { std::vector output_types; std::vector output_ids; for (auto& opresult_json : opresults_json) { - int64_t value_id_ = opresult_json.at(ID).template get(); + int64_t value_id_ = opresult_json.at(VALUE_ID).template get(); output_ids.push_back(value_id_); 
output_types.push_back(ReadType(&(opresult_json.at(TYPE_TYPE)))); VLOG(6) << "Finish Read value " << value_id_ << "."; diff --git a/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc b/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc index 35219cc2a7d77..037974efaeea3 100644 --- a/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc +++ b/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc @@ -135,11 +135,11 @@ Json ProgramWriter::WriteValue(const pir::Value& value) { Json var_json; if (value) { value_id_map[value] = value_id_; - var_json[ID] = value_id_; + var_json[VALUE_ID] = value_id_; VLOG(6) << "Finish write value " << value_id_ << "."; value_id_++; } else { - var_json[ID] = 0; // NULL_TYPE + var_json[VALUE_ID] = 0; // NULL_TYPE VLOG(6) << "Finish write NULL_TYPE value."; } @@ -149,9 +149,58 @@ Json ProgramWriter::WriteValue(const pir::Value& value) { return var_json; } +#define ONE_BOOL_ARRAY_ATTRIBUTE_CAST_TEMPLATE(attr_name) \ + static_cast(op.attributes() \ + .at(attr_name) \ + .dyn_cast() \ + .at(0) \ + .dyn_cast() \ + .data()) +Json ProgramWriter::WriteParameterOP(const pir::Operation& op) { + // attr_name ; type + // is_distributed; array(bool) + // is_parameter; array(bool) + // need_clip; array(bool) + // parameter_name; string + // persistable; array(bool) + // stop_gradient; array(bool) + // trainable; array(bool) + Json op_json = Json::object(); + op_json[ID] = PARAMETEROP; + // serialize opoperands + VLOG(4) << "Begin write Operation " << op.name() << "."; + op_json[OPRESULTS] = WriteValue(op.result(0)); + Json attrs_json = Json::array(); + attrs_json.emplace_back( + ONE_BOOL_ARRAY_ATTRIBUTE_CAST_TEMPLATE("is_distributed")); + attrs_json.emplace_back( + ONE_BOOL_ARRAY_ATTRIBUTE_CAST_TEMPLATE("is_parameter")); + attrs_json.emplace_back(ONE_BOOL_ARRAY_ATTRIBUTE_CAST_TEMPLATE("need_clip")); + attrs_json.emplace_back(op.attributes() + .at("parameter_name") + .dyn_cast() + .AsString()); + op_json[ATTRS] = attrs_json; + 
Json other_attrs_json = Json::array(); + other_attrs_json.emplace_back( + ONE_BOOL_ARRAY_ATTRIBUTE_CAST_TEMPLATE("persistable")); + other_attrs_json.emplace_back( + ONE_BOOL_ARRAY_ATTRIBUTE_CAST_TEMPLATE("stop_gradient")); + other_attrs_json.emplace_back( + ONE_BOOL_ARRAY_ATTRIBUTE_CAST_TEMPLATE("trainable")); + if (trainable_) { + op_json[OPRESULTS_ATTRS] = other_attrs_json; + } + return op_json; +} Json ProgramWriter::WriteOp(const pir::Operation& op) { + if (op.isa()) { + return WriteParameterOP(op); + } Json op_json = Json::object(); - op_json[ID] = op.name(); + auto op_name = op.name(); + GetCompressOpName(&op_name); + op_json[ID] = op_name; // serialize opoperands VLOG(4) << "Begin write Operation " << op.name() << "."; Json operands_json = Json::array(); @@ -195,10 +244,10 @@ Json ProgramWriter::WriteOpOperand(const pir::OpOperand& op_operand) { Json operand_json = Json::object(); if (op_operand.source()) { int64_t id = value_id_map[op_operand.source()]; - operand_json[ID] = id; + operand_json[VALUE_ID] = id; VLOG(6) << "Finish write OpOperand " << id << "."; } else { - operand_json[ID] = 0; // NULL_VALUE + operand_json[VALUE_ID] = 0; // NULL_VALUE VLOG(6) << "Finish write NULL_VALUE OpOperand."; } diff --git a/paddle/fluid/pir/serialize_deserialize/src/schema.cc b/paddle/fluid/pir/serialize_deserialize/src/schema.cc new file mode 100644 index 0000000000000..770260d025f91 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/src/schema.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/serialize_deserialize/include/schema.h" +#include "paddle/phi/core/enforce.h" +namespace pir { + +std::pair getContentSplitByDot( + const std::string& str) { + size_t pos = str.find('.'); + if (pos == std::string::npos) { + return {str, str}; + } + return {str.substr(0, pos), str.substr(pos + 1)}; +} + +DialectIdMap* DialectIdMap::Instance() { + static DialectIdMap map; + return ↦ +} +DialectIdMap::DialectIdMap() { + insert(pir::BuiltinDialect::name(), "0"); + insert(paddle::dialect::OperatorDialect::name(), "1"); + insert(pir::ControlFlowDialect::name(), "2"); + insert(paddle::dialect::CustomOpDialect::name(), "3"); +} +void DialectIdMap::insert(const std::string& key, const std::string& value) { + CompressDialect[key] = value; + DecompressDialect[value] = key; +} + +std::string DialectIdMap::GetCompressDialectId(const std::string& name) { + if (CompressDialect.find(name) != CompressDialect.end()) { + return CompressDialect[name]; + } else { + VLOG(0) << "can't find dialect " << name + << "'s compress id, return original dialectname, it's better to " + "insert compress id in DialectIdMap() func"; + return name; + } + return ""; +} + +std::string DialectIdMap::GetDecompressDialectId(const std::string& id) { + if (DecompressDialect.find(id) != DecompressDialect.end()) { + return DecompressDialect[id]; + } else { + PADDLE_ENFORCE( + false, + phi::errors::InvalidArgument( + "Unknown id %s for decompress dialect, pleace check your file", + id)); + } + return ""; +} + +} // namespace pir diff --git 
a/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc b/paddle/fluid/pir/transforms/general/group_norm_silu_fuse_pass.cc similarity index 51% rename from paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc rename to paddle/fluid/pir/transforms/general/group_norm_silu_fuse_pass.cc index 0b93a496d4dde..2e46903b1734f 100644 --- a/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/general/group_norm_silu_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h" +#include "paddle/fluid/pir/transforms/general/group_norm_silu_fuse_pass.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" #include "paddle/fluid/pir/drr/include/drr_pattern_base.h" @@ -37,11 +37,19 @@ For example: | output ------------------------------------------------------ -After the pass is applied: +After the pass is applied:XPU X Scale | Bias \ | / - group_norm_silu + group_norm_silu_xpu + | + Out +------------------------------------------------------ +After the pass is applied:GPU + X + Scale | Bias + \ | / + add_group_norm_silu | Out */ @@ -54,31 +62,62 @@ class GroupNormSiluPattern : public paddle::drr::DrrPatternBase { void operator()(paddle::drr::DrrPatternContext *ctx) const override { paddle::drr::SourcePattern pat = ctx->SourcePattern(); - const auto &groupnorm = pat.Op( - paddle::dialect::GroupNormOp::name(), - {{"epsilon", pat.Attr("epsilon")}, {"groups", pat.Attr("groups")}}); + const auto &group_norm = pat.Op(paddle::dialect::GroupNormOp::name(), + {{"epsilon", pat.Attr("epsilon")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}); const auto &silu = pat.Op(paddle::dialect::SiluOp::name()); - groupnorm({&pat.Tensor("X"), &pat.Tensor("Scale"), &pat.Tensor("Bias")}, - {&pat.Tensor("Y"), &pat.Tensor("Mean"), &pat.Tensor("Variance")}); + group_norm( 
+ {&pat.Tensor("X"), &pat.Tensor("Scale"), &pat.Tensor("Bias")}, + {&pat.Tensor("Y"), &pat.Tensor("Mean"), &pat.Tensor("Variance")}); silu({&pat.Tensor("Y")}, {&pat.Tensor("Out")}); +#ifdef PADDLE_WITH_CUDA + pat.AddConstraint([this](const paddle::drr::MatchContext &match_ctx) { + auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("X")); + if (!x_dtype.isa() && + !x_dtype.isa()) { + return false; + } + return true; + }); +#endif + +#ifdef PADDLE_WITH_CUDA + paddle::drr::ResultPattern res = pat.ResultPattern(); + const auto &add_group_norm_silu_op = + res.Op(paddle::dialect::AddGroupNormSiluOp::name(), + {{"epsilon", pat.Attr("epsilon")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"activation", res.StrAttr("silu")}}); + add_group_norm_silu_op({&res.Tensor("X"), + &res.InputNoneTensor(), + &res.Tensor("Scale"), + &res.Tensor("Bias")}, + {&res.Tensor("Out"), + &res.OutputNoneTensor(), + &res.Tensor("Mean"), + &res.Tensor("Variance")}); +#endif +#ifdef PADDLE_WITH_XPU paddle::drr::ResultPattern res = pat.ResultPattern(); - const auto &group_norm_silu_xpu = res.Op( paddle::dialect::GroupNormSiluXpuOp::name(), {{{"epsilon", pat.Attr("epsilon")}, {"groups", pat.Attr("groups")}}}); group_norm_silu_xpu( {&res.Tensor("X"), &res.Tensor("Scale"), &res.Tensor("Bias")}, {&res.Tensor("Out")}); +#endif } }; -class GroupNormSiluXpuFusePass : public pir::PatternRewritePass { +class GroupNormSiluFusePass : public pir::PatternRewritePass { public: - GroupNormSiluXpuFusePass() - : pir::PatternRewritePass("group_norm_silu_xpu_fuse_pass", 2) {} + GroupNormSiluFusePass() + : pir::PatternRewritePass("group_norm_silu_fuse_pass", 2) {} pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { pir::RewritePatternSet ps(context); @@ -90,10 +129,10 @@ class GroupNormSiluXpuFusePass : public pir::PatternRewritePass { } // namespace namespace pir { -std::unique_ptr CreateGroupNormSiluXpuFusePass() { - return std::make_unique(); 
+std::unique_ptr CreateGroupNormSiluFusePass() { + return std::make_unique(); } } // namespace pir -REGISTER_IR_PASS(group_norm_silu_xpu_fuse_pass, GroupNormSiluXpuFusePass); +REGISTER_IR_PASS(group_norm_silu_fuse_pass, GroupNormSiluFusePass); diff --git a/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h b/paddle/fluid/pir/transforms/general/group_norm_silu_fuse_pass.h similarity index 92% rename from paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h rename to paddle/fluid/pir/transforms/general/group_norm_silu_fuse_pass.h index 665c7dcb03f16..475444a19c424 100644 --- a/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h +++ b/paddle/fluid/pir/transforms/general/group_norm_silu_fuse_pass.h @@ -21,6 +21,6 @@ namespace pir { class Pass; -IR_API std::unique_ptr CreateGroupNormSiluXpuFusePass(); +IR_API std::unique_ptr CreateGroupNormSiluFusePass(); } // namespace pir diff --git a/paddle/fluid/pir/transforms/general/transfer_layout_pass.cc b/paddle/fluid/pir/transforms/general/transfer_layout_pass.cc index fcbfcbb910e1e..61113f8e9dfc5 100644 --- a/paddle/fluid/pir/transforms/general/transfer_layout_pass.cc +++ b/paddle/fluid/pir/transforms/general/transfer_layout_pass.cc @@ -278,7 +278,7 @@ struct FlowGraph { } } - std::unordered_set nhwc_nodes; + std::unordered_set mutable_nodes; for (auto& op : *(program.block())) { auto layout_transform_iface = op.dyn_cast(); @@ -286,10 +286,14 @@ struct FlowGraph { continue; } + if (!layout_transform_iface.CanBeModified(&op)) { + continue; + } + auto prefer_layout = layout_transform_iface.PreferLayout(&op); if (prefer_layout == common::DataLayout::NHWC) { Node op_node(&op); - nhwc_nodes.insert(op_node); + mutable_nodes.insert(op_node); AddEdge(op_node, dst_node(), INF); VLOG(10) << "[PreProcess] node: " << op_node << " should be set to NHWC"; @@ -302,7 +306,7 @@ struct FlowGraph { // operation who have a dertermined layout and spread its layout to // its output and inputs recursively. 
std::queue q; - for (auto& n : nhwc_nodes) { + for (auto& n : mutable_nodes) { q.push(n); } std::unordered_set is_node_layout_visited; @@ -362,13 +366,14 @@ struct FlowGraph { // a point of cut edge. So we set its outputs and inputs to // immutable. Node in_node = Node(v.defining_op()); - nhwc_nodes.erase(in_node); - VLOG(10) << "erase node: " << in_node << " from nhwc set"; + mutable_nodes.erase(in_node); + VLOG(10) << "erase node: " << in_node << " from mutable set"; for (auto it = v.use_begin(); it != v.use_end(); ++it) { Node out_node(it->owner()); - nhwc_nodes.erase(out_node); - VLOG(10) << "erase node: " << out_node << " from nhwc set"; + mutable_nodes.erase(out_node); + VLOG(10) + << "erase node: " << out_node << " from mutable set"; } } return !can_be_transformed; @@ -380,8 +385,8 @@ struct FlowGraph { continue; } - VLOG(10) << "add node to nhwc set: " << node; - nhwc_nodes.insert(node); + VLOG(10) << "add node to mutable set: " << node; + mutable_nodes.insert(node); VLOG(10) << "processing node successor: " << node; @@ -403,7 +408,7 @@ struct FlowGraph { continue; } is_node_layout_visited.insert(node); - if (nhwc_nodes.count(node) == 0) { + if (mutable_nodes.count(node) == 0) { VLOG(10) << "add node to nchw set: " << node; AddEdge(src_node(), node, INF); } @@ -542,7 +547,7 @@ using Edge = FlowGraph::Edge; class TransferLayoutPass : public pir::Pass { public: - TransferLayoutPass() : pir::Pass("transfer_layout_pass", 3) {} + TransferLayoutPass() : pir::Pass("transfer_layout_pass", 2) {} bool CanApplyOn(pir::Operation* op) const override { if (!op->isa()) { diff --git a/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc index 35afabe3ad1dc..bea1e68e9c077 100644 --- a/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc @@ -141,11 +141,13 @@ class RmsNormFusePattern : public paddle::drr::DrrPatternBase { class AddRmsNormFusePattern : public 
paddle::drr::DrrPatternBase { private: const bool extra_add_; + const bool trans_extra_add_; public: - explicit AddRmsNormFusePattern(bool extra_add) : extra_add_(extra_add) {} + AddRmsNormFusePattern(bool extra_add, bool trans_extra_add) + : extra_add_(extra_add), trans_extra_add_{trans_extra_add} {} - uint32_t benefit() const override { return extra_add_ ? 2 : 1; } + uint32_t benefit() const override { return extra_add_ ? 4 : 3; } std::string name() const override { return "AddRmsNormFusePattern"; } @@ -176,7 +178,9 @@ class AddRmsNormFusePattern : public paddle::drr::DrrPatternBase { if (extra_add_) { const auto &add1 = pat.Op(paddle::dialect::AddOp::name()); pat.Tensor("add_out1") = - add1(pat.Tensor("add_out"), pat.Tensor("any_tensor")); + trans_extra_add_ + ? add1(pat.Tensor("any_tensor"), pat.Tensor("add_out")) + : add1(pat.Tensor("add_out"), pat.Tensor("any_tensor")); } paddle::drr::ResultPattern res = pat.ResultPattern(); const auto &res_rms_norm = @@ -207,11 +211,13 @@ class AddRmsNormFusePattern : public paddle::drr::DrrPatternBase { class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase { private: const bool extra_add_; + const bool trans_extra_add_; public: - explicit AddLayerNormFusePattern(bool extra_add) : extra_add_(extra_add) {} + AddLayerNormFusePattern(bool extra_add, bool trans_extra_add) + : extra_add_(extra_add), trans_extra_add_{trans_extra_add} {} - uint32_t benefit() const override { return extra_add_ ? 2 : 1; } + uint32_t benefit() const override { return extra_add_ ? 4 : 3; } std::string name() const override { return "AddLayerNormFusePattern"; } void operator()(paddle::drr::DrrPatternContext *ctx) const override { @@ -231,22 +237,20 @@ class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase { if (extra_add_) { const auto &add1 = pat.Op(paddle::dialect::AddOp::name()); pat.Tensor("add_out1") = - add1(pat.Tensor("add_out"), pat.Tensor("any_tensor")); + trans_extra_add_ + ? 
add1(pat.Tensor("any_tensor"), pat.Tensor("add_out")) + : add1(pat.Tensor("add_out"), pat.Tensor("any_tensor")); } paddle::drr::ResultPattern res = pat.ResultPattern(); const auto &cast_op_dtype = res.ComputeAttr( [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType { - auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x")); - return paddle::dialect::TransToPhiDataType(x_dtype); + return phi::DataType::FLOAT32; }); - const auto &cast_op_1 = + const auto cast_1_op = res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); - res.Tensor("casted_bias") = cast_op_1(res.Tensor("bias")); - const auto &cast_op_2 = + const auto cast_2_op = res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); - res.Tensor("casted_w") = cast_op_2(res.Tensor("w")); - const auto &fuse_layer_norm = res.Op(paddle::dialect::FusedBiasResidualLayernormOp::name(), {{"epsilon", pat.Attr("epsilon")}, @@ -256,14 +260,15 @@ class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase { {"quant_round_type", res.Int32Attr(0)}, {"quant_max_bound", res.Float32Attr(0.0)}, {"quant_min_bound", res.Float32Attr(0.0)}}); - + res.Tensor("w_cast") = cast_1_op(res.Tensor("w")); + res.Tensor("bias_cast") = cast_1_op(res.Tensor("bias")); fuse_layer_norm( { &res.Tensor("x"), - &res.Tensor("casted_bias"), - &res.Tensor("residual"), - &res.Tensor("casted_w"), &res.InputNoneTensor(), + &res.Tensor("residual"), + &res.Tensor("w_cast"), + &res.Tensor("bias_cast"), }, {&res.Tensor("layer_norm_out"), &res.Tensor("add_out"), @@ -272,6 +277,120 @@ class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase { } }; +class AddGroupNormFusePattern : public paddle::drr::DrrPatternBase { + private: + const bool extra_add_; + const bool trans_extra_add_; + + public: + AddGroupNormFusePattern(bool extra_add, bool trans_extra_add) + : extra_add_(extra_add), trans_extra_add_{trans_extra_add} {} + + uint32_t benefit() const override { return extra_add_ ? 
4 : 3; } + std::string name() const override { return "AddGroupNormFusePattern"; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + const auto &add = pat.Op(paddle::dialect::AddOp::name()); + const auto &group_norm = pat.Op(paddle::dialect::GroupNormOp::name(), + {{"epsilon", pat.Attr("epsilon")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}); + pat.Tensor("add_out") = add(pat.Tensor("x"), pat.Tensor("residual")); + group_norm( + {&pat.Tensor("add_out"), &pat.Tensor("scale"), &pat.Tensor("bias")}, + {&pat.Tensor("group_out"), + &pat.Tensor("mean_out_0"), + &pat.Tensor("variance_out_0")}); + // TODO(bukejiyu) :DRR support matching placeholder op, + // the following needs to be deleted + if (extra_add_) { + const auto &add1 = pat.Op(paddle::dialect::AddOp::name()); + pat.Tensor("add_out1") = + trans_extra_add_ + ? add1(pat.Tensor("any_tensor"), pat.Tensor("add_out")) + : add1(pat.Tensor("add_out"), pat.Tensor("any_tensor")); + } + pat.AddConstraint([this](const paddle::drr::MatchContext &match_ctx) { + auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x")); + if (!x_dtype.isa() && + !x_dtype.isa()) { + return false; + } + return true; + }); + paddle::drr::ResultPattern res = pat.ResultPattern(); + const auto &add_group_norm_silu_op = + res.Op(paddle::dialect::AddGroupNormSiluOp::name(), + {{"epsilon", pat.Attr("epsilon")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"activation", res.StrAttr("")}}); + + add_group_norm_silu_op({&res.Tensor("x"), + &res.Tensor("residual"), + &res.Tensor("scale"), + &res.Tensor("bias")}, + {&res.Tensor("group_out"), + &res.Tensor("add_out"), + &res.Tensor("mean_out"), + &res.Tensor("variance_out")}); + } +}; + +class AddGroupNormWithActPattern : public paddle::drr::DrrPatternBase { + public: + uint32_t benefit() const override { return 2; } + std::string name() const override 
{ return "AddGroupNormWithActPattern"; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + const auto &add_group_norm_silu_op = + pat.Op(paddle::dialect::AddGroupNormSiluOp::name(), + {{"epsilon", pat.Attr("epsilon")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"activation", pat.Attr("activation")}}); + const auto &silu = pat.Op(paddle::dialect::SiluOp::name()); + add_group_norm_silu_op({&pat.Tensor("x"), + &pat.Tensor("residual"), + &pat.Tensor("scale"), + &pat.Tensor("bias")}, + {&pat.Tensor("group_out"), + &pat.Tensor("add_out"), + &pat.Tensor("mean_out_0"), + &pat.Tensor("variance_out_0")}); + pat.Tensor("silu_out") = silu(pat.Tensor("group_out")); + pat.AddConstraint([this](const paddle::drr::MatchContext &match_ctx) { + auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x")); + if (!x_dtype.isa() && + !x_dtype.isa()) { + return false; + } + auto activation = match_ctx.Attr("activation"); + if (activation != "") { + return false; + } + return true; + }); + paddle::drr::ResultPattern res = pat.ResultPattern(); + const auto &res_add_group_norm_silu_op = + res.Op(paddle::dialect::AddGroupNormSiluOp::name(), + {{"epsilon", pat.Attr("epsilon")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"activation", res.StrAttr("silu")}}); + res_add_group_norm_silu_op({&res.Tensor("x"), + &res.Tensor("residual"), + &res.Tensor("scale"), + &res.Tensor("bias")}, + {&res.Tensor("silu_out"), + &res.Tensor("add_out"), + &res.Tensor("mean_out"), + &res.Tensor("variance_out")}); + } +}; + class AddNormFusePass : public pir::PatternRewritePass { public: AddNormFusePass() : pir::PatternRewritePass("add_norm_fuse_pass", 2) {} @@ -290,13 +409,36 @@ class AddNormFusePass : public pir::PatternRewritePass { // x-------- // add-rms_norm ---> rms_norm // residual- - ps.Add(paddle::drr::Create(context, !extra_add)); - 
ps.Add(paddle::drr::Create(context, extra_add)); + ps.Add( + paddle::drr::Create(context, !extra_add, false)); + ps.Add( + paddle::drr::Create(context, extra_add, true)); + ps.Add( + paddle::drr::Create(context, extra_add, false)); + // x-------- // add-layer_norm ----> fused_bias_residual_layernorm // residual- - ps.Add(paddle::drr::Create(context, !extra_add)); - ps.Add(paddle::drr::Create(context, extra_add)); + ps.Add(paddle::drr::Create( + context, !extra_add, false)); + ps.Add( + paddle::drr::Create(context, extra_add, true)); + ps.Add(paddle::drr::Create( + context, extra_add, false)); + + // x-------- + // add-group_norm ----> add_group_norm_silu + // residual- + ps.Add(paddle::drr::Create( + context, !extra_add, true)); + ps.Add( + paddle::drr::Create(context, extra_add, true)); + ps.Add(paddle::drr::Create( + context, extra_add, false)); + + // add_group_norm_silu-silu --->add_group_norm_silu + ps.Add(paddle::drr::Create(context)); + // group-silu->add_group_norm_silu moved to group_norm_silu_fuse_pass return ps; } }; diff --git a/paddle/fluid/pir/transforms/gpu/conv2d_add_act_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/conv2d_add_act_fuse_pass.cc index 96851cfeac559..754422312e47a 100644 --- a/paddle/fluid/pir/transforms/gpu/conv2d_add_act_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/conv2d_add_act_fuse_pass.cc @@ -35,8 +35,8 @@ class Conv2dAddActFusePassDrrPattern : public paddle::drr::DrrPatternBase { private: std::string act_name_; bool cutlass_pattern_; - const std::unordered_set conv2d_depthwise_act_set_ = { - "relu", "swish", "sigmoid"}; + const std::unordered_set conv2d_depthwise_act_set_ = {"relu", + "swish"}; public: static const int CUTLASS_NHWC_ALIGNMENT = 8; @@ -152,62 +152,6 @@ class Conv2dAddActFusePassDrrPattern : public paddle::drr::DrrPatternBase { [this](const paddle::drr::MatchContext &match_ctx) -> std::string { return cutlass_pattern_ ? 
"gpu" : "gpudnn"; }); - const auto &perm_weight_shape = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::vector { - auto data_format = match_ctx.Attr("data_format"); - if (cutlass_pattern_ || data_format == "NHWC") { - return {0, 2, 3, 1}; - } else { - return {0, 1, 2, 3}; - } - }); - const auto &perm_input_shape = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::vector { - auto data_format = match_ctx.Attr("data_format"); - if (cutlass_pattern_ && data_format == "NCHW") { - return {0, 2, 3, 1}; - } else { - return {0, 1, 2, 3}; - } - }); - const auto &perm_bias_shape = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::vector { - auto data_format = match_ctx.Attr("data_format"); - auto bias_shape = pir::GetShapeFromValue(match_ctx.Tensor("bias")); - if (cutlass_pattern_ && data_format == "NCHW") { - if (bias_shape.size() == 4) { - return {0, 2, 3, 1}; - } else if (bias_shape.size() == 3) { - return {0, 2, 1}; - } else { - return {0}; - } - } else { - std::vector dst_vector(bias_shape.size()); - std::iota(dst_vector.begin(), dst_vector.end(), 0); - return dst_vector; - } - }); - const auto &data_format_conv = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::string { - auto data_format = match_ctx.Attr("data_format"); - if (cutlass_pattern_ && data_format == "NCHW") { - return "NHWC"; - } else { - return data_format; - } - }); - // TODO(bukejiyu) When the transfer_layout_pass is supported, - // transpose_op will be deleted. 
- const auto &transpose_op_w = res.Op(paddle::dialect::TransposeOp::name(), - {{"perm", perm_weight_shape}}); - const auto &transpose_op_input = res.Op( - paddle::dialect::TransposeOp::name(), {{"perm", perm_input_shape}}); - const auto &transpose_op_bias = res.Op(paddle::dialect::TransposeOp::name(), - {{"perm", perm_bias_shape}}); - res.Tensor("filter_transpose") = transpose_op_w(res.Tensor("filter")); - res.Tensor("input_transpose") = transpose_op_input(res.Tensor("input")); - res.Tensor("bias_transpose") = transpose_op_bias(res.Tensor("bias")); const auto &fused_conv2d_add_act = res.Op( paddle::dialect::FusedConv2dAddActOp::name(), {{ @@ -216,7 +160,7 @@ class Conv2dAddActFusePassDrrPattern : public paddle::drr::DrrPatternBase { {"padding_algorithm", pat.Attr("padding_algorithm")}, {"dilations", pat.Attr("dilations")}, {"groups", pat.Attr("groups")}, - {"data_format", data_format_conv}, + {"data_format", pat.Attr("data_format")}, {"activation", res.StrAttr(act_name_)}, {"split_channels", res.VectorInt32Attr({})}, {"exhaustive_search", res.BoolAttr(false)}, @@ -224,24 +168,11 @@ class Conv2dAddActFusePassDrrPattern : public paddle::drr::DrrPatternBase { {"fuse_alpha", res.Float32Attr(0.0f)}, }}, {{{paddle::dialect::kForceBackendAttr, force_backend_runtime_attr}}}); - fused_conv2d_add_act({&res.Tensor("input_transpose"), - &res.Tensor("filter_transpose"), - &res.Tensor("bias_transpose"), + fused_conv2d_add_act({&res.Tensor("input"), + &res.Tensor("filter"), + &res.Tensor("bias"), &res.InputNoneTensor()}, - {&res.Tensor("fuesd_conv2d_add_act_out")}); - const auto &perm_out_shape = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::vector { - auto data_format = match_ctx.Attr("data_format"); - if (cutlass_pattern_ && data_format == "NCHW") { - return {0, 3, 1, 2}; - } else { - return {0, 1, 2, 3}; - } - }); - const auto &transpose_op_out = res.Op(paddle::dialect::TransposeOp::name(), - {{"perm", perm_out_shape}}); - res.Tensor("act_out") 
= - transpose_op_out(res.Tensor("fuesd_conv2d_add_act_out")); + {&res.Tensor("act_out")}); } }; @@ -278,11 +209,9 @@ class Conv2dAdd2ActFusePattern if (next_op->isa()) { act_name = "relu"; } -#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 8000 && CUDNN_VERSION < 8700 +#if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8700 if (next_op->isa()) { act_name = "tanh"; - } else if (next_op->isa()) { - act_name = "sigmoid"; } #endif if (act_name == "") { @@ -346,11 +275,10 @@ class Conv2dAddActFusePass : public pir::PatternRewritePass { paddle::dialect::FusedConv2dAddActOp::name()}); // NOTE(liuyuanle): cudnn [8.7, 8.9 now) version has bug when act is -// sigmoid/tanh. Ref to issue +// tanh. Ref to issue // https://github.com/PaddlePaddle/Paddle/issues/50853 #if CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8700 - const std::unordered_set cudnn_act_set( - {"relu", "sigmoid", "tanh"}); + const std::unordered_set cudnn_act_set({"relu", "tanh"}); #else const std::unordered_set cudnn_act_set({"relu"}); #endif diff --git a/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc index 994fbdf2ce69f..89a023197a27e 100644 --- a/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc @@ -138,62 +138,6 @@ class Conv2dAddFusePattern : public paddle::drr::DrrPatternBase { [this](const paddle::drr::MatchContext &match_ctx) -> std::string { return cutlass_pattern_ ? 
"gpu" : "gpudnn"; }); - const auto &perm_weight_shape = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::vector { - auto data_format = match_ctx.Attr("data_format"); - if (cutlass_pattern_ || data_format == "NHWC") { - return {0, 2, 3, 1}; - } else { - return {0, 1, 2, 3}; - } - }); - const auto &perm_input_shape = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::vector { - auto data_format = match_ctx.Attr("data_format"); - if (cutlass_pattern_ && data_format == "NCHW") { - return {0, 2, 3, 1}; - } else { - return {0, 1, 2, 3}; - } - }); - const auto &perm_bias_shape = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::vector { - auto data_format = match_ctx.Attr("data_format"); - auto bias_shape = pir::GetShapeFromValue(match_ctx.Tensor("bias")); - if (cutlass_pattern_ && data_format == "NCHW") { - if (bias_shape.size() == 4) { - return {0, 2, 3, 1}; - } else if (bias_shape.size() == 3) { - return {0, 2, 1}; - } else { - return {0}; - } - } else { - std::vector dst_vector(bias_shape.size()); - std::iota(dst_vector.begin(), dst_vector.end(), 0); - return dst_vector; - } - }); - const auto &data_format_conv = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::string { - auto data_format = match_ctx.Attr("data_format"); - if (cutlass_pattern_ && data_format == "NCHW") { - return "NHWC"; - } else { - return data_format; - } - }); - // TODO(bukejiyu) When the transfer_layout_pass is supported, - // transpose_op will be deleted. 
- const auto &transpose_op_w = res.Op(paddle::dialect::TransposeOp::name(), - {{"perm", perm_weight_shape}}); - const auto &transpose_op_input = res.Op( - paddle::dialect::TransposeOp::name(), {{"perm", perm_input_shape}}); - const auto &transpose_op_bias = res.Op(paddle::dialect::TransposeOp::name(), - {{"perm", perm_bias_shape}}); - res.Tensor("filter_transpose") = transpose_op_w(res.Tensor("filter")); - res.Tensor("input_transpose") = transpose_op_input(res.Tensor("input")); - res.Tensor("bias_transpose") = transpose_op_bias(res.Tensor("bias")); const auto &fused_conv2d_add_act = res.Op( paddle::dialect::FusedConv2dAddActOp::name(), {{ @@ -202,7 +146,7 @@ class Conv2dAddFusePattern : public paddle::drr::DrrPatternBase { {"padding_algorithm", pat.Attr("padding_algorithm")}, {"dilations", pat.Attr("dilations")}, {"groups", pat.Attr("groups")}, - {"data_format", data_format_conv}, + {"data_format", pat.Attr("data_format")}, {"activation", res.StrAttr("identity")}, {"split_channels", res.VectorInt32Attr({})}, {"exhaustive_search", res.BoolAttr(false)}, @@ -211,25 +155,11 @@ class Conv2dAddFusePattern : public paddle::drr::DrrPatternBase { }}, {{{paddle::dialect::kForceBackendAttr, force_backend_runtime_attr}}}); - fused_conv2d_add_act( - {&res.Tensor("input_transpose"), - &res.Tensor("filter_transpose"), - &res.Tensor("bias_transpose"), - &res.InputNoneTensor()}, - {&res.Tensor("fuesd_conv2d_add_act_out"), &res.OutputNoneTensor()}); - const auto &perm_out_shape = res.ComputeAttr( - [this](const paddle::drr::MatchContext &match_ctx) -> std::vector { - auto data_format = match_ctx.Attr("data_format"); - if (cutlass_pattern_ && data_format == "NCHW") { - return {0, 3, 1, 2}; - } else { - return {0, 1, 2, 3}; - } - }); - const auto &transpose_op_out = res.Op(paddle::dialect::TransposeOp::name(), - {{"perm", perm_out_shape}}); - res.Tensor("add_out") = - transpose_op_out(res.Tensor("fuesd_conv2d_add_act_out")); + fused_conv2d_add_act({&res.Tensor("input"), + 
&res.Tensor("filter"), + &res.Tensor("bias"), + &res.InputNoneTensor()}, + {&res.Tensor("add_out"), &res.OutputNoneTensor()}); } }; diff --git a/paddle/fluid/pir/transforms/gpu/matmul_add_act_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/matmul_add_act_fuse_pass.cc index 0da1499a730c5..ecb450201a787 100644 --- a/paddle/fluid/pir/transforms/gpu/matmul_add_act_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/matmul_add_act_fuse_pass.cc @@ -60,11 +60,17 @@ class MatmulAddPattern : public paddle::drr::DrrPatternBase { pat.AddConstraint([&](const paddle::drr::MatchContext &match_ctx) { auto w_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w")); - if (!w_dtype.isa() && - !w_dtype.isa() && - !w_dtype.isa() && - !w_dtype.isa()) { - return false; + if (fused_op_name_ == paddle::dialect::GemmEpilogueOp::name()) { + if (!w_dtype.isa() && + !w_dtype.isa()) { + return false; + } + } else { + if (!w_dtype.isa() && + !w_dtype.isa() && + !w_dtype.isa()) { + return false; + } } auto w_dims = pir::GetShapeFromValue(match_ctx.Tensor("w")); auto x_dims = pir::GetShapeFromValue(match_ctx.Tensor("x")); diff --git a/paddle/fluid/pir/transforms/onednn/onednn_placement_pass.cc b/paddle/fluid/pir/transforms/onednn/onednn_placement_pass.cc index 38ee474c6352a..cfae35b765941 100644 --- a/paddle/fluid/pir/transforms/onednn/onednn_placement_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/onednn_placement_pass.cc @@ -53,6 +53,9 @@ class OneDNNPlacementPattern : public pir::OpRewritePattern { for (auto &attr : runtime_info.extra_args_default_value) { attributes[attr.first] = attr.second; } + if (attributes.find("is_test") != attributes.end()) { + attributes["is_test"] = rewriter.bool_attr(true); + } pir::Operation *op_item_inner = rewriter.Build(op->operands_source(), attributes, diff --git a/paddle/fluid/pir/transforms/passes.h b/paddle/fluid/pir/transforms/passes.h index db6a50a8ec3ad..01a122f2de6cc 100644 --- a/paddle/fluid/pir/transforms/passes.h +++ 
b/paddle/fluid/pir/transforms/passes.h @@ -38,6 +38,7 @@ USE_PIR_PASS(conv2d_add_fuse_pass); USE_PIR_PASS(conv2d_add_act_fuse_pass); USE_PIR_PASS(embedding_eltwise_layernorm_fuse_pass); USE_PIR_PASS(add_norm_fuse_pass); +USE_PIR_PASS(group_norm_silu_fuse_pass); USE_PIR_PASS(fused_dot_product_attention_pass); USE_PIR_PASS(fused_flash_attn_pass); USE_PIR_PASS(remove_redundant_transpose_pass); @@ -75,7 +76,6 @@ USE_PIR_PASS(onednn_placement_pass); #ifdef PADDLE_WITH_XPU USE_PIR_PASS(add_layernorm_xpu_fuse_pass); -USE_PIR_PASS(group_norm_silu_xpu_fuse_pass); USE_PIR_PASS(conv2d_bn_xpu_fuse_pass); #endif diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc index feac98cc91f05..bf3df2a9623c5 100644 --- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc +++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc @@ -62,8 +62,7 @@ COMMON_DECLARE_bool(use_mkldnn); COMMON_DECLARE_bool(print_ir); // COMMON_DECLARE_string(pir_onednn_kernel_blacklist); -namespace paddle { -namespace dialect { +namespace paddle::dialect { pir::Type ConvertOpTypeToKernelType(pir::IrContext* ctx, pir::Type op_type, @@ -3189,5 +3188,4 @@ std::unique_ptr PdOpLowerToKernelPass(pir::Program* prog, return program; } -} // namespace dialect -} // namespace paddle +} // namespace paddle::dialect diff --git a/paddle/fluid/platform/cuda_graph_with_memory_pool.cc b/paddle/fluid/platform/cuda_graph_with_memory_pool.cc index 9d522d8b2f0fe..0fe9e7efbfec2 100644 --- a/paddle/fluid/platform/cuda_graph_with_memory_pool.cc +++ b/paddle/fluid/platform/cuda_graph_with_memory_pool.cc @@ -22,8 +22,7 @@ PD_DECLARE_bool(use_stream_safe_cuda_allocator); COMMON_DECLARE_bool(new_executor_use_cuda_graph); -namespace paddle { -namespace platform { +namespace paddle::platform { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) void InitCUDNNRelatedHandle(phi::GPUContext* dev_ctx) { @@ -186,5 +185,4 @@ std::unique_ptr EndCUDAGraphCapture() { } #endif -} 
// namespace platform -} // namespace paddle +} // namespace paddle::platform diff --git a/paddle/fluid/platform/denormal.cc b/paddle/fluid/platform/denormal.cc index d6a0e749f93c8..3aa52da56f05b 100644 --- a/paddle/fluid/platform/denormal.cc +++ b/paddle/fluid/platform/denormal.cc @@ -38,8 +38,7 @@ #include #endif -namespace paddle { -namespace platform { +namespace paddle::platform { static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) { #ifdef DENORM_USE_INTRINSICS @@ -80,5 +79,4 @@ ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() { } ScopedFlushDenormal::ScopedFlushDenormal() { SetDenormalState(true, true); } -} // namespace platform -} // namespace paddle +} // namespace paddle::platform diff --git a/paddle/fluid/platform/device/gpu/gpu_info.cc b/paddle/fluid/platform/device/gpu/gpu_info.cc index 358d52d03d31b..d7b164862cd7e 100644 --- a/paddle/fluid/platform/device/gpu/gpu_info.cc +++ b/paddle/fluid/platform/device/gpu/gpu_info.cc @@ -68,8 +68,7 @@ PADDLE_DEFINE_EXPORTED_uint64(cuda_memory_async_pool_realease_threshold, "Amount of reserved memory in bytes to hold onto " "before trying to release memory back to the OS"); -namespace paddle { -namespace platform { +namespace paddle::platform { void GpuMemoryUsage(size_t *available, size_t *total) { size_t actual_available, actual_total; @@ -719,5 +718,4 @@ void GpuMemsetAsync(void *dst, int value, size_t count, gpuStream_t stream) { phi::backends::gpu::GpuMemsetAsync(dst, value, count, stream); } -} // namespace platform -} // namespace paddle +} // namespace paddle::platform diff --git a/paddle/fluid/platform/dynload/cublas.h b/paddle/fluid/platform/dynload/cublas.h index 496b253dff5b3..980b7cb35410b 100644 --- a/paddle/fluid/platform/dynload/cublas.h +++ b/paddle/fluid/platform/dynload/cublas.h @@ -80,8 +80,14 @@ namespace dynload { __macro(cublasSgetriBatched); \ __macro(cublasDgetrfBatched); \ __macro(cublasDgetriBatched); \ + __macro(cublasCgetrfBatched); \ + 
__macro(cublasCgetriBatched); \ + __macro(cublasZgetrfBatched); \ + __macro(cublasZgetriBatched); \ __macro(cublasSmatinvBatched); \ __macro(cublasDmatinvBatched); \ + __macro(cublasCmatinvBatched); \ + __macro(cublasZmatinvBatched); \ __macro(cublasSgetrsBatched); \ __macro(cublasDgetrsBatched); diff --git a/paddle/fluid/platform/dynload/cudnn.cc b/paddle/fluid/platform/dynload/cudnn.cc index aa8fd62aa85cc..21a45648fba63 100644 --- a/paddle/fluid/platform/dynload/cudnn.cc +++ b/paddle/fluid/platform/dynload/cudnn.cc @@ -16,9 +16,7 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/cudnn.h" -namespace paddle { -namespace platform { -namespace dynload { +namespace paddle::platform::dynload { #define DEFINE_WRAP(__name) DynLoad__##__name __name @@ -58,6 +56,4 @@ CUDNN_DNN_ROUTINE_EACH_R9(DEFINE_WRAP); bool HasCUDNN() { return phi::dynload::HasCUDNN(); } -} // namespace dynload -} // namespace platform -} // namespace paddle +} // namespace paddle::platform::dynload diff --git a/paddle/fluid/platform/dynload/curand.cc b/paddle/fluid/platform/dynload/curand.cc index 9a6686515ea2b..b2aaff1d15427 100644 --- a/paddle/fluid/platform/dynload/curand.cc +++ b/paddle/fluid/platform/dynload/curand.cc @@ -14,14 +14,10 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/curand.h" -namespace paddle { -namespace platform { -namespace dynload { +namespace paddle::platform::dynload { #define DEFINE_WRAP(__name) DynLoad__##__name __name CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP); -} // namespace dynload -} // namespace platform -} // namespace paddle +} // namespace paddle::platform::dynload diff --git a/paddle/fluid/platform/dynload/cusolver.cc b/paddle/fluid/platform/dynload/cusolver.cc index bf8394f3f02ca..1085aaebe052c 100644 --- a/paddle/fluid/platform/dynload/cusolver.cc +++ b/paddle/fluid/platform/dynload/cusolver.cc @@ -14,9 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/dynload/cusolver.h" -namespace paddle { -namespace platform { -namespace dynload { +namespace paddle::platform::dynload { #define DEFINE_WRAP(__name) DynLoad__##__name __name @@ -30,6 +28,4 @@ CUSOLVER_ROUTINE_EACH_R1(DEFINE_WRAP); CUSOLVER_ROUTINE_EACH_R2(DEFINE_WRAP); #endif -} // namespace dynload -} // namespace platform -} // namespace paddle +} // namespace paddle::platform::dynload diff --git a/paddle/fluid/platform/dynload/nccl.cc b/paddle/fluid/platform/dynload/nccl.cc index 7b0ea3bb7f3c1..ee270918b59c7 100644 --- a/paddle/fluid/platform/dynload/nccl.cc +++ b/paddle/fluid/platform/dynload/nccl.cc @@ -14,9 +14,7 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/nccl.h" -namespace paddle { -namespace platform { -namespace dynload { +namespace paddle::platform::dynload { #define DEFINE_WRAP(__name) DynLoad__##__name __name @@ -38,6 +36,4 @@ NCCL_RAND_ROUTINE_EACH_AFTER_2703(DEFINE_WRAP) NCCL_RAND_ROUTINE_EACH_AFTER_21100(DEFINE_WRAP) #endif -} // namespace dynload -} // namespace platform -} // namespace paddle +} // namespace paddle::platform::dynload diff --git a/paddle/fluid/platform/dynload/nvrtc.cc b/paddle/fluid/platform/dynload/nvrtc.cc index 242aa912ad838..b157c8c239ca5 100644 --- a/paddle/fluid/platform/dynload/nvrtc.cc +++ b/paddle/fluid/platform/dynload/nvrtc.cc @@ -16,9 +16,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/dynload/nvrtc.h" -namespace paddle { -namespace platform { -namespace dynload { +namespace paddle::platform::dynload { #define DEFINE_WRAP(__name) DynLoad__##__name __name @@ -26,6 +24,4 @@ NVRTC_ROUTINE_EACH(DEFINE_WRAP); bool HasNVRTC() { return phi::dynload::HasNVRTC(); } -} // namespace dynload -} // namespace platform -} // namespace paddle +} // namespace paddle::platform::dynload diff --git a/paddle/fluid/platform/dynload/warpctc.cc b/paddle/fluid/platform/dynload/warpctc.cc index 48c78a130732e..0861ffc7a0c33 100644 --- a/paddle/fluid/platform/dynload/warpctc.cc +++ b/paddle/fluid/platform/dynload/warpctc.cc @@ -14,14 +14,10 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/warpctc.h" -namespace paddle { -namespace platform { -namespace dynload { +namespace paddle::platform::dynload { #define DEFINE_WRAP(__name) DynLoad__##__name __name WARPCTC_ROUTINE_EACH(DEFINE_WRAP); -} // namespace dynload -} // namespace platform -} // namespace paddle +} // namespace paddle::platform::dynload diff --git a/paddle/fluid/platform/monitor.cc b/paddle/fluid/platform/monitor.cc index 40e0e226779b8..596667e2fa782 100644 --- a/paddle/fluid/platform/monitor.cc +++ b/paddle/fluid/platform/monitor.cc @@ -14,10 +14,6 @@ #include "paddle/fluid/platform/monitor.h" -namespace paddle { -namespace platform {} // namespace platform -} // namespace paddle - DEFINE_INT_STATUS(STAT_total_feasign_num_in_mem) DEFINE_INT_STATUS(STAT_epoch_finish) DEFINE_INT_STATUS(STAT_gpu0_mem_size) diff --git a/paddle/fluid/platform/profiler/cuda_tracer.cc b/paddle/fluid/platform/profiler/cuda_tracer.cc index a462521db5144..ba559f24abfc8 100644 --- a/paddle/fluid/platform/profiler/cuda_tracer.cc +++ b/paddle/fluid/platform/profiler/cuda_tracer.cc @@ -33,10 +33,7 @@ } \ } while (0) -namespace paddle { -namespace platform { - -namespace details { +namespace paddle::platform::details { std::unordered_map CreateThreadIdMapping() { std::unordered_map 
mapping; std::unordered_map ids = GetAllThreadIds(); @@ -45,7 +42,8 @@ std::unordered_map CreateThreadIdMapping() { } return mapping; } -} // namespace details +} // namespace paddle::platform::details +namespace paddle::platform { CudaTracer::CudaTracer() = default; @@ -194,5 +192,4 @@ void CudaTracer::ReleaseBuffer(uint8_t* buffer) { paddle::framework::AlignedFree(buffer); } -} // namespace platform -} // namespace paddle +} // namespace paddle::platform diff --git a/paddle/fluid/platform/profiler/event_python.cc b/paddle/fluid/platform/profiler/event_python.cc index 8dd865e14e1c9..91ec92fe80b9b 100644 --- a/paddle/fluid/platform/profiler/event_python.cc +++ b/paddle/fluid/platform/profiler/event_python.cc @@ -16,8 +16,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler/dump/serialization_logger.h" #include "paddle/fluid/platform/profiler/extra_info.h" -namespace paddle { -namespace platform { +namespace paddle::platform { HostPythonNode::~HostPythonNode() { // delete all runtime or device nodes and recursive delete children @@ -192,5 +191,4 @@ std::unique_ptr LoadProfilerResult(std::string filename) { return result; } -} // namespace platform -} // namespace paddle +} // namespace paddle::platform diff --git a/paddle/fluid/platform/timer.cc b/paddle/fluid/platform/timer.cc index 855a3d47e38bb..7dc854e71bd02 100644 --- a/paddle/fluid/platform/timer.cc +++ b/paddle/fluid/platform/timer.cc @@ -14,8 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/timer.h" -namespace paddle { -namespace platform { +namespace paddle::platform { void Timer::Reset() { _start.tv_sec = 0; @@ -59,5 +58,4 @@ int64_t Timer::Tickus() { (_now.tv_usec - _start.tv_usec); } -} // namespace platform -} // namespace paddle +} // namespace paddle::platform diff --git a/paddle/fluid/prim/api/all.cc b/paddle/fluid/prim/api/all.cc index 85e1718ec982a..8ecba4f542415 100644 --- a/paddle/fluid/prim/api/all.cc +++ b/paddle/fluid/prim/api/all.cc @@ -13,7 +13,3 @@ // limitations under the License. #include "paddle/fluid/prim/api/all.h" - -namespace paddle { -namespace prim {} // namespace prim -} // namespace paddle diff --git a/paddle/fluid/prim/api/api.yaml b/paddle/fluid/prim/api/api.yaml index a951ed4431a57..61e056678d19f 100644 --- a/paddle/fluid/prim/api/api.yaml +++ b/paddle/fluid/prim/api/api.yaml @@ -38,6 +38,7 @@ - pad - sqrt - cumsum +- cumprod - put_along_axis - sin - cos diff --git a/paddle/fluid/prim/api/auto_code_generated/template/static_prim_api.cc.j2 b/paddle/fluid/prim/api/auto_code_generated/template/static_prim_api.cc.j2 index 55b65bf05163f..b1b675a78589a 100644 --- a/paddle/fluid/prim/api/auto_code_generated/template/static_prim_api.cc.j2 +++ b/paddle/fluid/prim/api/auto_code_generated/template/static_prim_api.cc.j2 @@ -1,5 +1,5 @@ {% from "utils.cc.j2" import static_prim_api %} -// Generated by /paddle/fluid/prim/api/auto_code_generated/static_gen.py. +// Generated by /paddle/fluid/prim/api/auto_code_generated/static_gen.py. // DO NOT EDIT! 
#include diff --git a/paddle/fluid/prim/api/auto_code_generated/template/utils.cc.j2 b/paddle/fluid/prim/api/auto_code_generated/template/utils.cc.j2 index 78a270ef37d5b..5e34af02f2857 100644 --- a/paddle/fluid/prim/api/auto_code_generated/template/utils.cc.j2 +++ b/paddle/fluid/prim/api/auto_code_generated/template/utils.cc.j2 @@ -25,7 +25,7 @@ {% endfilter %} op->CheckAttrs(); op->InferVarType(block); - op->InferShape(*block); + op->InferShape(*block); {% if outputs|length > 1 %} return std::make_tuple{{sequence('(', ')', ', ', output_names)}}; {% elif outputs|length == 1 %} @@ -56,7 +56,7 @@ template <> {%- macro static_prim_api_sig_ret(outputs) -%} {%- set names = [] -%} {%- for i in outputs -%} {%- do names.append(i.typename|to_paddle_output_type) -%} {%- endfor -%} - {%- if names|length > 1 -%} + {%- if names|length > 1 -%} std::tuple<{{sequence('', '', ', ', names)}}> {%- else -%} {{names[0]}} @@ -80,7 +80,7 @@ if ({{input.name}}) { std::transform({{input.name}}.get().begin(), {{input.name}}.get().end(), {{input.name}}_names.begin(), [](const Tensor& t) { return std::static_pointer_cast(t.impl())->Name(); }); - op->SetInput("{{input.fluid_name | to_pascal}}", {{input.name}}_names); + op->SetInput("{{input.fluid_name | to_pascal}}", {{input.name}}_names); } {%- else -%} if ({{input.name}}) { @@ -96,7 +96,7 @@ std::vector {{input.name}}_names({{input.name}}.size());; std::transform({{input.name}}.begin(), {{input.name}}.end(), {{input.name}}_names.begin(), [](const Tensor& t) { return std::static_pointer_cast(t.impl())->Name(); }); -op->SetInput("{{input.fluid_name | to_pascal}}", {{input.name}}_names); +op->SetInput("{{input.fluid_name | to_pascal}}", {{input.name}}_names); {%- else -%} op->SetInput("{{input.fluid_name | to_pascal}}", {std::static_pointer_cast({{input.name}}.impl())->Name()}); {%- endif -%} @@ -180,7 +180,7 @@ paddle::framework::TransToProtoVarType({{src_name}}) {%- set is_set = [] -%} {#- why not use boolean, ref: 
https://stackoverflow.com/questions/17925674/jinja2-local-global-variable -#} {%- if not is_set -%} {#- use DataType attr as default output dtype -#} {%- for attr in attrs -%} - {%- if attr.typename is datatype -%} + {%- if attr.typename is datatype -%} {{attr.name}} {%- do is_set.append(1) -%} {%- endif -%} diff --git a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h index 0465f73a44593..17bc345917064 100644 --- a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h +++ b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h @@ -744,13 +744,20 @@ void slice_grad(const Tensor& input, paddings.push_back(offsets[i]); paddings.push_back((in_dims[i] - out_dims[i]) - offsets[i]); } + Tensor reshape_out_grad; + if (out_grad.shape().size() == 0) { + reshape_out_grad = full({1}, 1, input.dtype()); + } else { + reshape_out_grad = out_grad; + } + if (decrease_size > 0 && (decrease_size != static_cast(in_dims.size()))) { auto out_tmp = - pad(reshape(out_grad, origin_out_shape), paddings, 0.0); + pad(reshape(reshape_out_grad, origin_out_shape), paddings, 0.0); set_output(out_tmp, input_grad); } else { - auto out_tmp = pad(out_grad, paddings, 0.0); + auto out_tmp = pad(reshape_out_grad, paddings, 0.0); set_output(out_tmp, input_grad); } } @@ -1127,11 +1134,13 @@ void prod_grad(const Tensor& x, } else { reduce_all = false; } - auto x_grad_tmp = Tensor(); - auto out_tmp = Tensor(); + auto out_grad_tmp = Tensor(); + auto x_reshape = Tensor(); + std::vector unchange_axis, change_axis, transpose_shape, + cumprod_shape; + std::vector transpose_dim, origin_position; if (x_dim_size == 1) { - x_grad_tmp = out_grad.expand(IntArray(x_dim)); - out_tmp = out.expand(IntArray(x_dim)); + out_grad_tmp = out_grad.expand(IntArray(x_dim)); } else { if (!keep_dim) { auto axis_ = std::vector(); @@ -1149,16 +1158,69 @@ void prod_grad(const Tensor& x, } auto out_grad_shape = 
get_unsqueeze_dims(out_grad, axis_); auto out_grad_ = reshape(out_grad, out_grad_shape); - x_grad_tmp = out_grad_.expand(IntArray(x_dim)); - auto out_ = reshape(out, out_grad_shape); - out_tmp = out_.expand(IntArray(x_dim)); + out_grad_tmp = out_grad_.expand(IntArray(x_dim)); } else { - x_grad_tmp = out_grad.expand(IntArray(x_dim)); - out_tmp = out.expand(IntArray(x_dim)); + out_grad_tmp = out_grad.expand(IntArray(x_dim)); } } - auto x_grad_res = x_grad_tmp * out_tmp * (1 / x); - set_output(x_grad_res, x_grad); + auto axis_ = std::vector(); + if (reduce_all) { + int64_t numel = 1; + for (int64_t i = 0; i < x_dim_size; i++) { + axis_.push_back(i); + numel *= x_dim[i]; + } + cumprod_shape.push_back(numel); + x_reshape = reshape(x, cumprod_shape); + auto left_cumprod = cumprod(x_reshape, -1, true, false); + auto right_cumprod = cumprod(x_reshape, -1, true, true); + auto x_grad_tmp = left_cumprod * right_cumprod; + auto x_grad_tmp2 = reshape(x_grad_tmp, x.shape()); + auto x_grad_res = x_grad_tmp2 * out_grad_tmp; + set_output(x_grad_res, x_grad); + } else { + int64_t unchange_size = x_dim_size - axis_size; + int64_t unchange_index = 0; + for (int64_t i = 0; i < axis_size; i++) { + if (axis[i] < 0) { + axis_.push_back(axis[i] + x_dim_size); + } else { + axis_.push_back(axis[i]); + } + } + for (int64_t i = 0; i < x_dim_size; i++) { + auto it = find(axis_.begin(), axis_.end(), i); + if (it != axis_.end()) { + int64_t index = it - axis_.begin(); + origin_position.push_back(static_cast(unchange_size + index)); + } else { + unchange_axis.push_back(i); + origin_position.push_back(static_cast(unchange_index)); + unchange_index += 1; + } + } + int64_t numel = 1; + for (int64_t i = 0; i < unchange_size; i++) { + transpose_shape.push_back(x_dim[unchange_axis[i]]); + cumprod_shape.push_back(x_dim[unchange_axis[i]]); + transpose_dim.push_back(static_cast(unchange_axis[i])); + } + for (int64_t i = 0; i < axis_size; i++) { + transpose_shape.push_back(x_dim[axis_[i]]); + 
transpose_dim.push_back(static_cast(axis_[i])); + numel *= x_dim[axis_[i]]; + } + cumprod_shape.push_back(numel); + auto x_transpose = transpose(x, transpose_dim); + x_reshape = reshape(x_transpose, cumprod_shape); + auto left_cumprod = cumprod(x_reshape, -1, true, false); + auto right_cumprod = cumprod(x_reshape, -1, true, true); + auto x_grad_tmp = left_cumprod * right_cumprod; + auto x_grad_reshape = reshape(x_grad_tmp, transpose_shape); + auto x_grad_tmp2 = transpose(x_grad_reshape, origin_position); + auto x_grad_res = x_grad_tmp2 * out_grad_tmp; + set_output(x_grad_res, x_grad); + } } } diff --git a/paddle/fluid/prim/api/manual_prim/eager_prim_api.cc b/paddle/fluid/prim/api/manual_prim/eager_prim_api.cc index d667f0fabd71e..ab317a702a85d 100644 --- a/paddle/fluid/prim/api/manual_prim/eager_prim_api.cc +++ b/paddle/fluid/prim/api/manual_prim/eager_prim_api.cc @@ -16,8 +16,7 @@ #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include "paddle/fluid/prim/api/manual_prim/prim_manual_api.h" -namespace paddle { -namespace prim { +namespace paddle::prim { template <> Tensor full(const IntArray& shape, @@ -44,5 +43,4 @@ Tensor slice(const Tensor& input, return ::slice_ad_func(input, axes, starts, ends, infer_flags, decrease_axis); } -} // namespace prim -} // namespace paddle +} // namespace paddle::prim diff --git a/paddle/fluid/prim/api/manual_prim/static_prim_api.cc b/paddle/fluid/prim/api/manual_prim/static_prim_api.cc index c45a473b4a8d3..f362440623f5e 100644 --- a/paddle/fluid/prim/api/manual_prim/static_prim_api.cc +++ b/paddle/fluid/prim/api/manual_prim/static_prim_api.cc @@ -34,8 +34,7 @@ #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace prim { +namespace paddle::prim { template <> Tensor full(const IntArray& shape, @@ -152,5 +151,4 @@ Tensor slice(const Tensor& input, return out; } -} // namespace prim -} // namespace 
paddle +} // namespace paddle::prim diff --git a/paddle/fluid/prim/api/manual_prim/utils/static_utils.cc b/paddle/fluid/prim/api/manual_prim/utils/static_utils.cc index 2f76e8bbd966f..43ab21ccd3e06 100644 --- a/paddle/fluid/prim/api/manual_prim/utils/static_utils.cc +++ b/paddle/fluid/prim/api/manual_prim/utils/static_utils.cc @@ -23,8 +23,7 @@ #include "paddle/fluid/prim/utils/static/static_global_utils.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/core/utils/data_type.h" -namespace paddle { -namespace prim { +namespace paddle::prim { using Tensor = paddle::Tensor; template <> TEST_API Tensor empty(const paddle::experimental::IntArray& shape, @@ -69,5 +68,4 @@ void by_pass(const paddle::Tensor& x, paddle::Tensor* real_out) { set_output(out, real_out); } -} // namespace prim -} // namespace paddle +} // namespace paddle::prim diff --git a/paddle/fluid/prim/utils/static/static_global_utils.cc b/paddle/fluid/prim/utils/static/static_global_utils.cc index 3d1aa2158048d..71179429dc997 100644 --- a/paddle/fluid/prim/utils/static/static_global_utils.cc +++ b/paddle/fluid/prim/utils/static/static_global_utils.cc @@ -14,12 +14,10 @@ #include "paddle/fluid/prim/utils/static/static_global_utils.h" -namespace paddle { -namespace prim { +namespace paddle::prim { StaticCompositeContext* StaticCompositeContext::static_composite_context_ = new StaticCompositeContext(); thread_local bool StaticCompositeContext::enable_bwd_prim_ = false; thread_local bool StaticCompositeContext::enable_fwd_prim_ = false; thread_local bool StaticCompositeContext::enable_eager_prim_ = false; -} // namespace prim -} // namespace paddle +} // namespace paddle::prim diff --git a/paddle/fluid/primitive/base/decomp_trans.cc b/paddle/fluid/primitive/base/decomp_trans.cc index 06df447a600b8..22971d21eec40 100644 --- a/paddle/fluid/primitive/base/decomp_trans.cc +++ b/paddle/fluid/primitive/base/decomp_trans.cc @@ -41,7 +41,8 @@ std::unordered_set decomp_op_contain_none = 
{"pd_op.squeeze", "pd_op.unsqueeze", "pd_op.flatten", "pd_op.batch_norm", - "pd_op.batch_norm_"}; + "pd_op.batch_norm_", + "pd_op.dropout"}; // std::unordered_set dynamic_shape_blacklist = { "pd_op.squeeze", diff --git a/paddle/fluid/primitive/codegen/templates/backend/generated/generated_eager_backend.cc.j2 b/paddle/fluid/primitive/codegen/templates/backend/generated/generated_eager_backend.cc.j2 index 7f9f4b5b8676f..b8910ff5b9d9a 100644 --- a/paddle/fluid/primitive/codegen/templates/backend/generated/generated_eager_backend.cc.j2 +++ b/paddle/fluid/primitive/codegen/templates/backend/generated/generated_eager_backend.cc.j2 @@ -12,7 +12,7 @@ namespace backend { {%- macro args(inputs, attrs) -%} {#- Arguments are variable pass into method -#} {{common.sequence('', '', ', ', inputs)}} - {%- if attrs|length > 0 -%} {{", "}} {%- endif -%} {#- append comma between + {%- if attrs|length > 0 -%} {{", "}} {%- endif -%} {#- append comma between nputs and attrs -#} {{common.sequence('', '', ', ', attrs)}} {%- endmacro -%} @@ -37,7 +37,7 @@ return ::{{name}}_ad_func({{common.args(input_names, attr_names)}}); {% for api in apis %} {%- if api.is_prim and api.name not in backend_black_list and api.name[-1] != '_' -%} {{sig(api.name, api.inputs, api.attrs, api.outputs | trip_intermediate)}} { -{{body(api.name, api.inputs, api.attrs, api.outputs | trip_intermediate)}} +{{body(api.name, api.inputs, api.attrs, api.outputs | trip_intermediate)}} } {% endif %} diff --git a/paddle/fluid/primitive/codegen/templates/backend/generated/generated_static_backend.cc.j2 b/paddle/fluid/primitive/codegen/templates/backend/generated/generated_static_backend.cc.j2 index 26f81d756f0b5..8e4921acbb013 100644 --- a/paddle/fluid/primitive/codegen/templates/backend/generated/generated_static_backend.cc.j2 +++ b/paddle/fluid/primitive/codegen/templates/backend/generated/generated_static_backend.cc.j2 @@ -117,20 +117,20 @@ pir::Value {{attr.name}}_res = std::static_pointer_cast({{attr.name~ {% endif %} 
{% endfor %} {%- set input_names = [] -%} - {%- for i in inputs -%} - {%- do input_names.append(i.name~'_res') -%} + {%- for i in inputs -%} + {%- do input_names.append(i.name~'_res') -%} {%- endfor -%} {%- if mutable_attribute_as_inputs -%} - {%- for i in attrs -%} + {%- for i in attrs -%} {%- if i is mutable_attribute -%} - {%- do input_names.append(i.name~'_res') -%} + {%- do input_names.append(i.name~'_res') -%} {%- endif -%} {%- endfor -%} {%- endif -%} {%- set attr_names = [] -%} - {%- for i in attrs -%} + {%- for i in attrs -%} {%- if not mutable_attribute_as_inputs or mutable_attribute_as_inputs and i is not mutable_attribute -%}{#- do nothing -#} - {%- do attr_names.append(common.phi2ir_attr(i)) -%} + {%- do attr_names.append(common.phi2ir_attr(i)) -%} {%- endif -%} {% endfor %} auto op_res = paddle::dialect::{{name}}({{common.args(input_names, attr_names)}}); @@ -145,14 +145,14 @@ auto op_res = paddle::dialect::{{name}}({{common.args(input_names, attr_names)}} {% set api_outputs = api.outputs | trip_intermediate %} {{sig(api.name, api.inputs, api_outputs, api.attrs)}} { {% filter indent(2, True) %} -{{body(api.name, api.inputs, api_outputs, api.attrs)}} +{{body(api.name, api.inputs, api_outputs, api.attrs)}} {% endfilter %} } {% if api.attrs is exist_mutable_attribute %} {{sig(api.name, api.inputs, api_outputs, api.attrs, True)}} { {% filter indent(2, True) %} -{{body(api.name, api.inputs, api_outputs, api.attrs, True)}} +{{body(api.name, api.inputs, api_outputs, api.attrs, True)}} {% endfilter %} } diff --git a/paddle/fluid/primitive/codegen/templates/common.j2 b/paddle/fluid/primitive/codegen/templates/common.j2 index b29401133db03..ecf5e54cae33b 100644 --- a/paddle/fluid/primitive/codegen/templates/common.j2 +++ b/paddle/fluid/primitive/codegen/templates/common.j2 @@ -8,12 +8,12 @@ template {%- set input_params = [] -%} {%- for i in inputs -%} {%- do input_params.append(i.typename|to_paddle_input_type(i.optional)~' '~i.name) -%} {%- endfor -%} {%- set 
attr_params = [] -%} - {%- for i in attrs -%} + {%- for i in attrs -%} {%- if not mutable_attribute_as_inputs or i is not mutable_attribute -%} {%- if default -%} - {%- do attr_params.append(i.typename|to_paddle_attr_type~' '~i.name~default_value(i)) -%} + {%- do attr_params.append(i.typename|to_paddle_attr_type~' '~i.name~default_value(i)) -%} {%- else -%} - {%- do attr_params.append(i.typename|to_paddle_attr_type~' '~i.name) -%} + {%- do attr_params.append(i.typename|to_paddle_attr_type~' '~i.name) -%} {%- endif -%} {%- else -%} {%- do input_params.append('const Tensor&'~' '~i.name~'_') -%} @@ -43,7 +43,7 @@ template {%- macro ret(outputs) -%} {%- set names = [] -%} {%- for i in outputs -%} {%- do names.append(i.typename|to_paddle_output_type(i.optional)) -%} {%- endfor -%} - {%- if names|length > 1 -%} + {%- if names|length > 1 -%} std::tuple<{{sequence('', '', ', ', names)}}> {%- else -%} {{names[0]}} diff --git a/paddle/fluid/primitive/codegen/templates/decomp/generated_decomp_vjp.j2 b/paddle/fluid/primitive/codegen/templates/decomp/generated_decomp_vjp.j2 index 460b8e3a2fcdc..592b45b84aa72 100644 --- a/paddle/fluid/primitive/codegen/templates/decomp/generated_decomp_vjp.j2 +++ b/paddle/fluid/primitive/codegen/templates/decomp/generated_decomp_vjp.j2 @@ -139,13 +139,13 @@ std::vector> {{class_name}}::DecompVjp(pir::Operation* o auto stop_gradients_attr = op->attribute(kAttrStopGradients) .dyn_cast() .AsVector(); - {% for k in range(outputs|length) %} + {% for k in range(outputs|length) %} stop_gradients[{{k}}].push_back( stop_gradients_attr[{{k}}].dyn_cast().data()); - {% endfor %} + {% endfor %} VLOG(4) << " stop_gradients is set "; } else { - {% for k in range(outputs|length) %} + {% for k in range(outputs|length) %} stop_gradients[{{k}}].push_back(false); {% endfor %} VLOG(4) << " stop_gradients is not set "; @@ -160,7 +160,7 @@ std::vector> {{class_name}}::DecompVjp(pir::Operation* o VLOG(4) << "Call Pir Decomposed backward op {{fwd_name}}"; - {% for k in 
range(outputs|length) %} + {% for k in range(outputs|length) %} paddle::Tensor* {{outputs[k].name}} = !stop_gradients[{{k}}][0] ? &tensor_res[{{k}}][0] : nullptr; {% endfor %} diff --git a/paddle/fluid/primitive/codegen/templates/rule/vjp/generated/generated_vjp.cc.j2 b/paddle/fluid/primitive/codegen/templates/rule/vjp/generated/generated_vjp.cc.j2 index 105175758f22d..31ec42aacd7a9 100644 --- a/paddle/fluid/primitive/codegen/templates/rule/vjp/generated/generated_vjp.cc.j2 +++ b/paddle/fluid/primitive/codegen/templates/rule/vjp/generated/generated_vjp.cc.j2 @@ -79,11 +79,11 @@ auto {{i.name}} = phi::IntArray(paddle::dialect::GetInt64Vector({{i.name}}_defin {%- for api in apis -%} {%- do api_map.update({api.name: api}) -%} {%- endfor -%} {%- for i in api.inputs -%} {%- do input_names.append(i.name) -%} {%- endfor -%} {%- set attr_names=[] -%} - {%- for i in api.attrs -%} + {%- for i in api.attrs -%} {%- if i is mutable_attribute -%} - {%- do input_names.append(i.name~'_') -%} + {%- do input_names.append(i.name~'_') -%} {%- else -%} - {%- do attr_names.append(i.name) -%} + {%- do attr_names.append(i.name) -%} {%- endif -%} {%- endfor %} {% if 'invoke' in api and api.invoke.func in api_map %} @@ -116,7 +116,7 @@ FLAGS_tensor_operants_mode = "static"; VLOG(4) << "Call Pir Decomposed backward op {{api.name}}"; {% for i in range(api.outputs|length) %} {% if api.outputs[i].typename=='Tensor' %} -paddle::Tensor* {{api.outputs[i].name}} = !stop_gradients[{{i}}][0] ? &vjp_res[{{i}}][0] : nullptr; +paddle::Tensor* {{api.outputs[i].name}} = !stop_gradients[{{i}}][0] ? 
&vjp_res[{{i}}][0] : nullptr; {% else %} std::vector {{api.outputs[i].name}}(stop_gradients[{{i}}].size(), nullptr); for (size_t i=0; i< stop_gradients[{{i}}].size(); i++ ) { diff --git a/paddle/fluid/primitive/composite/composite.h b/paddle/fluid/primitive/composite/composite.h index 091d540aa461a..eca7bfb3a616f 100644 --- a/paddle/fluid/primitive/composite/composite.h +++ b/paddle/fluid/primitive/composite/composite.h @@ -23,9 +23,6 @@ namespace paddle { namespace primitive { namespace details { -// empty_shape means x.shape=[] -static std::vector empty_shape; - template static Tensor get_slice(const Tensor& x, int64_t idx) { return slice(x, {0}, {idx}, {idx + 1}, {1}, {}); @@ -98,7 +95,7 @@ Tensor mean_decomp(const Tensor& x, const IntArray& axis, bool keepdim) { for (size_t i = 0; i < axis_.size(); i++) { value_ *= x_dim[axis_[i]]; } - value = full(empty_shape, value_, sum_x.dtype()); + value = full_scalar(value_, sum_x.dtype()); } Tensor res = sum_x / value; @@ -148,7 +145,7 @@ Tensor p_norm_decomp(const Tensor& x, Tensor res; if (porder == 0.0) { // 0-norm - auto zero = full(empty_shape, 0, x_tmp.dtype()); + auto zero = full_scalar(0, x_tmp.dtype()); auto none_zero = not_equal(x_tmp, zero); res = cast(none_zero, x_tmp.dtype()); res = sum(res, {axis}, x_tmp.dtype(), keepdim); @@ -169,8 +166,8 @@ Tensor p_norm_decomp(const Tensor& x, res = min(x_tmp, {axis}, keepdim); } else { // vanilla p-norm - auto porder_tensor = full(empty_shape, porder, x_tmp.dtype()); - auto inv_porder_tensor = full(empty_shape, 1 / porder, x_tmp.dtype()); + auto porder_tensor = full_scalar(porder, x_tmp.dtype()); + auto inv_porder_tensor = full_scalar(1 / porder, x_tmp.dtype()); res = elementwise_pow(x_tmp, porder_tensor); res = sum(res, {axis}, x_tmp.dtype(), keepdim); res = elementwise_pow(res, inv_porder_tensor); @@ -194,8 +191,7 @@ Tensor pow_decomp(const Tensor& x, const paddle::Scalar& y) { } check_valid_type(y.dtype()); - Tensor y_full = full(empty_shape, y, x_cast.dtype()); - + 
Tensor y_full = full_scalar(y, x_cast.dtype()); auto ans = elementwise_pow(x_cast, y_full); if (need_cast) { return cast(ans, org_dtype); @@ -229,7 +225,12 @@ Tensor one_hot_decomp(const Tensor& x, const Tensor& num_classes) { backend::full_with_tensor(num_classes, 0, x.dtype()); std::vector input_dim; - input_dim.push_back(x.shape()[0]); + int x_dims = 1; + for (size_t i = 0; i < x.shape().size(); i++) { + x_dims *= x.shape()[i]; + } + + input_dim.push_back(x_dims); input_dim.push_back(num_classes_tensor.shape()[0]); auto input_tensor = full(input_dim, 0, x.dtype()); @@ -239,13 +240,13 @@ Tensor one_hot_decomp(const Tensor& x, const Tensor& num_classes) { } output_dim.push_back(num_classes_tensor.shape()[0]); - auto end = full({1}, x.shape()[0], x.dtype()); + auto end = full({1}, x_dims, x.dtype()); auto start = full({1}, 0, x.dtype()); auto step = full({1}, 1, x.dtype()); auto arange_tensor = backend::arange_with_tensor(start, end, step, x.dtype()); - std::vector reshape_dim{x.shape()[0], 1}; + std::vector reshape_dim{x_dims, 1}; auto x_reshape = reshape(x, reshape_dim); auto arange_tensor_reshape = reshape(arange_tensor, reshape_dim); @@ -254,7 +255,7 @@ Tensor one_hot_decomp(const Tensor& x, const Tensor& num_classes) { index_concat.push_back(x_reshape); auto index_tensor = concat(index_concat, 1); - auto update_tensor = full({x.shape()[0]}, 1, x.dtype()); + auto update_tensor = full({x_dims}, 1, x.dtype()); auto ans = reshape( cast(scatter_nd_add(input_tensor, index_tensor, update_tensor), @@ -282,13 +283,13 @@ Tensor squared_l2_norm_decomp(const Tensor& x) { template Tensor reciprocal_decomp(const Tensor& x) { - return full(empty_shape, 1.0, x.dtype()) / x; + return full_scalar(1.0, x.dtype()) / x; } template Tensor bce_loss_decomp(const Tensor& x, const Tensor& label) { - auto one = full(empty_shape, 1, x.dtype()); - auto ans = full(empty_shape, -1, x.dtype()) * + auto one = full_scalar(1, x.dtype()); + auto ans = full_scalar(-1, x.dtype()) * (label * log(x) 
+ (one - label) * log(one - x)); return ans; } @@ -382,7 +383,7 @@ std::tuple batch_norm_decomp( } } - Tensor half = full(empty_shape, -0.5, x_cast.dtype()); + Tensor half = full_scalar(-0.5, x_cast.dtype()); bool use_run_stat = (is_test && (!trainable_statistics)) || use_global_stats; Tensor x_hat; @@ -421,9 +422,8 @@ std::tuple batch_norm_decomp( run_var_ = assign(run_var); } Tensor y; - Tensor new_scale = - scale ? scale.get() : full(empty_shape, 1, x_cast.dtype()); - Tensor new_bias = bias ? bias.get() : full(empty_shape, 0, x_cast.dtype()); + Tensor new_scale = scale ? scale.get() : full_scalar(1, x_cast.dtype()); + Tensor new_bias = bias ? bias.get() : full_scalar(0, x_cast.dtype()); if (data_layout_ == DataLayout::kNHWC) { y = x_hat * new_scale + new_bias; } else { @@ -441,8 +441,10 @@ std::tuple batch_norm_decomp( return std::make_tuple( y, run_mean_, run_var_, batch_mean_, inv_std_, reserve_space); } else { + Tensor batch_mean_none; + Tensor inv_std_none; return std::make_tuple( - y, run_mean_, run_var_, batch_mean_, inv_std_, reserve_space); + y, run_mean_, run_var_, batch_mean_none, inv_std_none, reserve_space); } } @@ -539,13 +541,13 @@ Tensor swiglu_decomp(const Tensor& x, const paddle::optional& y) { template Tensor relu_decomp(const Tensor& x) { - return maximum(x, full(empty_shape, 0.0, x.dtype())); + return maximum(x, full_scalar(0.0, x.dtype())); } template Tensor relu6_decomp(const Tensor& x) { - auto tmp = maximum(x, full(empty_shape, 0.0, x.dtype())); - auto res = minimum(tmp, full(empty_shape, 6.0, x.dtype())); + auto tmp = maximum(x, full_scalar(0.0, x.dtype())); + auto res = minimum(tmp, full_scalar(6.0, x.dtype())); return res; } @@ -653,7 +655,7 @@ std::tuple layer_norm_decomp( auto difference = x_cast - mean_; auto var_tmp1 = difference * difference; auto variance = mean_decomp(var_tmp1, axis, true); - auto var_tmp3 = variance + full(empty_shape, epsilon, variance.dtype()); + auto var_tmp3 = variance + full_scalar(epsilon, 
variance.dtype()); auto rsqrt_var = rsqrt(var_tmp3); auto out = difference * rsqrt_var; @@ -798,18 +800,18 @@ std::tuple dropout_decomp( Tensor uniform_tensor; if (has_dynamic_shape(x.shape())) { auto shape_tensor = shape(x); - auto zero = full(empty_shape, 0.0, dtype_tmp); - auto one = full(empty_shape, 1.0, dtype_tmp); + auto zero = full_scalar(0.0, dtype_tmp); + auto one = full_scalar(1.0, dtype_tmp); uniform_tensor = backend::uniform(shape_tensor, zero, one, dtype_tmp, seed_tmp); } else { uniform_tensor = uniform(phi::vectorize(x.dims()), dtype_tmp, 0.0, 1.0, seed_tmp); } - auto mask = cast( - greater_equal(uniform_tensor, full(empty_shape, p, dtype_tmp)), - org_dtype); - auto ones_p = full(empty_shape, 1.0 - p.to(), org_dtype); + auto mask = + cast(greater_equal(uniform_tensor, full_scalar(p, dtype_tmp)), + org_dtype); + auto ones_p = full_scalar(1.0 - p.to(), org_dtype); if (upscale_in_train) { if (is_test) { // inference: out = input @@ -818,7 +820,7 @@ std::tuple dropout_decomp( // train: out = input * mask / ( 1.0 - p ) if (p.to() == 1.0) { // Process p=1. 
for avoid divide zero error (x*mask/(1.0-p)) - auto zero = full(empty_shape, 0.0, org_dtype); + auto zero = full_scalar(0.0, org_dtype); return std::make_tuple(x * zero, cast(zero, DataType::UINT8)); } else { auto ans = (x * mask) / ones_p; @@ -842,20 +844,20 @@ Tensor gelu_decomp(const Tensor& x, bool approximate) { const double PM_SQRT1_2 = 0.70710678118654752440; /* 1/sqrt(2) */ auto org_dtype = x.dtype(); - auto half = full(empty_shape, 0.5, org_dtype); - auto one = full(empty_shape, 1.0, org_dtype); + auto half = full_scalar(0.5, org_dtype); + auto one = full_scalar(1.0, org_dtype); if (approximate) { // gelu(x) = 0.5 * x * (1 + tanh(sqrt(2 / \pi) * (x + 0.044715 * x^{3}))) - auto kAlpha = full(empty_shape, PM_2_SQRTPI * PM_SQRT1_2, org_dtype); - auto GELU_CONSTANT = full(empty_shape, 0.044715, org_dtype); - auto x_pow3 = elementwise_pow(x, full(empty_shape, 3, org_dtype)); + auto kAlpha = full_scalar(PM_2_SQRTPI * PM_SQRT1_2, org_dtype); + auto GELU_CONSTANT = full_scalar(0.044715, org_dtype); + auto x_pow3 = elementwise_pow(x, full_scalar(3, org_dtype)); auto tanh_out = tanh(kAlpha * (x + x_pow3 * GELU_CONSTANT)); auto res = x * half * (one + tanh_out); return res; } else { // gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))) - auto M_SQRT1_2T = full(empty_shape, PM_SQRT1_2, org_dtype); + auto M_SQRT1_2T = full_scalar(PM_SQRT1_2, org_dtype); auto erf_out = one + erf(x * M_SQRT1_2T); auto res = x * half * erf_out; @@ -867,10 +869,10 @@ template Tensor hardsigmoid_decomp(const Tensor& x, float slope, float offset) { const double MAX_VALUE = 1.0; const double MIN_VALUE = 0.0; - return maximum(minimum(x * full(empty_shape, slope, x.dtype()) + - full(empty_shape, offset, x.dtype()), - full(empty_shape, MAX_VALUE, x.dtype())), - full(empty_shape, MIN_VALUE, x.dtype())); + return maximum(minimum(x * full_scalar(slope, x.dtype()) + + full_scalar(offset, x.dtype()), + full_scalar(MAX_VALUE, x.dtype())), + full_scalar(MIN_VALUE, x.dtype())); } template @@ -881,15 +883,15 @@ 
Tensor hardswish_decomp(const Tensor& x) { // out = minimum(maximum(x + offset, 0), threshold) * x / scale auto minimum_out = - minimum(maximum(x + full(empty_shape, OFFSET, x.dtype()), - full(empty_shape, 0.0, x.dtype())), - full(empty_shape, THRESHOLD, x.dtype())); - return (minimum_out * x) / full(empty_shape, SCALE, x.dtype()); + minimum(maximum(x + full_scalar(OFFSET, x.dtype()), + full_scalar(0.0, x.dtype())), + full_scalar(THRESHOLD, x.dtype())); + return (minimum_out * x) / full_scalar(SCALE, x.dtype()); } template Tensor leaky_relu_decomp(const Tensor& x, float negative_slope) { - auto multiply_tmp = full(empty_shape, negative_slope, x.dtype()) * x; + auto multiply_tmp = full_scalar(negative_slope, x.dtype()) * x; if (negative_slope < 1.0) { return maximum(x, multiply_tmp); } else { @@ -1127,8 +1129,7 @@ std::tuple group_norm_decomp( var_ = maximum( var_tmp_, backend::full_with_tensor(shape(var_tmp_), 0, var_tmp_.dtype())); - Tensor var_inv = - rsqrt(var_ + full(empty_shape, epsilon, var_.dtype())); + Tensor var_inv = rsqrt(var_ + full_scalar(epsilon, var_.dtype())); Tensor res = (x_cast - mean_) * var_inv; out = backend::reshape(res, x_dim_t); } else { @@ -1143,7 +1144,7 @@ std::tuple group_norm_decomp( auto var_tmp_ = mean_decomp(x_cast * x_cast, c_axis, true) - mean_ * mean_; var_ = maximum(var_tmp_, full(var_tmp_.shape(), 0, var_tmp_.dtype())); - auto var_inv = rsqrt(var_ + full(empty_shape, epsilon, var_.dtype())); + auto var_inv = rsqrt(var_ + full_scalar(epsilon, var_.dtype())); auto res = (x_cast - mean_) * var_inv; out = reshape(res, x_dim); } @@ -1207,7 +1208,7 @@ Tensor square_decomp(const Tensor& x) { } Tensor two; - two = full(empty_shape, 2, x_cast.dtype()); + two = full_scalar(2, x_cast.dtype()); auto ans = elementwise_pow(x_cast, two); if (need_cast) { @@ -1224,9 +1225,8 @@ Tensor sigmoid_cross_entropy_with_logits_decomp( const paddle::optional& pos_weight, bool normalize, int ignore_index) { - auto dims = x.shape(); - const Tensor zero = 
full(dims, 0, x.type()); - const Tensor one = full(dims, 1, x.type()); + const Tensor zero = full_like_decomp(x, 0, x.type(), x.place()); + const Tensor one = full_like_decomp(x, 1, x.type(), x.place()); Tensor pos_weight_tensor; if (pos_weight) { pos_weight_tensor = pos_weight.get(); @@ -1235,19 +1235,20 @@ Tensor sigmoid_cross_entropy_with_logits_decomp( } auto term1 = where(x > zero, x, zero); auto term2 = x * label; - auto term3 = log(1 + exp(-abs(x))); + auto term3 = log(one + exp(-abs(x))); const Tensor tmp_out = term1 - term2 + term3 * pos_weight_tensor; - const Tensor ignore_index_tensor = full(dims, ignore_index, label.type()); + const Tensor ignore_index_tensor = + full_like_decomp(x, ignore_index, label.type(), label.place()); auto out = where(label == ignore_index_tensor, zero, tmp_out); if (normalize) { // Follow the implementation in // paddle/phi/kernels/cpu/sigmoid_cross_entropy_with_logits_kernel.cc - const Tensor eps1 = full(dims, 1e-6, x.type()); + const Tensor eps1 = full_like_decomp(x, 1e-6, x.type(), x.place()); auto diff = label - ignore_index_tensor; const Tensor tmp_norm = sum(where(abs(diff) > eps1, one, zero)); // Follow the implementation in // paddle/phi/kernels/cpu/sigmoid_cross_entropy_with_logits_kernel.cc - const Tensor eps2 = full(empty_shape, 1e-5, x.type()); + const Tensor eps2 = full_scalar(1e-5, x.type()); auto norm = where(tmp_norm > eps2, tmp_norm, eps2); out = out / norm; } @@ -1387,8 +1388,8 @@ Tensor elu_decomp(const Tensor& x, const float alpha) { if (has_dynamic_shape(x_cast.shape())) { zero = backend::full_with_tensor(shape(x_cast), 0, x_cast.dtype()); - tmp_res = full(empty_shape, alpha, x_cast.dtype()) * - (exp(x_cast) - full(empty_shape, 1, x_cast.dtype())); + tmp_res = full_scalar(alpha, x_cast.dtype()) * + (exp(x_cast) - full_scalar(1, x_cast.dtype())); } else { zero = full(x_cast.shape(), 0, x_cast.type()); tmp_res = alpha * (exp(x_cast) - 1); diff --git a/paddle/fluid/primitive/manual/manual_primitive.h 
b/paddle/fluid/primitive/manual/manual_primitive.h index f2ec3ebce45b3..6587adf862a6e 100644 --- a/paddle/fluid/primitive/manual/manual_primitive.h +++ b/paddle/fluid/primitive/manual/manual_primitive.h @@ -30,6 +30,15 @@ Tensor full(const IntArray& shape, return backend::full(shape, value, dtype, place); } +template +Tensor full_scalar(const Scalar& value, + DataType dtype = DataType::FLOAT32, + Place place = Place()) { + // empty_shape means x.shape=[] + std::vector empty_shape; + return backend::full(empty_shape, value, dtype, place); +} + template Tensor assign_out_(const Tensor& x, const Tensor& output) { return backend::assign_out_(x, output); diff --git a/paddle/fluid/primitive/rule/vjp/details.h b/paddle/fluid/primitive/rule/vjp/details.h index 00e464859e29e..551a67fc22a1b 100644 --- a/paddle/fluid/primitive/rule/vjp/details.h +++ b/paddle/fluid/primitive/rule/vjp/details.h @@ -154,41 +154,83 @@ void sum_grad(const Tensor& x, if (!x_grad) { return; } - std::vector x_dim = common::vectorize(x.dims()); + int64_t axis_size = axis.size(); - int64_t x_dim_size = x_dim.size(); + int64_t x_dim_size = x.dims().size(); + auto x_grad_tmp = Tensor(); reduce_all = false; if (reduce_all || axis_size == 0 || axis_size == x_dim_size) { reduce_all = true; } else { reduce_all = false; } - auto x_grad_tmp = Tensor(); - if (x_dim_size == 1) { - x_grad_tmp = expand(out_grad, IntArray(x_dim)); - } else { - if (!keepdim) { - auto axis_ = std::vector(); - if (reduce_all) { - for (int64_t i = 0; i < x_dim_size; i++) { - axis_.push_back(i); + if (has_dynamic_shape(x.shape())) { + Tensor x_shape = shape(x); + if (x_dim_size == 1) { + x_grad_tmp = backend::expand(out_grad, x_shape); + } else { + if (!keepdim) { + auto axis_ = std::vector(); + if (reduce_all) { + for (int64_t i = 0; i < x_dim_size; i++) { + axis_.push_back(i); + } + } else { + axis_ = axis.GetData(); + for (int64_t i = 0; i < axis_size; i++) { + if (axis[i] < 0) { + axis_[i] = axis[i] + x_dim_size; + } + } } - } else 
{ - axis_ = axis.GetData(); - for (int64_t i = 0; i < axis_size; i++) { - if (axis[i] < 0) { - axis_[i] = axis[i] + x_dim_size; + Tensor out_grad_shape = shape(out_grad); + size_t total_shape_size = out_grad.shape().size() + axis_.size(); + std::vector result_shape; + size_t j = 0, k = 0; + Tensor ones = full({1}, 1, x_shape.dtype()); + for (size_t i = 0; i < total_shape_size; i++) { + if (j < axis_.size() && axis_[j] == int64_t(i)) { + result_shape.push_back(ones); + j++; + } else { + result_shape.push_back(slice( + out_grad_shape, {0}, {int64_t(k)}, {int64_t(k) + 1}, {1}, {})); + k++; } } + auto out_grad_ = backend::reshape(out_grad, concat(result_shape)); + x_grad_tmp = backend::expand(out_grad_, x_shape); + } else { + x_grad_tmp = backend::expand(out_grad, x_shape); } - auto out_grad_shape = get_unsqueeze_dims(out_grad, axis_); - auto out_grad_ = reshape(out_grad, out_grad_shape); - x_grad_tmp = expand(out_grad_, IntArray(x_dim)); - } else { + } + } else { + std::vector x_dim = common::vectorize(x.dims()); + if (x_dim_size == 1) { x_grad_tmp = expand(out_grad, IntArray(x_dim)); + } else { + if (!keepdim) { + auto axis_ = std::vector(); + if (reduce_all) { + for (int64_t i = 0; i < x_dim_size; i++) { + axis_.push_back(i); + } + } else { + axis_ = axis.GetData(); + for (int64_t i = 0; i < axis_size; i++) { + if (axis[i] < 0) { + axis_[i] = axis[i] + x_dim_size; + } + } + } + auto out_grad_shape = get_unsqueeze_dims(out_grad, axis_); + auto out_grad_ = reshape(out_grad, out_grad_shape); + x_grad_tmp = expand(out_grad_, IntArray(x_dim)); + } else { + x_grad_tmp = expand(out_grad, IntArray(x_dim)); + } } } - set_output(x_grad_tmp, x_grad); } @@ -899,7 +941,8 @@ template void sqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { // This calculation is important for resnet. 
- auto x_grad_tmp = (0.5 / out) * out_grad; + auto factor = full_scalar(0.5, out.dtype()); + auto x_grad_tmp = (factor / out) * out_grad; set_output(x_grad_tmp, x_grad); } } @@ -908,7 +951,8 @@ template void rsqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { // This calculation is important for resnet. - auto x_grad_tmp = -0.5 * out * out * out * out_grad; + auto factor = full_scalar(-0.5, out.dtype()); + auto x_grad_tmp = factor * out * out * out * out_grad; set_output(x_grad_tmp, x_grad); } } @@ -929,7 +973,8 @@ void silu_grad(const Tensor& x, auto res = out_grad_cast * sigmoid(x_cast) * (1.0 + x_cast - out_cast); set_output(cast(res, org_dtype), x_grad); } else { - auto res = out_grad * sigmoid(x) * (1.0 + x - out); + auto one = full_scalar(1.0, x.dtype()); + auto res = out_grad * sigmoid(x) * (one + x - out); set_output(res, x_grad); } } @@ -1483,13 +1528,20 @@ void slice_grad(const Tensor& input, paddings.push_back(offsets[i]); paddings.push_back((in_dims[i] - out_dims[i]) - offsets[i]); } + Tensor reshape_out_grad; + if (out_grad.shape().size() == 0) { + reshape_out_grad = full({1}, 1, input.dtype()); + } else { + reshape_out_grad = out_grad; + } + if (decrease_size > 0 && (decrease_size != static_cast(in_dims.size()))) { auto out_tmp = - pad(reshape(out_grad, origin_out_shape), paddings, 0.0); + pad(reshape(reshape_out_grad, origin_out_shape), paddings, 0.0); set_output(out_tmp, input_grad); } else { - auto out_tmp = pad(out_grad, paddings, 0.0); + auto out_tmp = pad(reshape_out_grad, paddings, 0.0); set_output(out_tmp, input_grad); } } @@ -1548,7 +1600,8 @@ void leaky_relu_grad(const Tensor& out, template void sigmoid_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { - set_output(out_grad * (out * (1 - out)), x_grad); + auto one_tensor = full_scalar(1.0, out.dtype()); + set_output(out_grad * (out * (one_tensor - out)), x_grad); } } @@ -1772,11 +1825,13 @@ void prod_grad(const Tensor& x, } else 
{ reduce_all = false; } - auto x_grad_tmp = Tensor(); - auto out_tmp = Tensor(); + auto out_grad_tmp = Tensor(); + auto x_reshape = Tensor(); + std::vector unchange_axis, change_axis, transpose_shape, + cumprod_shape; + std::vector transpose_dim, origin_position; if (x_dim_size == 1) { - x_grad_tmp = out_grad.expand(IntArray(x_dim)); - out_tmp = out.expand(IntArray(x_dim)); + out_grad_tmp = out_grad.expand(IntArray(x_dim)); } else { if (!keep_dim) { auto axis_ = std::vector(); @@ -1794,16 +1849,69 @@ void prod_grad(const Tensor& x, } auto out_grad_shape = get_unsqueeze_dims(out_grad, axis_); auto out_grad_ = reshape(out_grad, out_grad_shape); - x_grad_tmp = out_grad_.expand(IntArray(x_dim)); - auto out_ = reshape(out, out_grad_shape); - out_tmp = out_.expand(IntArray(x_dim)); + out_grad_tmp = out_grad_.expand(IntArray(x_dim)); } else { - x_grad_tmp = out_grad.expand(IntArray(x_dim)); - out_tmp = out.expand(IntArray(x_dim)); + out_grad_tmp = out_grad.expand(IntArray(x_dim)); } } - auto x_grad_res = x_grad_tmp * out_tmp * (1 / x); - set_output(x_grad_res, x_grad); + auto axis_ = std::vector(); + if (reduce_all) { + int64_t numel = 1; + for (int64_t i = 0; i < x_dim_size; i++) { + axis_.push_back(i); + numel *= x_dim[i]; + } + cumprod_shape.push_back(numel); + x_reshape = reshape(x, cumprod_shape); + auto left_cumprod = cumprod(x_reshape, -1, true, false); + auto right_cumprod = cumprod(x_reshape, -1, true, true); + auto x_grad_tmp = left_cumprod * right_cumprod; + auto x_grad_tmp2 = reshape(x_grad_tmp, x.shape()); + auto x_grad_res = x_grad_tmp2 * out_grad_tmp; + set_output(x_grad_res, x_grad); + } else { + int64_t unchange_size = x_dim_size - axis_size; + int64_t unchange_index = 0; + for (int64_t i = 0; i < axis_size; i++) { + if (axis[i] < 0) { + axis_.push_back(axis[i] + x_dim_size); + } else { + axis_.push_back(axis[i]); + } + } + for (int64_t i = 0; i < x_dim_size; i++) { + auto it = find(axis_.begin(), axis_.end(), i); + if (it != axis_.end()) { + int64_t 
index = it - axis_.begin(); + origin_position.push_back(static_cast(unchange_size + index)); + } else { + unchange_axis.push_back(i); + origin_position.push_back(static_cast(unchange_index)); + unchange_index += 1; + } + } + int64_t numel = 1; + for (int64_t i = 0; i < unchange_size; i++) { + transpose_shape.push_back(x_dim[unchange_axis[i]]); + cumprod_shape.push_back(x_dim[unchange_axis[i]]); + transpose_dim.push_back(static_cast(unchange_axis[i])); + } + for (int64_t i = 0; i < axis_size; i++) { + transpose_shape.push_back(x_dim[axis_[i]]); + transpose_dim.push_back(static_cast(axis_[i])); + numel *= x_dim[axis_[i]]; + } + cumprod_shape.push_back(numel); + auto x_transpose = transpose(x, transpose_dim); + x_reshape = reshape(x_transpose, cumprod_shape); + auto left_cumprod = cumprod(x_reshape, -1, true, false); + auto right_cumprod = cumprod(x_reshape, -1, true, true); + auto x_grad_tmp = left_cumprod * right_cumprod; + auto x_grad_reshape = reshape(x_grad_tmp, transpose_shape); + auto x_grad_tmp2 = transpose(x_grad_reshape, origin_position); + auto x_grad_res = x_grad_tmp2 * out_grad_tmp; + set_output(x_grad_res, x_grad); + } } } diff --git a/paddle/fluid/primitive/rule/vjp/manual/manual_vjp.cc b/paddle/fluid/primitive/rule/vjp/manual/manual_vjp.cc index 623225bb8c09b..b6b3461f3aca0 100644 --- a/paddle/fluid/primitive/rule/vjp/manual/manual_vjp.cc +++ b/paddle/fluid/primitive/rule/vjp/manual/manual_vjp.cc @@ -23,8 +23,7 @@ #include "paddle/fluid/primitive/utils/utils.h" #include "paddle/pir/include/core/operation.h" -namespace paddle { -namespace primitive { +namespace paddle::primitive { std::vector> add_n_vjp( const std::vector& x, @@ -186,5 +185,4 @@ std::vector> fused_attention_vjp( return vjp_res; } -} // namespace primitive -} // namespace paddle +} // namespace paddle::primitive diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 6deffc89271f9..a3086b7d7e34a 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ 
b/paddle/fluid/pybind/CMakeLists.txt @@ -14,7 +14,6 @@ set(PYBIND_DEPS pass generate_pass pass_builder - parallel_executor compiled_program profiler layer @@ -130,7 +129,6 @@ set(PYBIND_SRCS protobuf.cc exception.cc op_function_common.cc - parallel_executor.cc compiled_program.cc tensor.cc place.cc @@ -310,18 +308,12 @@ if(WITH_PYTHON) list(REMOVE_ITEM GENERATOR_DEPS python) endif() target_link_libraries(eager_legacy_op_function_generator ${GENERATOR_DEPS}) - if(NOT WIN32) - add_executable(kernel_signature_generator kernel_signature_generator.cc) - target_link_libraries(kernel_signature_generator - ${OP_FUNCTION_GENERATOR_DEPS}) - endif() get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(eager_legacy_op_function_generator ${os_dependency_modules}) if(WITH_ROCM) target_link_libraries(eager_legacy_op_function_generator ${ROCM_HIPRTC_LIB}) - target_link_libraries(kernel_signature_generator ${ROCM_HIPRTC_LIB}) endif() set(op_function_output_path ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/) diff --git a/paddle/fluid/pybind/auto_parallel_py.cc b/paddle/fluid/pybind/auto_parallel_py.cc index 87895d6b4df31..f8c53a52e8d46 100644 --- a/paddle/fluid/pybind/auto_parallel_py.cc +++ b/paddle/fluid/pybind/auto_parallel_py.cc @@ -264,8 +264,8 @@ void BindAutoParallel(py::module *m) { &ProcessMesh::dim_size)) .def("empty", &ProcessMesh::empty) .def("contains", &ProcessMesh::contains) - .def(py::self == py::self) - .def(py::self != py::self) + .def(py::self == py::self) // NOLINT + .def(py::self != py::self) // NOLINT .def("__copy__", [](const ProcessMesh &self) { return ProcessMesh(self); }) .def( @@ -298,8 +298,8 @@ void BindAutoParallel(py::module *m) { .def_property_readonly("machine_id", &Device::machine_id) .def_property_readonly("type", &Device::type) .def_property("capability", &Device::capability, &Device::set_capability) - .def(py::self == py::self) - .def(py::self != py::self) + .def(py::self == py::self) // NOLINT + 
.def(py::self != py::self) // NOLINT .def("__str__", &Device::to_string); py::class_(*m, "LinkCapability") @@ -317,8 +317,8 @@ void BindAutoParallel(py::module *m) { .def_property_readonly("target_id", &Link::target_id) .def_property_readonly("type", &Link::type) .def_property("capability", &Link::capability, &Link::set_capability) - .def(py::self == py::self) - .def(py::self != py::self) + .def(py::self == py::self) // NOLINT + .def(py::self != py::self) // NOLINT .def("__str__", &Link::to_string); py::class_(*m, "Machine") @@ -362,8 +362,8 @@ void BindAutoParallel(py::module *m) { .def("dim_size", static_cast( &DeviceMesh::dim_size)) - .def(py::self == py::self) - .def(py::self != py::self) + .def(py::self == py::self) // NOLINT + .def(py::self != py::self) // NOLINT .def("__copy__", [](const TensorDistAttr &self) { return TensorDistAttr(self); }) .def( @@ -435,8 +435,8 @@ void BindAutoParallel(py::module *m) { .def("is_partial", &phi::distributed::Placement::is_partial) .def("__hash__", &phi::distributed::Placement::hash) .def("__str__", &phi::distributed::Placement::to_string) - .def(py::self == py::self) - .def(py::self != py::self); + .def(py::self == py::self) // NOLINT + .def(py::self != py::self); // NOLINT auto Shard = py::class_>( @@ -464,8 +464,8 @@ void BindAutoParallel(py::module *m) { .def("get_dim", &phi::distributed::Shard::get_dim) .def("__hash__", &phi::distributed::Shard::hash) .def("__str__", &phi::distributed::Shard::to_string) - .def(py::self == py::self) - .def(py::self != py::self); + .def(py::self == py::self) // NOLINT + .def(py::self != py::self); // NOLINT auto Replicate = py::class_>( @@ -487,8 +487,8 @@ void BindAutoParallel(py::module *m) { .def(py::init<>()) .def("__hash__", &phi::distributed::Replicate::hash) .def("__str__", &phi::distributed::Replicate::to_string) - .def(py::self == py::self) - .def(py::self != py::self); + .def(py::self == py::self) // NOLINT + .def(py::self != py::self); // NOLINT auto Partial = py::class_>( @@ 
-514,8 +514,8 @@ void BindAutoParallel(py::module *m) { py::arg("reduce_type") = phi::ReduceType::kRedSum) .def("__hash__", &phi::distributed::Partial::hash) .def("__str__", &phi::distributed::Partial::to_string) - .def(py::self == py::self) - .def(py::self != py::self); + .def(py::self == py::self) // NOLINT + .def(py::self != py::self); // NOLINT g_placement_shard_pytype = reinterpret_cast(Shard.ptr()); g_placement_replicated_pytype = @@ -565,8 +565,8 @@ void BindAutoParallel(py::module *m) { return py::bytes(self.serialize_to_string()); }) .def("parse_from_string", &TensorDistAttr::parse_from_string) - .def(py::self == py::self) - .def(py::self != py::self) + .def(py::self == py::self) // NOLINT + .def(py::self != py::self) // NOLINT .def("__copy__", [](const TensorDistAttr &self) { return TensorDistAttr(self); }) .def( @@ -719,8 +719,8 @@ void BindAutoParallel(py::module *m) { return py::bytes(self.serialize_to_string()); }) .def("parse_from_string", &OperatorDistAttr::parse_from_string) - .def(py::self == py::self) - .def(py::self != py::self) + .def(py::self == py::self) // NOLINT + .def(py::self != py::self) // NOLINT .def("__copy__", [](const OperatorDistAttr &self) { return OperatorDistAttr(self); }) .def( diff --git a/paddle/fluid/pybind/control_flow_api.cc b/paddle/fluid/pybind/control_flow_api.cc index 036c4d9fd8200..61be0eb61fb3e 100644 --- a/paddle/fluid/pybind/control_flow_api.cc +++ b/paddle/fluid/pybind/control_flow_api.cc @@ -52,7 +52,6 @@ using pir::Builder; using pir::CombineOp; using pir::Operation; using pir::Program; -using pir::Region; using pir::StackCreateOp; using pir::TuplePopOp; using pir::TuplePushOp; @@ -271,8 +270,7 @@ void BuildPipeForBlock(Block* block) { } // namespace -namespace paddle { -namespace pybind { +namespace paddle::pybind { PyIfOp::PyIfOp(IfOp if_op) : IfOp(if_op) { PADDLE_ENFORCE_NOT_NULL( if_op, @@ -413,5 +411,4 @@ void BindControlFlowApi(py::module* m) { BindTuplePopOp(m); } -} // namespace pybind -} // namespace 
paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/data_set_py.cc b/paddle/fluid/pybind/data_set_py.cc index bda9e8e653ef0..0c2f883de904d 100644 --- a/paddle/fluid/pybind/data_set_py.cc +++ b/paddle/fluid/pybind/data_set_py.cc @@ -39,8 +39,7 @@ limitations under the License. */ namespace py = pybind11; -namespace paddle { -namespace pybind { +namespace paddle::pybind { class IterableDatasetWrapper { public: @@ -400,5 +399,4 @@ void BindDataset(py::module *m) { .def("_next", &IterableDatasetWrapper::Next); } -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/dist_api.cc b/paddle/fluid/pybind/dist_api.cc index 31a32c3e27a14..4907f52979277 100644 --- a/paddle/fluid/pybind/dist_api.cc +++ b/paddle/fluid/pybind/dist_api.cc @@ -26,8 +26,7 @@ namespace py = pybind11; -namespace pybind11 { -namespace detail { +namespace pybind11::detail { template > : map_caster, Key, Value> {}; -} // namespace detail -} // namespace pybind11 +} // namespace pybind11::detail using paddle::dialect::OperationDistAttribute; using paddle::dialect::ProcessMeshAttribute; using paddle::dialect::TensorDistAttribute; -namespace paddle { -namespace pybind { +namespace paddle::pybind { void BindOperationDistAttribute(py::module *m) { py::class_ dist_attr( @@ -150,5 +147,4 @@ void BindDistApi(pybind11::module *module) { BindOpsFunction(&ops_modules); } -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 00b6ba994233f..4fbb8c3d48e2d 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -1416,7 +1416,7 @@ void BindEager(pybind11::module* module) { Py_INCREF(&PyBaseObject_Type); type->tp_base = reinterpret_cast(&PyBaseObject_Type); type->tp_flags |= - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; // NOLINT #if 
PY_VERSION_HEX >= 0x03050000 type->tp_as_async = &heap_type->as_async; #endif @@ -1464,7 +1464,7 @@ void BindEagerStringTensor(pybind11::module* module) { Py_INCREF(&PyBaseObject_Type); type->tp_base = reinterpret_cast(&PyBaseObject_Type); type->tp_flags |= - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; // NOLINT #if PY_VERSION_HEX >= 0x03050000 type->tp_as_async = &heap_type->as_async; #endif diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 11298fda6a300..6b3c15b55666e 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -72,8 +72,7 @@ typedef SSIZE_T ssize_t; COMMON_DECLARE_bool(set_to_1d); COMMON_DECLARE_bool(use_stride_kernel); -namespace paddle { -namespace pybind { +namespace paddle::pybind { extern void InitTensorWithNumpyValue(TensorObject* self, const pybind11::object& array, @@ -3518,5 +3517,4 @@ PyMethodDef string_tensor_variable_methods[] = { // NOLINT nullptr}, // TODO(zhoushunjie): Need to add _copy_to, copy_ for StringTensor. 
{nullptr, nullptr, 0, nullptr}}; -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/eager_py_layer.cc b/paddle/fluid/pybind/eager_py_layer.cc index 0d9aff8c7ef32..a51fbd72947f3 100644 --- a/paddle/fluid/pybind/eager_py_layer.cc +++ b/paddle/fluid/pybind/eager_py_layer.cc @@ -778,7 +778,7 @@ void BindEagerPyLayer(PyObject* module) { Py_INCREF(&PyBaseObject_Type); type->tp_base = reinterpret_cast(&PyBaseObject_Type); type->tp_flags |= - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; // NOLINT #if PY_VERSION_HEX >= 0x03050000 type->tp_as_async = &heap_type->as_async; #endif diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 6044d2aa567e2..2fcbe08afdded 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -50,8 +50,7 @@ limitations under the License. */ COMMON_DECLARE_bool(check_nan_inf); COMMON_DECLARE_int32(check_nan_inf_level); -namespace paddle { -namespace pybind { +namespace paddle::pybind { extern PyTypeObject* p_tensor_type; extern PyTypeObject* p_string_tensor_type; @@ -2824,5 +2823,4 @@ void BindEagerUtils(PyObject* module) { } } -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/exception.cc b/paddle/fluid/pybind/exception.cc index 7061b844987fa..1a2727504e197 100644 --- a/paddle/fluid/pybind/exception.cc +++ b/paddle/fluid/pybind/exception.cc @@ -16,8 +16,7 @@ limitations under the License. 
*/ #include "paddle/common/exception.h" #include "paddle/fluid/memory/allocation/allocator.h" -namespace paddle { -namespace pybind { +namespace paddle::pybind { /* Paddle Exception mapping rules: * - InvalidArgumentError -> ValueError @@ -139,5 +138,4 @@ void ThrowExceptionToPython(std::exception_ptr p) { PyErr_SetString(PyExc_OSError, e.what()); } } -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/generator_py.cc b/paddle/fluid/pybind/generator_py.cc index 41a98f7316766..a1ef869c087cb 100644 --- a/paddle/fluid/pybind/generator_py.cc +++ b/paddle/fluid/pybind/generator_py.cc @@ -29,8 +29,7 @@ limitations under the License. */ namespace py = pybind11; -namespace paddle { -namespace pybind { +namespace paddle::pybind { void BindGenerator(py::module* m_ptr) { auto& m = *m_ptr; py::class_def("graph_safe_remove_nodes", [](Graph *graph, const std::unordered_set &nodes) { @@ -408,5 +407,4 @@ void BindPass(py::module *m) { }); } -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 0b3d79b6e4ea4..f12828ba6ef80 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -52,8 +52,7 @@ namespace py = pybind11; // NOLINT -namespace pybind11 { -namespace detail { +namespace pybind11::detail { // Note: use same enum number of float16 in numpy. 
// import numpy as np @@ -79,11 +78,9 @@ struct npy_format_descriptor { static constexpr auto name = _("float16"); }; -} // namespace detail -} // namespace pybind11 +} // namespace pybind11::detail -namespace paddle { -namespace pybind { +namespace paddle::pybind { using paddle::AnalysisPredictor; using paddle::NativeConfig; using paddle::NativePaddlePredictor; @@ -1345,5 +1342,4 @@ void BindInternalUtils(py::module *m) { }); } } // namespace -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/kernel_signature_generator.cc b/paddle/fluid/pybind/kernel_signature_generator.cc deleted file mode 100644 index 23892fabe1c24..0000000000000 --- a/paddle/fluid/pybind/kernel_signature_generator.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/phi_utils.h" -#include "paddle/fluid/pybind/pybind.h" // NOLINT -#include "paddle/phi/core/compat/op_utils.h" -#include "paddle/phi/core/kernel_factory.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/declarations.h" - -// print names of kernel function params with json format: -// { -// "norm":{ -// "inputs":[ -// "X" -// ], -// "attrs":[ -// "axis", -// "epsilon", -// "is_test" -// ], -// "outputs":[ -// "Norm", -// "Out" -// ] -// }, -// ... -// } -int main(int argc, char **argv) { // NOLINT - paddle::framework::InitDefaultKernelSignatureMap(); - auto &kernel_signature_map = phi::DefaultKernelSignatureMap::Instance(); - auto &kernel_factory = phi::KernelFactory::Instance(); - std::string kernel_signature_map_str{"{"}; - for (const auto &op_kernel_pair : kernel_factory.kernels()) { - std::string op_name = op_kernel_pair.first; - const paddle::flat_hash_map &kernel_name_map = - phi::OpUtilsMap::Instance().fluid_op_to_phi_kernel(); - for (auto &it : kernel_name_map) { - if (it.second == op_name) { - op_name = it.first; - break; - } - } - if (kernel_signature_map.Has(op_name)) { - kernel_signature_map_str.append("\"") - .append(op_kernel_pair.first) - .append("\":{"); - const auto &args = kernel_signature_map.Get(op_name); - - kernel_signature_map_str += "\"inputs\":["; - auto inputs_ = args.input_names; - for (size_t i = 0; i < inputs_.size(); i++) { - kernel_signature_map_str.append("\"").append(inputs_[i]).append("\","); - } - if (!inputs_.empty()) kernel_signature_map_str.pop_back(); - - kernel_signature_map_str += "],\"attrs\":["; - auto attrs_ = args.attr_names; - for (size_t i = 0; i < attrs_.size(); i++) { - kernel_signature_map_str.append("\"").append(attrs_[i]).append("\","); - } - if (!attrs_.empty()) kernel_signature_map_str.pop_back(); - kernel_signature_map_str += "],\"outputs\":["; - auto outputs_ = args.output_names; 
- for (size_t i = 0; i < outputs_.size(); i++) { - kernel_signature_map_str.append("\"").append(outputs_[i]).append("\","); - } - - if (!outputs_.empty()) kernel_signature_map_str.pop_back(); - kernel_signature_map_str += "]},"; - } - } - kernel_signature_map_str.pop_back(); - kernel_signature_map_str += "}\n"; - std::cout << kernel_signature_map_str; - return 0; -} diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h index 872be599d9a76..f41950db85e6d 100644 --- a/paddle/fluid/pybind/manual_static_op_function.h +++ b/paddle/fluid/pybind/manual_static_op_function.h @@ -81,7 +81,7 @@ static PyObject *static_api_set_parameter(PyObject *self, } } -static PyObject *static_api_updata_parameter(PyObject *self, +static PyObject *static_api_update_parameter(PyObject *self, PyObject *args, PyObject *kwargs) { try { @@ -98,7 +98,7 @@ static PyObject *static_api_updata_parameter(PyObject *self, // Call ir static api CallStackRecorder callstack_recoder("uodata_parameter"); callstack_recoder.Record(); - paddle::dialect::updata_parameter(parameter, name); + paddle::dialect::update_parameter(parameter, name); callstack_recoder.AttachToOps(); Py_RETURN_NONE; } catch (...) 
{ @@ -975,10 +975,10 @@ static PyMethodDef ManualOpsAPI[] = { (PyCFunction)(void (*)(void))static_api_set_parameter, METH_VARARGS | METH_KEYWORDS, "C++ interface function for set_parameter."}, - {"updata_parameter", - (PyCFunction)(void (*)(void))static_api_updata_parameter, + {"update_parameter", + (PyCFunction)(void (*)(void))static_api_update_parameter, METH_VARARGS | METH_KEYWORDS, - "C++ interface function for updata_parameter."}, + "C++ interface function for update_parameter."}, {"set_persistable_value", (PyCFunction)(void (*)(void))static_api_set_persistable_value, METH_VARARGS | METH_KEYWORDS, diff --git a/paddle/fluid/pybind/op_function_common.cc b/paddle/fluid/pybind/op_function_common.cc index d2478e592354f..2b57b27fb45f5 100644 --- a/paddle/fluid/pybind/op_function_common.cc +++ b/paddle/fluid/pybind/op_function_common.cc @@ -38,8 +38,7 @@ #include "paddle/pir/include/core/op_result.h" #include "paddle/pir/include/core/value.h" -namespace paddle { -namespace pybind { +namespace paddle::pybind { class OpAttrTypeMap { public: @@ -1147,5 +1146,4 @@ ssize_t GetIdxFromCoreOpsInfoMap( return -1; } -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/parallel_executor.cc b/paddle/fluid/pybind/parallel_executor.cc deleted file mode 100644 index 7f6b054564bc6..0000000000000 --- a/paddle/fluid/pybind/parallel_executor.cc +++ /dev/null @@ -1,1178 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Copyright (c) 2022 NVIDIA Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ -#include -// Avoid a problem with copysign defined in pyconfig.h on Windows. -#ifdef copysign -#undef copysign -#endif - -#include -#include -#include -#include -#include -#include -#include // NOLINT // for call_once -#include -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/framework/custom_operator.h" -#include "paddle/fluid/framework/data_layout.h" -#include "paddle/fluid/framework/data_type_transform.h" -#include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/executor_cache.h" -#include "paddle/fluid/framework/executor_gc_helper.h" -#include "paddle/fluid/framework/feed_fetch_method.h" -#include "paddle/fluid/framework/feed_fetch_type.h" -#include "paddle/fluid/framework/garbage_collector.h" -#include "paddle/fluid/framework/io/fs.h" -#include "paddle/fluid/framework/ir/coalesce_grad_tensor_pass.h" -#include "paddle/fluid/framework/ir/cost_model.h" -#include "paddle/fluid/framework/ir/generate_pass.h" -#include "paddle/fluid/framework/ir/pass_builder.h" -#include "paddle/fluid/framework/lod_rank_table.h" -#include "paddle/fluid/framework/lod_tensor_array.h" -#include "paddle/fluid/framework/new_executor/executor_statistics.h" -#include "paddle/fluid/framework/new_executor/standalone_executor.h" -#include "paddle/fluid/framework/op_info.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/framework/parallel_executor.h" -#include "paddle/fluid/framework/phi_utils.h" -#include "paddle/fluid/framework/prune.h" -#include "paddle/fluid/framework/reader.h" -#include "paddle/fluid/framework/scope_pool.h" -#include "paddle/fluid/framework/selected_rows_utils.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/framework/trainer.h" -#include 
"paddle/fluid/framework/type_defs.h" -#include "paddle/fluid/framework/version.h" -#include "paddle/fluid/imperative/amp_auto_cast.h" -#include "paddle/fluid/imperative/layer.h" -#include "paddle/fluid/memory/allocation/allocator_strategy.h" -#ifdef PADDLE_WITH_CUDA -#include "paddle/fluid/memory/allocation/cuda_ipc_allocator.h" -#endif -#include "paddle/fluid/memory/allocation/mmap_allocator.h" -#include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/common_infer_shape_functions.h" -#include "paddle/fluid/platform/cpu_helper.h" -#include "paddle/fluid/platform/device/device_wrapper.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/dynload/dynamic_loader.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/init.h" -#include "paddle/fluid/platform/monitor.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/fluid/platform/profiler.h" -#include "paddle/fluid/platform/profiler/event_python.h" -#include "paddle/fluid/platform/profiler/event_tracing.h" -#include "paddle/fluid/platform/profiler/profiler.h" -#include "paddle/fluid/pybind/bind_cost_model.h" -#include "paddle/fluid/pybind/bind_fleet_executor.h" -#include "paddle/fluid/pybind/box_helper_py.h" -#include "paddle/fluid/pybind/communication.h" -#include "paddle/fluid/pybind/compatible.h" -#include "paddle/fluid/pybind/const_value.h" -#include "paddle/fluid/pybind/cuda_streams_py.h" -#include "paddle/fluid/pybind/data_set_py.h" -#include "paddle/fluid/pybind/distributed_py.h" -#include "paddle/fluid/pybind/eager.h" -#include "paddle/fluid/pybind/exception.h" -#include "paddle/fluid/pybind/fleet_wrapper_py.h" -#include "paddle/fluid/pybind/generator_py.h" -#include "paddle/fluid/pybind/global_value_getter_setter.h" -#include "paddle/fluid/pybind/gloo_context_py.h" -#include "paddle/fluid/pybind/gloo_wrapper_py.h" -#include "paddle/fluid/pybind/graph.h" -#include "paddle/fluid/pybind/heter_wrapper_py.h" -#include 
"paddle/fluid/pybind/imperative.h" -#include "paddle/fluid/pybind/inference_api.h" -#include "paddle/fluid/pybind/io.h" -#include "paddle/fluid/pybind/metrics_py.h" -#include "paddle/fluid/pybind/ps_gpu_wrapper_py.h" -#include "paddle/fluid/pybind/pybind_variant_caster.h" -#include "paddle/phi/backends/cpu/cpu_info.h" -#include "paddle/phi/backends/device_manager.h" -#include "paddle/phi/core/compat/convert_utils.h" -#include "paddle/phi/core/lod_utils.h" -#include "paddle/utils/none.h" - -#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) -#include "paddle/fluid/pybind/nccl_wrapper_py.h" -#endif -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/pybind/protobuf.h" -#include "paddle/fluid/pybind/pybind.h" // NOLINT -#include "paddle/fluid/pybind/reader_py.h" -#include "paddle/fluid/pybind/tensor_py.h" -#include "paddle/utils/string/to_string.h" -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) -#include "paddle/fluid/operators/nccl/nccl_gpu_common.h" -#endif -#ifndef PADDLE_WITH_HIP -#include "paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h" -#endif -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#endif - -#ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/device/xpu/xpu_info.h" -#include "paddle/fluid/platform/device/xpu/xpu_op_list.h" -#endif - -#ifdef PADDLE_WITH_CUSTOM_DEVICE -#include "paddle/phi/capi/capi.h" -#endif - -#include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" - -#ifdef PADDLE_WITH_IPU -#include "paddle/fluid/platform/device/ipu/ipu_backend.h" -#include "paddle/fluid/platform/device/ipu/ipu_info.h" -#endif - -#ifdef PADDLE_WITH_CRYPTO -#include "paddle/fluid/pybind/crypto.h" -#endif - -#if defined PADDLE_WITH_PSCORE -#include "paddle/fluid/pybind/fleet_py.h" -#endif - -#ifdef PADDLE_WITH_CINN -#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" -#endif - -#include "paddle/common/flags.h" -#include 
"paddle/fluid/eager/api/utils/global_utils.h" -#include "paddle/fluid/imperative/layout_autotune.h" -#include "paddle/fluid/pybind/eager_utils.h" -#include "paddle/fluid/pybind/parallel_executor.h" -#include "paddle/phi/api/ext/op_meta_info.h" -#include "paddle/phi/kernels/autotune/cache.h" -#include "paddle/phi/kernels/autotune/switch_autotune.h" -#include "pybind11/stl.h" - -COMMON_DECLARE_bool(use_mkldnn); - -// disable auto conversion to list in Python -PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray); -PYBIND11_MAKE_OPAQUE(paddle::framework::FetchUnmergedList); -PYBIND11_MAKE_OPAQUE(paddle::framework::FetchList); -PYBIND11_MAKE_OPAQUE(paddle::framework::FetchType); - -namespace paddle { -namespace pybind { -using namespace paddle::framework; // NOLINT -void BindParallelExecutor(pybind11::module &m) { // NOLINT - // -- python binds for parallel executor. - py::class_ pe(m, "ParallelExecutor"); - py::class_ exec_strategy(pe, "ExecutionStrategy"); - - py::enum_(m, "DeviceType", py::arithmetic()) - .value("CPU", paddle::platform::DeviceType::CPU) - .value("CUDA", paddle::platform::DeviceType::CUDA) - .value("XPU", paddle::platform::DeviceType::XPU); - - exec_strategy.def(py::init()) - .def_property( - "num_threads", - [](const ExecutionStrategy &self) { return self.num_threads_; }, - [](ExecutionStrategy &self, size_t num_threads) { - self.num_threads_ = num_threads; - }) - .def_property( - "_use_device", - [](const ExecutionStrategy &self) { return self.use_device_; }, - [](ExecutionStrategy &self, paddle::platform::DeviceType use_device) { - self.use_device_ = use_device; - }) // NOTE(liuyuhui): Doesn't add doc for 'use_device', because - // use_device isn‘t exposed to users. 
- .def_property( - "allow_op_delay", - [](const ExecutionStrategy &self) { return self.allow_op_delay_; }, - [](ExecutionStrategy &self, bool allow_op_delay) { - self.allow_op_delay_ = allow_op_delay; - }) - .def_property( - "num_iteration_per_drop_scope", - [](const ExecutionStrategy &self) { - return self.num_iteration_per_drop_scope_; - }, - [](ExecutionStrategy &self, size_t num_iteration_per_drop_scope) { - self.num_iteration_per_drop_scope_ = num_iteration_per_drop_scope; - }) - .def_property( - "num_iteration_per_run", - [](const ExecutionStrategy &self) { - return self.num_iteration_per_run_; - }, - [](ExecutionStrategy &self, size_t num_iteration_per_run) { - self.num_iteration_per_run_ = num_iteration_per_run; - }) - .def_property( - "use_thread_barrier", - [](const ExecutionStrategy &self) { return self.thread_barrier_; }, - [](ExecutionStrategy &self, bool use_thread_barrier) { - self.thread_barrier_ = use_thread_barrier; - }) - .def_property( - "_dry_run", - [](const ExecutionStrategy &self) { return self.dry_run_; }, - [](ExecutionStrategy &self, bool dry_run) { - self.dry_run_ = dry_run; - }); - - exec_strategy.def_property( - "use_experimental_executor", - [](const ExecutionStrategy &self) { - return self.type_ == ExecutionStrategy::kExperimental; - }, - [](ExecutionStrategy &self, bool experimental) { - self.type_ = experimental ? ExecutionStrategy::kExperimental - : ExecutionStrategy::kDefault; - }); - - py::class_ build_strategy(pe, "BuildStrategy", R"DOC( - BuildStrategy allows the user to more preciously control how to - build the SSA Graph in ParallelExecutor by setting the property. - - Returns: - BuildStrategy: An BuildStrategy object. - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> data = static.data(name="x", shape=[None, 1], dtype="float32") - >>> hidden = static.nn.fc(data, size=10) - >>> loss = paddle.mean(hidden) - >>> paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.enable_inplace = True - >>> build_strategy.memory_optimize = True - >>> build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce - >>> program = static.CompiledProgram(static.default_main_program(), build_strategy=build_strategy) -)DOC"); - - py::enum_(build_strategy, "ReduceStrategy") - .value("Reduce", BuildStrategy::ReduceStrategy::kReduce) - .value("AllReduce", BuildStrategy::ReduceStrategy::kAllReduce) - .value("_NoReduce", BuildStrategy::ReduceStrategy::kNoReduce); - py::enum_(build_strategy, - "GradientScaleStrategy") - .value("CoeffNumDevice", - BuildStrategy::GradientScaleStrategy::kCoeffNumDevice) - .value("One", BuildStrategy::GradientScaleStrategy::kOne) - .value("Customized", BuildStrategy::GradientScaleStrategy::kCustomized); - - build_strategy.def(py::init()) - .def("_clear_finalized", &BuildStrategy::ClearFinalized) - .def_property( - "reduce_strategy", - [](const BuildStrategy &self) { return self.reduce_; }, - [](BuildStrategy &self, BuildStrategy::ReduceStrategy strategy) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.reduce_ = strategy; - }, - R"DOC((fluid.BuildStrategy.ReduceStrategy, optional): there are two reduce - strategies in ParallelExecutor, AllReduce and Reduce. 
If you want - that all the parameters' optimization are done on all devices independently, - you should choose AllReduce; otherwise, if you choose Reduce, all the parameters' - optimization will be evenly distributed to different devices, and then - broadcast the optimized parameter to other devices. - Default is 'AllReduce'. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce - )DOC") - .def_property( - "gradient_scale_strategy", - [](const BuildStrategy &self) { return self.gradient_scale_; }, - [](BuildStrategy &self, - BuildStrategy::GradientScaleStrategy strategy) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.gradient_scale_ = strategy; - }, - R"DOC((paddle.static.BuildStrategy.GradientScaleStrategy, optional): there are three - ways of defining :math:`loss@grad` in ParallelExecutor, that is, CoeffNumDevice, - One and Customized. By default, ParallelExecutor sets the :math:`loss@grad` - according to the number of devices. If you want to customize :math:`loss@grad`, - you can choose Customized. Default is 'CoeffNumDevice'. - - Examples: - .. 
code-block:: python - - >>> import numpy - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> use_cuda = paddle.device.is_compiled_with_cuda - >>> place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() - >>> exe = static.Executor(place) - - >>> data = static.data(name='X', shape=[None, 1], dtype='float32') - >>> hidden = static.nn.fc(data, size=10) - >>> loss = paddle.mean(hidden) - >>> paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) - - >>> exe.run(static.default_startup_program()) - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.gradient_scale_strategy = \ - ... static.BuildStrategy.GradientScaleStrategy.Customized - >>> compiled_prog = static.CompiledProgram( - ... static.default_main_program(), - ... build_strategy=build_strategy, - >>> ) - - >>> x = numpy.random.random(size=(10, 1)).astype('float32') - >>> loss_grad = numpy.ones((1)).astype("float32") * 0.01 - >>> loss_grad_name = loss.name+"@GRAD" - >>> loss_data = exe.run(compiled_prog, - ... feed={"X": x, loss_grad_name : loss_grad}, - ... fetch_list=[loss.name, loss_grad_name]) - )DOC") - .def_property( - "debug_graphviz_path", - [](const BuildStrategy &self) { return self.debug_graphviz_path_; }, - [](BuildStrategy &self, const std::string &path) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.debug_graphviz_path_ = path; - }, - R"DOC((str, optional): debug_graphviz_path indicates the path that - writing the SSA Graph to file in the form of graphviz. - It is useful for debugging. Default is empty string, that is, "" - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.debug_graphviz_path = "./graph" - )DOC") - .def_property( - "enable_sequential_execution", - [](const BuildStrategy &self) { - return self.enable_sequential_execution_; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.enable_sequential_execution_ = b; - }, - R"DOC((bool, optional): If set True, the execution order of ops would - be the same as what is in the program. Default is False. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.enable_sequential_execution = True - )DOC") - .def_property( - "remove_unnecessary_lock", - [](const BuildStrategy &self) { - return self.remove_unnecessary_lock_; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.remove_unnecessary_lock_ = b; - }, - R"DOC((bool, optional): If set True, some locks in GPU ops would be - released and ParallelExecutor would run faster. Default is True. - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.remove_unnecessary_lock = True - )DOC") - .def_property( - "num_trainers", - [](const BuildStrategy &self) { return self.num_trainers_; }, - [](BuildStrategy &self, int num_trainers) { -#ifdef WIN32 - PADDLE_THROW(platform::errors::Unavailable( - "Distribution mode is not supported on Windows platform.")); -#endif - self.num_trainers_ = num_trainers; - }) - .def_property( - "trainers_endpoints", - [](const BuildStrategy &self) { return self.trainers_endpoints_; }, - [](BuildStrategy &self, - const std::vector &trainers_endpoints) { - self.trainers_endpoints_ = trainers_endpoints; - }) - .def_property( - "trainer_id", - [](const BuildStrategy &self) { return self.trainer_id_; }, - [](BuildStrategy &self, int trainer_id) { - self.trainer_id_ = trainer_id; - }) - .def_property( - "nccl_comm_num", - [](const BuildStrategy &self) { return self.nccl_comm_num_; }, - [](BuildStrategy &self, int nccl_comm_num) { - self.nccl_comm_num_ = nccl_comm_num; - }) - .def_property( - "bkcl_comm_num", - [](const BuildStrategy &self) { return self.bkcl_comm_num_; }, - [](BuildStrategy &self, int bkcl_comm_num) { - self.bkcl_comm_num_ = bkcl_comm_num; - }) - .def_property( - "use_hierarchical_allreduce", - [](const BuildStrategy &self) { - return self.use_hierarchical_allreduce_; - }, - [](BuildStrategy &self, bool use) { - self.use_hierarchical_allreduce_ = use; - }) - .def_property( - "hierarchical_allreduce_inter_nranks", - [](const BuildStrategy &self) { - return self.hierarchical_allreduce_inter_nranks_; - }, - [](BuildStrategy &self, int nranks) { - self.hierarchical_allreduce_inter_nranks_ = nranks; - }) - .def_property( - "build_cinn_pass", - [](const BuildStrategy &self) { return self.build_cinn_pass_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - 
platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, " - "cannot be configured again.")); - self.build_cinn_pass_ = b; - }, - R"DOC((bool, optional): build_cinn_pass indicates whether - to lowering some operators in graph into cinn ops - to execute, which will speed up the process of execution. - Default False. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import paddle.static as static - >>> paddle.enable_static() - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.build_cinn_pass = True - )DOC") - .def_property( - "fuse_elewise_add_act_ops", - [](const BuildStrategy &self) { - return self.fuse_elewise_add_act_ops_; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fuse_elewise_add_act_ops_ = b; - }, - R"DOC((bool, optional): fuse_elewise_add_act_ops indicate whether - to fuse elementwise_add_op and activation_op, - it may make the execution faster. Default is False. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fuse_elewise_add_act_ops = True - )DOC") - .def_property( - "fuse_gemm_epilogue", - [](const BuildStrategy &self) { return self.fuse_gemm_epilogue_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fuse_gemm_epilogue_ = b; - }, - R"DOC((bool, optional): fuse_gemm_epilogue indicate whether - to fuse matmul_op, elemenewist_add_op and activation_op, - it may make the execution faster. Default is False. - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fuse_gemm_epilogue = True - )DOC") - .def_property( - "fuse_dot_product_attention", - [](const BuildStrategy &self) { - return self.fuse_dot_product_attention_; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finlaized, cannot be " - "configured again.")); - self.fuse_dot_product_attention_ = b; - }, - R"DOC((bool, optional): fuse_dot_product_attention indicate whether - to fuse dot product attention, - it would make the execution faster. Default is False. - - Examples: - .. code-block:: python - - import paddle - import paddle.static as static - - paddle.enable_static() - - build_strategy = static.BuildStrategy() - build_strategy.fuse_dot_product_attention = True - )DOC") - .def_property( - "fuse_adamw", - [](const BuildStrategy &self) { return self.fuse_adamw_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fuse_adamw_ = b; - }, - R"DOC((bool, optional): fuse_adamw indicate whether - to fuse all adamw optimizers with multi_tensor_adam, - it may make the execution faster. Default is False. - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - >>> paddle.enable_static() - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fuse_adamw = True - )DOC") - .def_property( - "fused_attention", - [](const BuildStrategy &self) { return self.fused_attention_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fused_attention_ = b; - }, - R"DOC((bool, optional): fused_attention indicate whether - to fuse the whole multi head attention part with one op, - it may make the execution faster. Default is False. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fused_attention = True - )DOC") - .def_property( - "fused_feedforward", - [](const BuildStrategy &self) { return self.fused_feedforward_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fused_feedforward_ = b; - }, - R"DOC((bool, optional): fused_feedforward indicate whether - to fuse the whole feed_forward part with one op, - it may make the execution faster. Default is False. - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fused_feedforward = True - )DOC") - .def_property( - "sequential_run", - [](const BuildStrategy &self) { return self.sequential_run_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.sequential_run_ = b; - }, - R"DOC((bool, optional): sequential_run is used to let the `StandaloneExecutor` run ops by the - order of `ProgramDesc`. Default is False. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.sequential_run = True - )DOC") - .def_property( - "fuse_resunit", - [](const BuildStrategy &self) { return self.fuse_resunit_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fuse_resunit_ = b; -#ifndef PADDLE_WITH_CUDNN_FRONTEND - if (self.fuse_resunit_) { - PADDLE_THROW(platform::errors::PreconditionNotMet( - "Paddle is not built with CUDNN Frontend support.")); - } -#endif - }, - R"DOC((bool, optional): fuse_resunit Default is False. - - Examples: - .. 
code-block:: python - - import paddle - import paddle.static as static - - paddle.enable_static() - - build_strategy = static.BuildStrategy() - build_strategy.fuse_resunit = True - )DOC") - .def_property( - "fuse_bn_act_ops", - [](const BuildStrategy &self) { return self.fuse_bn_act_ops_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fuse_bn_act_ops_ = b; - }, - R"DOC((bool, optional): fuse_bn_act_ops indicate whether - to fuse batch_norm and activation_op, - it may make the execution faster. Default is False. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fuse_bn_act_ops = True - )DOC") - .def_property( - "fuse_bn_add_act_ops", - [](const BuildStrategy &self) { return self.fuse_bn_add_act_ops_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fuse_bn_add_act_ops_ = b; - }, - R"DOC((bool, optional): fuse_bn_add_act_ops indicate whether - to fuse batch_norm, elementwise_add and activation_op, - it may make the execution faster. Default is True - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fuse_bn_add_act_ops = True - )DOC") - .def_property( - "enable_auto_fusion", - [](const BuildStrategy &self) { return self.enable_auto_fusion_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.enable_auto_fusion_ = b; - }, - R"DOC((bool, optional): Whether to enable fusing subgraph to a - fusion_group. Now we only support fusing subgraph that composed - of elementwise-like operators, such as elementwise_add/mul - without broadcast and activations. - - Examples: - .. code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.enable_auto_fusion = True - )DOC") - .def_property( - "fuse_relu_depthwise_conv", - [](const BuildStrategy &self) { - return self.fuse_relu_depthwise_conv_; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.fuse_relu_depthwise_conv_ = b; - }, - R"DOC((bool, optional): fuse_relu_depthwise_conv indicate whether - to fuse relu and depthwise_conv2d, - it will save GPU memory and may make the execution faster. - This options is only available in GPU devices. - Default is False. - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fuse_relu_depthwise_conv = True - )DOC") - .def_property( - "fuse_broadcast_ops", - [](const BuildStrategy &self) { - return self.fuse_broadcast_ops_ == true || - self.fuse_broadcast_ops_ == paddle::none; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, " - "cannot be configured again.")); - self.fuse_broadcast_ops_ = b; - }, - R"DOC((bool, optional): fuse_broadcast_op indicates whether - to fuse the broadcast ops. Note that, in Reduce mode, - fusing broadcast ops may make the program faster. Because - fusing broadcast OP equals delaying the execution of all - broadcast Ops, in this case, all nccl streams are used only - for NCCLReduce operations for a period of time. Default False. - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.fuse_broadcast_ops = True - )DOC") - .def_property( - "fuse_all_optimizer_ops", - [](const BuildStrategy &self) { - return self.fuse_all_optimizer_ops_ == true || - self.fuse_all_optimizer_ops_ == paddle::none; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, " - "cannot be configured again.")); - self.fuse_all_optimizer_ops_ = b; - }) - .def_property( - "sync_batch_norm", - [](const BuildStrategy &self) { return self.sync_batch_norm_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), - true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finalized, cannot be " - "configured again.")); - self.sync_batch_norm_ = b; - }, - R"DOC((bool, optional): sync_batch_norm indicates whether to use - synchronous batch normalization which synchronizes the mean - and variance through multi-devices in training phase. - Current implementation doesn't support FP16 training and CPU. - And only synchronous on one machine, not all machines. - Default is False. - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.sync_batch_norm = True - )DOC") - .def_property( - "memory_optimize", - [](const BuildStrategy &self) -> py::object { - if (self.memory_optimize_) { // NOLINT - return py::cast(self.memory_optimize_.get()); - } else { - return py::cast(nullptr); - } - }, - [](BuildStrategy &self, const py::handle &value) { - auto *py_obj = value.ptr(); - if (py_obj == nullptr || py_obj == Py_None) { - self.memory_optimize_ = paddle::none; - } else if (PyBool_Check(py_obj)) { - self.memory_optimize_ = (py_obj == Py_True); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "BuildStrategy.memory_optimize must be set to None, False " - "or True")); - } - }, - R"DOC((bool, optional): memory opitimize aims to save total memory - consumption, set to True to enable it. - - Default None. None means framework would choose to use or not use - this strategy automatically. Currently, None means that it is - enabled when GC is disabled, and disabled when GC is enabled. - True means enabling and False means disabling. Default is None. - - Examples: - .. 
code-block:: python - - >>> import paddle - >>> import paddle.static as static - - >>> paddle.enable_static() - - >>> build_strategy = static.BuildStrategy() - >>> build_strategy.memory_optimize = True - - )DOC") - .def_property( - "is_distribution", - [](const BuildStrategy &self) { return self.is_distribution_; }, - [](BuildStrategy &self, bool b) { -#ifdef WIN32 - if (b) { - PADDLE_THROW(platform::errors::Unavailable( - "Distribution mode is not supported on Windows platform.")); - } -#else - self.is_distribution_ = b; -#endif - }) - .def_property( - "async_mode", - [](const BuildStrategy &self) { return self.async_mode_; }, - [](BuildStrategy &self, bool b) { self.async_mode_ = b; }) - .def_property( - "enable_inplace", - [](const BuildStrategy &self) { return self.enable_inplace_; }, - [](BuildStrategy &self, bool b) { self.enable_inplace_ = b; }) - .def_property( - "enable_addto", - [](const BuildStrategy &self) { return self.enable_addto_; }, - [](BuildStrategy &self, bool b) { self.enable_addto_ = b; }) - .def_property( - "fuse_all_reduce_ops", - [](const BuildStrategy &self) { - return self.fuse_all_reduce_ops_ == true || - self.fuse_all_reduce_ops_ == paddle::none; - }, - [](BuildStrategy &self, bool b) { self.fuse_all_reduce_ops_ = b; }) - .def_property( - "enable_backward_optimizer_op_deps", - [](const BuildStrategy &self) { - return self.enable_backward_optimizer_op_deps_; - }, - [](BuildStrategy &self, bool b) { - self.enable_backward_optimizer_op_deps_ = b; - }) - .def_property( - "cache_runtime_context", - [](const BuildStrategy &self) { return self.cache_runtime_context_; }, - [](BuildStrategy &self, bool b) { self.cache_runtime_context_ = b; }) - .def_property( - "mkldnn_enabled_op_types", - [](const BuildStrategy &self) { - return self.mkldnn_enabled_op_types_; - }, - [](BuildStrategy &self, - const std::unordered_set &mkldnn_enabled_op_types) { - self.mkldnn_enabled_op_types_ = mkldnn_enabled_op_types; - }) - .def_property( - "fix_op_run_order", 
- [](const BuildStrategy &self) { return self.fix_op_run_order_; }, - [](BuildStrategy &self, bool fix_op_run_order) { - self.fix_op_run_order_ = fix_op_run_order; - }) - .def_property( - "allow_cuda_graph_capture", - [](const BuildStrategy &self) { - return self.allow_cuda_graph_capture_; - }, - [](BuildStrategy &self, bool allow_cuda_graph_capture) { - self.allow_cuda_graph_capture_ = allow_cuda_graph_capture; - }) - .def("_copy", - [](const BuildStrategy &self) { - auto new_bs = self; - new_bs.ClearFinalized(); - return new_bs; - }) - .def("__str__", - [](const BuildStrategy &self) { - std::stringstream ss; - ss << self; - return ss.str(); - }) - .def( - "_finalize_strategy_and_create_passes", - [](BuildStrategy &self) -> std::shared_ptr { - return self.CreatePassesFromStrategy(true); - }, - R"DOC(Allow user to customized passes. Normally model-specific - optimization passes should be defined in this way. BuildStrategy - cannot be updated after being finalized.)DOC"); - - m.def("_set_cached_executor_build_strategy", - [](int64_t program_id, const BuildStrategy &build_strategy) { - auto &cached_exe_info = framework::ExecutorInfoCache::Instance(); - cached_exe_info.SetBuildStrategy(program_id, build_strategy); - }); - - pe.def(py::init &, - const std::vector &, - const std::string &, - Scope *, - std::vector &, - const ExecutionStrategy &, - const BuildStrategy &, - ir::Graph *>()) - // NOTE: even we return a vec* to Python use reference policy. - // We still cannot get local_scope from this vector, since the element - // of vec will be freed by Python GC. We can only return Scope* - // one by one and mark them as reference. 
- .def( - "local_scopes", - [](ParallelExecutor &self) -> std::vector * { - return &self.GetLocalScopes(); - }, - py::return_value_policy::reference) - .def("drop_local_exe_scopes", &ParallelExecutor::DropLocalExeScopes) - .def("_need_create_local_exe_scopes", - &ParallelExecutor::NeedCreateLocalExeScope) - .def("feed_tensors_into_local_scopes", - &ParallelExecutor::FeedTensorsIntoLocalScopes) - .def("feed_and_split_tensor_into_local_scopes", - &ParallelExecutor::FeedAndSplitTensorIntoLocalScopes) - .def("run", - [](ParallelExecutor &self, - const std::vector &fetch_tensors, - bool return_merged) -> py::object { - if (return_merged) { - paddle::framework::FetchList ret; - /*gil_scoped_release*/ { - pybind11::gil_scoped_release release; - ret = self.RunAndMerge(fetch_tensors); - } - return py::cast(std::move(ret)); - } else { - paddle::framework::FetchUnmergedList ret; - /*gil_scoped_release*/ { - pybind11::gil_scoped_release release; - ret = self.Run(fetch_tensors); - } - return py::cast(std::move(ret)); - } - }) - .def("device_count", &ParallelExecutor::DeviceCount); - using VarQuantScale = - std::unordered_map>; - py::class_> pass(m, "Pass"); - pass.def(py::init()) - .def("has", &ir::Pass::Has) - .def("set_not_owned", - [](ir::Pass &self, const std::string &attr_name, ProgramDesc &attr) { - self.SetNotOwned(attr_name, &attr); - }) - .def( - "set", - [](ir::Pass &self, const std::string &name, const std::string &attr) { - self.Set(name, new std::string(attr)); - }) - .def("set", - [](ir::Pass &self, const std::string &name, bool val) { - self.Set(name, new bool(val)); - }) - .def("set", - [](ir::Pass &self, const std::string &name, int val) { - self.Set(name, new int(val)); - }) - .def("set", - [](ir::Pass &self, - const std::string &name, - std::vector set) { - self.Set(name, new std::vector(set)); - }) - .def("set", - [](ir::Pass &self, - const std::string &name, - std::unordered_set set) { - self.Set(name, new std::unordered_set(set)); - }) - .def("set", - 
[](ir::Pass &self, - const std::string &name, - std::unordered_set set) { - self.Set(name, new std::unordered_set(set)); - }) - .def("set", - [](ir::Pass &self, const std::string &name, VarQuantScale scales) { - self.Set(name, new VarQuantScale(scales)); - }) - .def("type", &ir::Pass::Type) - .def("apply", [](ir::Pass &self, std::shared_ptr graph) { - self.Apply(graph.get()); - }); - - py::class_> pb( - m, "PassBuilder"); - pb.def(py::init()) - .def("append_pass", - [](ir::PassBuilder &self, - const std::string &pass_type) -> std::shared_ptr { - return self.AppendPass(pass_type); - }) - .def("all_passes", [](ir::PassBuilder &self) { return self.AllPasses(); }) - .def("insert_pass", - [](ir::PassBuilder &self, size_t idx, const std::string &pass_type) { - return self.InsertPass(idx, pass_type); - }) - .def("remove_pass", - [](ir::PassBuilder &self, size_t idx) { self.RemovePass(idx); }); -} - -} // namespace pybind -} // namespace paddle diff --git a/paddle/fluid/pybind/pir.cc b/paddle/fluid/pybind/pir.cc index 29c5c764c9753..e8efeb54f16b0 100644 --- a/paddle/fluid/pybind/pir.cc +++ b/paddle/fluid/pybind/pir.cc @@ -79,10 +79,10 @@ #include "paddle/cinn/hlir/dialect/operator/ir/op_dialect.h" #include "paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.h" +#include "paddle/cinn/hlir/dialect/operator/transforms/pir_to_py_code_converter.h" #include "paddle/cinn/hlir/framework/pir_compiler.h" #endif -namespace py = pybind11; using paddle::dialect::ApiBuilder; using paddle::dialect::DenseTensorArrayType; using paddle::dialect::DenseTensorType; @@ -116,10 +116,12 @@ using pir::Program; using pir::StrAttribute; using pir::Type; using pir::Value; +using pir::VectorType; using pybind11::return_value_policy; COMMON_DECLARE_bool(print_ir); COMMON_DECLARE_bool(pir_apply_shape_optimization_pass); +COMMON_DECLARE_bool(logging_pir_py_code_dump_symbolic_dims); namespace paddle { namespace 
pybind { @@ -412,6 +414,12 @@ void BindProgram(py::module *m) { [](Program &self, IrMapping &ir_mapper) { return Clone(self, &ir_mapper); }) + .def( + "copy_to_block", + [](std::shared_ptr self, + pir::IrMapping &mapper, + Block *block) { return self->CopyToBlock(mapper, block); }, + return_value_policy::reference) .def( "list_vars", [](std::shared_ptr self) { @@ -449,6 +457,17 @@ void BindProgram(py::module *m) { global_prog_seed = random_seed; SetProgramInt64Attr(self, "random_seed", random_seed); }) + .def_property_readonly( + "num_blocks", + [](const std::shared_ptr &self) { + size_t num_blocks = 0; + auto top_level_op = self->module_op(); + for (size_t i = 0; i < top_level_op->num_regions(); ++i) { + auto ®ion = top_level_op->region(i); + num_blocks += region.size(); + } + return num_blocks; + }) .def_property_readonly( "blocks", [](const std::shared_ptr &self) { @@ -654,9 +673,12 @@ void BindIrMapping(py::module *m) { ir_mapping.def(py::init<>()) .def("look_up", [](IrMapping &self, Value from) { return self.Lookup(from); }) - .def("add", [](IrMapping &self, Value from, Value to) { - self.Add(from, to); - }); + .def("add", + [](IrMapping &self, Value from, Value to) { + self.Add(from, to); + }) + .def("size", + [](IrMapping &self) { return self.GetMutableMap().size(); }); } void BindCloneOptions(py::module *m) { @@ -1321,6 +1343,13 @@ void BindType(py::module *m) { PADDLE_THROW(phi::errors::InvalidArgument( "can't set _local_shape when building static graph")); }) + .def("as_vec_type", + [](Type self) -> py::object { + if (auto vec_type = self.dyn_cast()) { + return py::cast(vec_type); + } + return py::cast(Py_None); + }) .def("__str__", [](Type &self) { std::ostringstream print_stream; print_stream << self; @@ -1355,7 +1384,13 @@ void BindType(py::module *m) { } }); } - +void BindVectorType(py::module *m) { + py::class_ vec_type(*m, "VectorType"); + vec_type.def("as_list", &VectorType::data); + m->def("create_vec_type", [](std::vector &types) { + return 
VectorType::get(pir::IrContext::Instance(), types); + }); +} void BindAttribute(py::module *m) { py::class_ ir_attr(*m, "Attribute", py::module_local()); ir_attr.def("__eq__", &Attribute::operator==) @@ -2404,12 +2439,23 @@ std::shared_ptr ApplyFusedBnAddActPass( return program; } +void DumpPirPyCodeIfNeed(const std::shared_ptr &program, + const std::string &file_name) { +#ifdef PADDLE_WITH_CINN + ::cinn::dialect::ir::PirToPyCodeConverter(program.get()) + .file_name(file_name) + .dump_symbolic_shape(FLAGS_logging_pir_py_code_dump_symbolic_dims) + .SaveIfFlagEnabled(); +#endif +} + void BindIrPass(pybind11::module *m) { m->def("apply_cinn_pass", ApplyCinnPass); m->def("check_infer_symbolic_if_need", CheckInferSymbolicIfNeed); m->def("infer_symbolic_shape_pass", InferSymbolicShapePass); m->def("apply_cse_pass", ApplyCommonSubexpressionEliminationPass); m->def("apply_bn_add_act_pass", ApplyFusedBnAddActPass); + m->def("dump_pir_py_code_if_need", DumpPirPyCodeIfNeed); py::class_> pass(*m, "Pass", @@ -2487,6 +2533,7 @@ void BindPir(pybind11::module *module) { BindOperation(&ir_module); BindOpOperand(&ir_module); BindType(&ir_module); + BindVectorType(&ir_module); BindAttribute(&ir_module); BindInsertionPoint(&ir_module); BindUtils(&ir_module); diff --git a/paddle/fluid/pybind/place.cc b/paddle/fluid/pybind/place.cc index c97c9cdc94d7d..adf5852aabb64 100644 --- a/paddle/fluid/pybind/place.cc +++ b/paddle/fluid/pybind/place.cc @@ -181,8 +181,7 @@ PYBIND11_MAKE_OPAQUE(paddle::framework::FetchUnmergedList); PYBIND11_MAKE_OPAQUE(paddle::framework::FetchList); PYBIND11_MAKE_OPAQUE(paddle::framework::FetchType); -namespace paddle { -namespace pybind { +namespace paddle::pybind { PyTypeObject *g_place_pytype = nullptr; PyTypeObject *g_customplace_pytype = nullptr; PyTypeObject *g_cudaplace_pytype = nullptr; @@ -680,5 +679,4 @@ void BindPlace(pybind11::module &m) { // NOLINT .def("__str__", string::to_string); } -} // namespace pybind -} // namespace paddle +} // namespace 
paddle::pybind diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index e99bf851f7c64..89c1e5ee0688d 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -35,8 +35,7 @@ limitations under the License. */ namespace py = pybind11; -namespace paddle { -namespace pybind { +namespace paddle::pybind { PyTypeObject *g_vartype_pytype = nullptr; PyTypeObject *g_blockdesc_pytype = nullptr; @@ -547,5 +546,4 @@ void BindJitProperty(pybind11::module *m) { .def("parse_from_string", DeserializeMessage); } -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index b1163adc932fc..ae49f2594ce0a 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -62,7 +62,6 @@ limitations under the License. */ #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/raw_tensor.h" @@ -146,7 +145,6 @@ limitations under the License. 
*/ #endif #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/pybind/compiled_program.h" -#include "paddle/fluid/pybind/parallel_executor.h" #include "paddle/fluid/pybind/place.h" #include "paddle/fluid/pybind/protobuf.h" #include "paddle/fluid/pybind/pybind.h" // NOLINT diff --git a/paddle/fluid/pybind/rpc.cc b/paddle/fluid/pybind/rpc.cc index ee35e9c3a4164..bc947af36f9a1 100644 --- a/paddle/fluid/pybind/rpc.cc +++ b/paddle/fluid/pybind/rpc.cc @@ -19,7 +19,6 @@ namespace py = pybind11; using paddle::distributed::FutureWrapper; -using paddle::distributed::PythonRpcHandler; using paddle::distributed::RpcAgent; using paddle::distributed::WorkerInfo; namespace paddle { diff --git a/paddle/fluid/pybind/xpu_streams_py.cc b/paddle/fluid/pybind/xpu_streams_py.cc index 044b954ce6b65..dc60ed8468798 100644 --- a/paddle/fluid/pybind/xpu_streams_py.cc +++ b/paddle/fluid/pybind/xpu_streams_py.cc @@ -25,8 +25,7 @@ namespace py = pybind11; -namespace paddle { -namespace pybind { +namespace paddle::pybind { void BindXpuStream(py::module *m_ptr) { auto &m = *m_ptr; @@ -38,7 +37,10 @@ void BindXpuStream(py::module *m_ptr) { } int curr_device_id = paddle::platform::GetXPUCurrentDeviceId(); paddle::platform::SetXPUDeviceId(device_id); - PADDLE_ENFORCE_XPU_SUCCESS(xpu_wait()); + auto place = phi::XPUPlace(device_id); + auto *dev_ctx = static_cast( + paddle::platform::DeviceContextPool::Instance().Get(place)); + dev_ctx->Wait(); paddle::platform::SetXPUDeviceId(curr_device_id); #else PADDLE_THROW(platform::errors::Unavailable( @@ -47,5 +49,4 @@ void BindXpuStream(py::module *m_ptr) { }); } -} // namespace pybind -} // namespace paddle +} // namespace paddle::pybind diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 0788d6994ce3d..84eb2c5d39693 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -112,6 +112,8 @@ set(PHI_SRCS ${infermeta_srcs} ${capi_srcs}) +set(PHI_KERNEL_GPU_SRCS ${kernels_gpu_srcs}) + if(WITH_SHARED_PHI) 
set(PHI_BUILD_TYPE SHARED @@ -205,6 +207,40 @@ set(PHI_LIB "${CMAKE_CURRENT_BINARY_DIR}/${PHI_NAME}" CACHE FILEPATH "PHI Library" FORCE) +# NOTE(silverling): what we are doing here is to build a library `phi_kernel_gpu` +# that contains all GPU kernels implementation. This can allow paddle be built +# with more CUDA archs and reduce the binary size of `phi` library. +if(WITH_GPU OR WITH_ROCM) + if(WITH_GPU) + nv_library( + phi_kernel_gpu ${PHI_BUILD_TYPE} + SRCS ${PHI_KERNEL_GPU_SRCS} + DEPS ${PHI_DEPS}) + elseif(WITH_ROCM) + hip_library( + phi_kernel_gpu ${PHI_BUILD_TYPE} + SRCS ${PHI_KERNEL_GPU_SRCS} + DEPS ${PHI_DEPS}) + endif() + + # NOTE(silverling): making library `phi` depend on `phi_kernel_gpu` (even `phi` does not use it) + # will make targets that depend on `phi` also automatically depend on `phi_kernel_gpu`. + # This will make users' life easier. + target_link_libraries(phi phi_kernel_gpu) + + # NOTE(silverling): `phi_kernel_gpu` needs symbols from `phi`. + # When it's a shared library, it will work with no problem. + # But when it's a static library, it must be linked to `phi` at link time explicitly. + if(NOT WITH_SHARED_PHI) + target_link_libraries(phi_kernel_gpu phi) + endif() + + string(REPLACE "phi" "phi_kernel_gpu" PHI_KERNEL_GPU_NAME ${PHI_NAME}) + set(PHI_KERNEL_GPU_LIB + "${CMAKE_CURRENT_BINARY_DIR}/${PHI_KERNEL_GPU_NAME}" + CACHE FILEPATH "PHI Kernel GPU Library" FORCE) +endif() + if(MKL_FOUND AND WITH_ONEMKL) target_include_directories(phi PRIVATE ${MKL_INCLUDE}) endif() diff --git a/paddle/phi/api/generator/dist_api_gen.py b/paddle/phi/api/generator/dist_api_gen.py index 54605d19b256d..aed5d2c28d571 100644 --- a/paddle/phi/api/generator/dist_api_gen.py +++ b/paddle/phi/api/generator/dist_api_gen.py @@ -295,7 +295,7 @@ }} std::vector {name}_meta_ptr_vec({name}.size()); for (size_t i = 0; i < {name}_meta_vec.size(); ++i) {{ - {name}_meta_ptr_vec[i] = &{name}_meta_vec[i]; + {name}_meta_ptr_vec[i] = {name}[i] ? 
&{name}_meta_vec[i] : nullptr; }} """ INFER_GLOBAL_SHAPE_TEMPLATE = """ @@ -400,7 +400,7 @@ std::vector {name}_meta_vec = MakeMetaTensor({name}); std::vector {name}_meta_ptr_vec({name}_meta_vec.size()); for (size_t i = 0; i < {name}_meta_vec.size(); ++i) {{ - {name}_meta_ptr_vec[i] = &{name}_meta_vec[i]; + {name}_meta_ptr_vec[i] = {name}[i] ? &{name}_meta_vec[i] : nullptr; }} """ INFER_META_TEMPLATE = """ @@ -1106,9 +1106,7 @@ def generate_output_creation_code(self) -> str: ) else: if ( - self.need_to_generate_code_for_inplace_or_view_impl( - i - ) + self.need_to_generate_code_for_inplace_impl(i) and self.generate_general_infer_spmd ): output_creation_code += ( diff --git a/paddle/phi/api/generator/dist_bw_api_gen.py b/paddle/phi/api/generator/dist_bw_api_gen.py index 1d57d552d7767..34d495d9d0536 100644 --- a/paddle/phi/api/generator/dist_bw_api_gen.py +++ b/paddle/phi/api/generator/dist_bw_api_gen.py @@ -53,33 +53,41 @@ std::shared_ptr shared_dist_out = CreateKernelDistOutput({}, !rank_is_in_current_mesh, spmd_info.second[0]); phi::distributed::DistTensor* dist_out = shared_dist_out.get(); - phi::DenseTensor* dense_out = dist_out->unsafe_mutable_value(); - if (dense_out && !rank_is_in_current_mesh && !dist_out->defined()) {{ - *dense_out = phi::DenseTensor( + phi::DenseTensor* dense_out = nullptr; + if (dist_out) {{ + dense_out = dist_out->unsafe_mutable_value(); + if (dense_out && !rank_is_in_current_mesh && !dist_out->defined()) {{ + *dense_out = phi::DenseTensor( std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), phi::DenseTensorMeta()); }} + }} """ SINGLE_OUT_CREATION_TEMPLATE = """ std::shared_ptr shared_dist_out = CreateKernelDistOutput({}, !rank_is_in_current_mesh); phi::distributed::DistTensor* dist_out = shared_dist_out.get(); - phi::DenseTensor* dense_out = dist_out->unsafe_mutable_value(); - if (dense_out && !rank_is_in_current_mesh && !dist_out->defined()) {{ + phi::DenseTensor* dense_out = nullptr; + if (dist_out) {{ + dense_out = 
dist_out->unsafe_mutable_value(); + if (dense_out && !rank_is_in_current_mesh && !dist_out->defined()) {{ *dense_out = phi::DenseTensor( - std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), - phi::DenseTensorMeta()); + std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), + phi::DenseTensorMeta()); + }} }} """ VECTOR_OUT_CREATION_TEMPLATE_WITH_NO_SPMD = """ auto dist_out = SetKernelDistOutput({name}); - std::vector dense_out(dist_out.size()); + std::vector dense_out(dist_out.size(), nullptr); for (size_t i=0; iunsafe_mutable_value(); - if (dense_out[i] && !rank_is_in_current_mesh && !dist_out[i]->defined()) {{ - *dense_out[i] = phi::DenseTensor( - std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), - phi::DenseTensorMeta()); + if (dist_out[i]) {{ + dense_out[i] = dist_out[i]->unsafe_mutable_value(); + if (dense_out[i] && !rank_is_in_current_mesh && !dist_out[i]->defined()) {{ + *dense_out[i] = phi::DenseTensor( + std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), + phi::DenseTensorMeta()); + }} }} }} """ @@ -90,13 +98,15 @@ for(auto& e: shared_dist_out){{ dist_out.push_back(e.get()); }} - std::vector dense_out(dist_out.size()); + std::vector dense_out(dist_out.size(), nullptr); for (size_t i=0; iunsafe_mutable_value(); - if (dense_out[i] && !rank_is_in_current_mesh && !dist_out[i]->defined()) {{ - *dense_out[i] = phi::DenseTensor( - std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), - phi::DenseTensorMeta()); + if (dist_out[i]) {{ + dense_out[i] = dist_out[i]->unsafe_mutable_value(); + if (dense_out[i] && !rank_is_in_current_mesh && !dist_out[i]->defined()) {{ + *dense_out[i] = phi::DenseTensor( + std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), + phi::DenseTensorMeta()); + }} }} }} """ @@ -108,13 +118,15 @@ for(auto& e: shared_dist_out){{ dist_out.push_back(e.get()); }} - std::vector dense_out(dist_out.size()); + std::vector dense_out(dist_out.size(), nullptr); for 
(size_t i=0; iunsafe_mutable_value(); - if (dense_out[i] && !rank_is_in_current_mesh && !dist_out[i]->defined()) {{ - *dense_out[i] = phi::DenseTensor( - std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), - phi::DenseTensorMeta()); + if (dist_out[i]) {{ + dense_out[i] = dist_out[i]->unsafe_mutable_value(); + if (dense_out[i] && !rank_is_in_current_mesh && !dist_out[i]->defined()) {{ + *dense_out[i] = phi::DenseTensor( + std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), + phi::DenseTensorMeta()); + }} }} }} """ @@ -156,13 +168,15 @@ """ MULTI_VECTOR_OUT_CREATION_TEMPLATE = """ auto dist_out_{i} = SetKernelDistOutput({name}); - std::vector dense_out_{i}(dist_out_{i}.size()); + std::vector dense_out_{i}(dist_out_{i}.size(), nullptr); for (size_t i = 0; i < dist_out_{i}.size(); i++) {{ - dense_out_{i}[i] = const_cast(&dist_out_{i}[i]->value()); - if (dense_out_{i}[i] && !rank_is_in_current_mesh && !dist_out_{i}[i]->defined()) {{ - *dense_out_{i}[i]= phi::DenseTensor( - std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), - phi::DenseTensorMeta()); + if (dist_out_{i}[i]) {{ + dense_out_{i}[i] = const_cast(&dist_out_{i}[i]->value()); + if (dense_out_{i}[i] && !rank_is_in_current_mesh && !dist_out_{i}[i]->defined()) {{ + *dense_out_{i}[i]= phi::DenseTensor( + std::make_shared(nullptr, 0, phi::distributed::GetDefaultPlace()), + phi::DenseTensorMeta()); + }} }} }} """ diff --git a/paddle/phi/api/include/tensor_utils.h b/paddle/phi/api/include/tensor_utils.h index ada842835ffd8..3c2307fd01f0c 100644 --- a/paddle/phi/api/include/tensor_utils.h +++ b/paddle/phi/api/include/tensor_utils.h @@ -62,7 +62,7 @@ PADDLE_API Tensor from_blob(void* data, * @note Input of `Reshard` should be a `paddle::Tensor` whose impl is * shared_ptr of DistTensor. According to the given DistAttr, input will be * reshard to wanted distributed state. And it will return shared_ptr of a new - * DistTensor as outptut. + * DistTensor as output. 
* * @param input The input tensor to be resharded. * @param dist_attr The dist_attr to be resharded. diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index ef5cfc90727ff..c6426898371d2 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -736,6 +736,7 @@ std::shared_ptr CreateKernelDistOutput( } return dist_output; } + VLOG(4) << "CreateKernelDistOutput with NULL out"; return nullptr; } diff --git a/paddle/phi/api/lib/context_pool.cc b/paddle/phi/api/lib/context_pool.cc index ee1e21a58e2f1..e2eb1af09d8a5 100644 --- a/paddle/phi/api/lib/context_pool.cc +++ b/paddle/phi/api/lib/context_pool.cc @@ -23,8 +23,7 @@ limitations under the License. */ #include "paddle/phi/core/cuda_stream.h" #endif -namespace paddle { -namespace experimental { +namespace paddle::experimental { void DeviceContextPool::SyncDeviceContext(const Place& place) { if (!phi::DeviceContextPool::IsInitialized()) { @@ -64,8 +63,7 @@ phi::DeviceContext* DeviceContextPool::GetMutable(const Place& place) { return const_cast(Get(place)); // NOLINT } -} // namespace experimental -} // namespace paddle +} // namespace paddle::experimental namespace paddle { diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 28aecaf64094c..b9962901851dc 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -303,6 +303,7 @@ phi::DenseTensor CheckAndTrans2NewContiguousTensor( std::vector CheckAndTrans2NewContiguousTensor( const std::vector& tensor) { std::vector out; + out.reserve(tensor.size()); for (auto& t : tensor) { out.emplace_back(CheckAndTrans2NewContiguousTensor(t)); } diff --git a/paddle/phi/api/lib/data_transform.h b/paddle/phi/api/lib/data_transform.h index 65729a01c20d4..9e023428a7672 100644 --- a/paddle/phi/api/lib/data_transform.h +++ b/paddle/phi/api/lib/data_transform.h @@ -64,7 +64,7 @@ class TransformFlag { // trans_data_type_ can be setted by 
api[data_transform->support_trans_dtype] // in the yaml file. // trans_data_type_ only affect the non complex types, - // the complex is always transferd, except stop_transform_ is true. + // the complex is always transfered, except stop_transform_ is true. bool trans_data_type_ = false; // trans_backend_ and trans_layout_ are true defaultly, diff --git a/paddle/phi/api/lib/kernel_dispatch.cc b/paddle/phi/api/lib/kernel_dispatch.cc index 94b04404c4ced..16dea76b4ad68 100644 --- a/paddle/phi/api/lib/kernel_dispatch.cc +++ b/paddle/phi/api/lib/kernel_dispatch.cc @@ -26,9 +26,7 @@ limitations under the License. */ #include "paddle/phi/backends/device_manager.h" #endif -namespace paddle { -namespace experimental { -namespace detail { +namespace paddle::experimental::detail { // We need judge whether the allocation is nullptr, // whether the allocation is initialized, wo we need GetHolder method @@ -109,7 +107,8 @@ std::size_t CountLeadingZeros(uint32_t val) { #endif } -} // namespace detail +} // namespace paddle::experimental::detail +namespace paddle::experimental { phi::DeviceContext* GetDeviceContextByBackend(phi::Backend backend) { auto& pool = paddle::experimental::DeviceContextPool::Instance(); @@ -182,5 +181,4 @@ phi::DataLayout ParseLayoutWithInputOrder(phi::DataLayout layout, return layout != phi::DataLayout::UNDEFINED ? layout : ParseLayout(tensor); } -} // namespace experimental -} // namespace paddle +} // namespace paddle::experimental diff --git a/paddle/phi/api/lib/scalar.cc b/paddle/phi/api/lib/scalar.cc index fd13e0809fadd..38cd92057931a 100644 --- a/paddle/phi/api/lib/scalar.cc +++ b/paddle/phi/api/lib/scalar.cc @@ -19,8 +19,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" -namespace paddle { -namespace experimental { +namespace paddle::experimental { template <> ScalarBase::ScalarBase(const Tensor& tensor_in) @@ -53,5 +52,4 @@ ScalarBase::ScalarBase(const Tensor& tensor_in) } } -} // namespace experimental -} // namespace paddle +} // namespace paddle::experimental diff --git a/paddle/phi/api/lib/tensor_utils.cc b/paddle/phi/api/lib/tensor_utils.cc index c6cce9577e9ec..b411eb1e69377 100644 --- a/paddle/phi/api/lib/tensor_utils.cc +++ b/paddle/phi/api/lib/tensor_utils.cc @@ -124,7 +124,7 @@ PADDLE_API std::shared_ptr reshard( typeid(input.impl().get()).name())); auto dev_ctx = phi::distributed::GetDistTensorDeviceContext( static_cast(input.impl().get())); - auto input_tensor_impl = input.impl(); + const auto& input_tensor_impl = input.impl(); std::shared_ptr dist_out_ptr = nullptr; if (input_tensor_impl) { phi::distributed::DistTensor* dist_tensor = diff --git a/paddle/phi/api/profiler/trace_event.h b/paddle/phi/api/profiler/trace_event.h index e526953d5c8e0..b74f1754ee318 100644 --- a/paddle/phi/api/profiler/trace_event.h +++ b/paddle/phi/api/profiler/trace_event.h @@ -49,7 +49,7 @@ enum class TracerEventType { Communication = 12, // Used to mark python api PythonOp = 13, - // Used to mark python level userdefined + // Used to mark python level user-defined PythonUserDefined = 14, // A flag to denote the number of current types NumTypes diff --git a/paddle/phi/backends/custom/custom_device.cc b/paddle/phi/backends/custom/custom_device.cc index 624aabeffaba7..d669eab67af42 100644 --- a/paddle/phi/backends/custom/custom_device.cc +++ b/paddle/phi/backends/custom/custom_device.cc @@ -587,7 +587,7 @@ class CustomDevice : public DeviceInterface { #undef return_result } - C_DataType ToCDatatType(phi::DataType data_type) { + C_DataType ToCDataType(phi::DataType data_type) { #define return_result(in, ret) \ case in: \ return C_DataType::ret @@ -669,7 +669,7 @@ 
class CustomDevice : public DeviceInterface { send_buf, recv_buf, count, - ToCDatatType(data_type), + ToCDataType(data_type), ToXCCLReduceOp(op), reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); @@ -685,7 +685,7 @@ class CustomDevice : public DeviceInterface { PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS(pimpl_->xccl_broadcast( buf, count, - ToCDatatType(data_type), + ToCDataType(data_type), root, reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); @@ -704,7 +704,7 @@ class CustomDevice : public DeviceInterface { pimpl_->xccl_reduce(in_data, out_data, num, - ToCDatatType(data_type), + ToCDataType(data_type), ToXCCLReduceOp(reduce_op), root_id, reinterpret_cast(comm), @@ -722,7 +722,7 @@ class CustomDevice : public DeviceInterface { send_buf, recv_buf, count, - ToCDatatType(data_type), + ToCDataType(data_type), reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); } @@ -739,7 +739,7 @@ class CustomDevice : public DeviceInterface { send_buf, recv_buf, count, - ToCDatatType(data_type), + ToCDataType(data_type), ToXCCLReduceOp(reduce_op), reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); @@ -767,7 +767,7 @@ class CustomDevice : public DeviceInterface { PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS( pimpl_->xccl_send(send_buf, count, - ToCDatatType(data_type), + ToCDataType(data_type), dest_rank, reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); @@ -783,7 +783,7 @@ class CustomDevice : public DeviceInterface { PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS( pimpl_->xccl_recv(recv_buf, count, - ToCDatatType(data_type), + ToCDataType(data_type), src_rank, reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); @@ -802,8 +802,8 @@ class CustomDevice : public DeviceInterface { if (pimpl_->xccl_all_to_all) { std::vector c_send_dtype, c_recv_dtype; for (size_t i = 0; i < nranks; ++i) { - c_send_dtype.push_back(ToCDatatType(send_dtype[i])); - c_recv_dtype.push_back(ToCDatatType(recv_dtype[i])); + 
c_send_dtype.push_back(ToCDataType(send_dtype[i])); + c_recv_dtype.push_back(ToCDataType(recv_dtype[i])); } PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS(pimpl_->xccl_all_to_all( send_buf, @@ -823,7 +823,7 @@ class CustomDevice : public DeviceInterface { PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS( pimpl_->xccl_recv(recv_buf[i], recv_count[i], - ToCDatatType(recv_dtype[i]), + ToCDataType(recv_dtype[i]), i, reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); @@ -833,7 +833,7 @@ class CustomDevice : public DeviceInterface { PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS(pimpl_->xccl_send( const_cast(send_buf[i]), send_count[i], - ToCDatatType(send_dtype[i]), + ToCDataType(send_dtype[i]), i, reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); @@ -848,7 +848,7 @@ class CustomDevice : public DeviceInterface { PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS( pimpl_->xccl_recv(recv_buf[i], recv_count[i], - ToCDatatType(recv_dtype[i]), + ToCDataType(recv_dtype[i]), i, reinterpret_cast(comm), reinterpret_cast(stream.raw_stream()))); @@ -872,7 +872,7 @@ class CustomDevice : public DeviceInterface { PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS( pimpl_->blas_axpby(device, reinterpret_cast(stream.raw_stream()), - ToCDatatType(dtype), + ToCDataType(dtype), numel, alpha, x, diff --git a/paddle/phi/backends/device_memory_aligment.h b/paddle/phi/backends/device_memory_alignment.h similarity index 100% rename from paddle/phi/backends/device_memory_aligment.h rename to paddle/phi/backends/device_memory_alignment.h diff --git a/paddle/phi/backends/dynload/cublas.cc b/paddle/phi/backends/dynload/cublas.cc index 2fe9ae774bf7a..b870a90cb091c 100644 --- a/paddle/phi/backends/dynload/cublas.cc +++ b/paddle/phi/backends/dynload/cublas.cc @@ -14,8 +14,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/dynload/cublas.h" -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag cublas_dso_flag; void *cublas_dso_handle = nullptr; @@ -34,5 +33,4 @@ CUBLAS_BLAS_ROUTINE_EACH_R3(DEFINE_WRAP); #ifdef CUBLAS_BLAS_ROUTINE_EACH_R4 CUBLAS_BLAS_ROUTINE_EACH_R4(DEFINE_WRAP); #endif -} // namespace dynload -} // namespace phi +} // namespace phi::dynload diff --git a/paddle/phi/backends/dynload/cublas.h b/paddle/phi/backends/dynload/cublas.h index 8053bbb6bd2ce..6da85283d6e71 100644 --- a/paddle/phi/backends/dynload/cublas.h +++ b/paddle/phi/backends/dynload/cublas.h @@ -94,8 +94,14 @@ extern void *cublas_dso_handle; __macro(cublasSgetriBatched); \ __macro(cublasDgetrfBatched); \ __macro(cublasDgetriBatched); \ + __macro(cublasCgetrfBatched); \ + __macro(cublasCgetriBatched); \ + __macro(cublasZgetrfBatched); \ + __macro(cublasZgetriBatched); \ __macro(cublasSmatinvBatched); \ __macro(cublasDmatinvBatched); \ + __macro(cublasCmatinvBatched); \ + __macro(cublasZmatinvBatched); \ __macro(cublasSgetrsBatched); \ __macro(cublasDgetrsBatched); diff --git a/paddle/phi/backends/dynload/cuda_driver.cc b/paddle/phi/backends/dynload/cuda_driver.cc index d9fd89a0c65a6..afd6fbb76f460 100644 --- a/paddle/phi/backends/dynload/cuda_driver.cc +++ b/paddle/phi/backends/dynload/cuda_driver.cc @@ -14,8 +14,7 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/cuda_driver.h" -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag cuda_dso_flag; void* cuda_dso_handle = nullptr; @@ -33,5 +32,4 @@ bool HasCUDADriver() { return cuda_dso_handle != nullptr; } -} // namespace dynload -} // namespace phi +} // namespace phi::dynload diff --git a/paddle/phi/backends/dynload/cupti.cc b/paddle/phi/backends/dynload/cupti.cc index 1d6e7c86c24d0..43fb64fd6f0a3 100644 --- a/paddle/phi/backends/dynload/cupti.cc +++ b/paddle/phi/backends/dynload/cupti.cc @@ -16,8 +16,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/dynload/cupti.h" -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag cupti_dso_flag; void *cupti_dso_handle = nullptr; @@ -26,7 +25,6 @@ void *cupti_dso_handle = nullptr; CUPTI_ROUTINE_EACH(DEFINE_WRAP); -} // namespace dynload -} // namespace phi +} // namespace phi::dynload #endif // PADDLE_WITH_CUPTI diff --git a/paddle/phi/backends/dynload/cusolver.cc b/paddle/phi/backends/dynload/cusolver.cc index a5c88cf525c91..578edf14b49ed 100644 --- a/paddle/phi/backends/dynload/cusolver.cc +++ b/paddle/phi/backends/dynload/cusolver.cc @@ -14,8 +14,7 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/cusolver.h" -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag cusolver_dso_flag; void *cusolver_dso_handle; @@ -32,5 +31,4 @@ CUSOLVER_ROUTINE_EACH_R1(DEFINE_WRAP); CUSOLVER_ROUTINE_EACH_R2(DEFINE_WRAP); #endif -} // namespace dynload -} // namespace phi +} // namespace phi::dynload diff --git a/paddle/phi/backends/dynload/cusparse.cc b/paddle/phi/backends/dynload/cusparse.cc index ce8f87dc3cdfa..9d89b746df5b7 100644 --- a/paddle/phi/backends/dynload/cusparse.cc +++ b/paddle/phi/backends/dynload/cusparse.cc @@ -14,8 +14,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/dynload/cusparse.h" -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag cusparse_dso_flag; void *cusparse_dso_handle; @@ -34,5 +33,4 @@ CUSPARSE_ROUTINE_EACH_R2(DEFINE_WRAP); CUSPARSE_ROUTINE_EACH_R3(DEFINE_WRAP); #endif -} // namespace dynload -} // namespace phi +} // namespace phi::dynload diff --git a/paddle/phi/backends/dynload/cutlass_conv2d.cc b/paddle/phi/backends/dynload/cutlass_conv2d.cc index 936a04fa3023c..a72eaba46eb0d 100644 --- a/paddle/phi/backends/dynload/cutlass_conv2d.cc +++ b/paddle/phi/backends/dynload/cutlass_conv2d.cc @@ -16,8 +16,7 @@ #include #include "paddle/phi/core/enforce.h" -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag cutlass_dso_flag; void* cutlass_dso_handle; @@ -53,5 +52,4 @@ void* GetCutlassConv2dHandle() { return cutlass_dso_handle; } -} // namespace dynload -} // namespace phi +} // namespace phi::dynload diff --git a/paddle/phi/backends/dynload/dynamic_loader.cc b/paddle/phi/backends/dynload/dynamic_loader.cc index 612a959fc307b..5d8e26732196d 100644 --- a/paddle/phi/backends/dynload/dynamic_loader.cc +++ b/paddle/phi/backends/dynload/dynamic_loader.cc @@ -351,14 +351,14 @@ void* GetCublasDsoHandle() { return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib"); #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cublas64_11.dll"); #else return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, win_cublas_lib, true, {cuda_lib_path}); #endif } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cublas64_12.dll"); #else return GetDsoHandleFromSearchPath( @@ -372,13 +372,13 @@ void* GetCublasDsoHandle() { } #elif defined(__linux__) && 
defined(PADDLE_WITH_CUDA) if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublas.so.11"); #else return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublas.so"); #endif } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublas.so.12"); #else return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublas.so"); @@ -400,13 +400,13 @@ void* GetCublasLtDsoHandle() { // APIs available after CUDA 10.1 #if defined(__linux__) && defined(PADDLE_WITH_CUDA) if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublasLt.so.11"); #else return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublasLt.so"); #endif } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublasLt.so.12"); #else return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublasLt.so"); @@ -448,7 +448,7 @@ void* GetCUDNNDsoHandle() { "You should do this according to your CUDA installation directory and " "CUDNN version."); if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, "cudnn64_8.dll", true, {cuda_lib_path}, win_warn_meg); #else @@ -456,7 +456,7 @@ void* GetCUDNNDsoHandle() { FLAGS_cuda_dir, win_cudnn_lib, true, {cuda_lib_path}, win_warn_meg); #endif } else if (CUDA_VERSION >= 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, "cudnn64_9.dll", true, {cuda_lib_path}, win_warn_meg); 
#else @@ -467,7 +467,7 @@ void* GetCUDNNDsoHandle() { #elif defined(PADDLE_WITH_HIP) return GetDsoHandleFromSearchPath(FLAGS_miopen_dir, "libMIOpen.so", false); #else -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES if (CUDA_VERSION >= 12030) { return GetDsoHandleFromSearchPath( FLAGS_cudnn_dir, "libcudnn.so.9", false, {cuda_lib_path}); @@ -488,7 +488,7 @@ void* GetCUPTIDsoHandle() { FLAGS_cupti_dir, "libcupti.dylib", false, {cupti_lib_path}); #elif defined(__linux__) && defined(PADDLE_WITH_CUDA) if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath( FLAGS_cupti_dir, "libcupti.so.11.8", false, {cupti_lib_path}); #else @@ -497,7 +497,7 @@ void* GetCUPTIDsoHandle() { #endif } else if (CUDA_VERSION >= 12000 && CUDA_VERSION < 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath( FLAGS_cupti_dir, "libcupti.so.12", false, {cupti_lib_path}); #else @@ -520,7 +520,7 @@ void* GetCurandDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib"); #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, "curand64_10.dll", true, {cuda_lib_path}); #else @@ -530,7 +530,7 @@ void* GetCurandDsoHandle() { #elif defined(PADDLE_WITH_HIP) return GetDsoHandleFromSearchPath(FLAGS_rocm_dir, "libhiprand.so"); #else -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_curand_dir, "libcurand.so.10"); #else return GetDsoHandleFromSearchPath(FLAGS_curand_dir, "libcurand.so"); @@ -564,7 +564,7 @@ void* GetCusolverDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.dylib"); #elif defined(_WIN32) && 
defined(PADDLE_WITH_CUDA) -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, "cusolver64_11.dll", true, {cuda_lib_path}); #else @@ -572,7 +572,7 @@ void* GetCusolverDsoHandle() { FLAGS_cuda_dir, win_cusolver_lib, true, {cuda_lib_path}); #endif #else -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.so.11"); #else return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.so"); @@ -585,14 +585,14 @@ void* GetCusparseDsoHandle() { return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusparse.dylib"); #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cusparse64_11.dll"); #else return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, win_cusparse_lib, true, {cuda_lib_path}); #endif } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cusparse64_12.dll"); #else return GetDsoHandleFromSearchPath( @@ -606,13 +606,13 @@ void* GetCusparseDsoHandle() { } #elif defined(__linux__) && defined(PADDLE_WITH_CUDA) if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cusparse_dir, "libcusparse.so.11"); #else return GetDsoHandleFromSearchPath(FLAGS_cusparse_dir, "libcusparse.so"); #endif } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cusparse_dir, "libcusparse.so.12"); #else return GetDsoHandleFromSearchPath(FLAGS_cusparse_dir, "libcusparse.so"); @@ -716,7 +716,7 @@ void* GetNCCLDsoHandle() { return 
GetDsoHandleFromSearchPath( FLAGS_rccl_dir, "librccl.so", true, {}, warning_msg); #else -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath( FLAGS_nccl_dir, "libnccl.so;libnccl.so.2", true, {}, warning_msg); #else @@ -782,7 +782,7 @@ void* GetCUFFTDsoHandle() { return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcufft.dylib"); #elif defined(__linux__) && defined(PADDLE_WITH_CUDA) if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcufft.so.10"); #else return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcufft.so"); @@ -797,14 +797,14 @@ void* GetCUFFTDsoHandle() { } #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cufft64_10.dll"); #else return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, win_cufft_lib, true, {cuda_lib_path}); #endif } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { -#ifdef WITH_PIP_CUDA_LIBRARIES +#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cufft64_11.dll"); #else return GetDsoHandleFromSearchPath( diff --git a/paddle/phi/backends/dynload/lapack.cc b/paddle/phi/backends/dynload/lapack.cc index 9719da9775146..924ea5192cd5c 100644 --- a/paddle/phi/backends/dynload/lapack.cc +++ b/paddle/phi/backends/dynload/lapack.cc @@ -16,8 +16,7 @@ limitations under the License. 
*/ #include -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag lapack_dso_flag; void* lapack_dso_handle = nullptr; @@ -26,5 +25,4 @@ void* lapack_dso_handle = nullptr; LAPACK_ROUTINE_EACH(DEFINE_WRAP); -} // namespace dynload -} // namespace phi +} // namespace phi::dynload diff --git a/paddle/phi/backends/dynload/nvjpeg.cc b/paddle/phi/backends/dynload/nvjpeg.cc index 9e9ac77dbaa98..8f13389398504 100644 --- a/paddle/phi/backends/dynload/nvjpeg.cc +++ b/paddle/phi/backends/dynload/nvjpeg.cc @@ -11,8 +11,7 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/nvjpeg.h" -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag nvjpeg_dso_flag; void *nvjpeg_dso_handle; @@ -21,5 +20,4 @@ void *nvjpeg_dso_handle; NVJPEG_RAND_ROUTINE_EACH(DEFINE_WRAP); -} // namespace dynload -} // namespace phi +} // namespace phi::dynload diff --git a/paddle/phi/backends/dynload/nvtx.cc b/paddle/phi/backends/dynload/nvtx.cc index b6bed459f32de..1fb52566fd6ae 100644 --- a/paddle/phi/backends/dynload/nvtx.cc +++ b/paddle/phi/backends/dynload/nvtx.cc @@ -14,8 +14,7 @@ limitations under the License. 
*/ #ifndef _WIN32 #include "paddle/phi/backends/dynload/nvtx.h" -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag nvtx_dso_flag; void *nvtx_dso_handle; @@ -24,6 +23,5 @@ void *nvtx_dso_handle; NVTX_ROUTINE_EACH(DEFINE_WRAP); -} // namespace dynload -} // namespace phi +} // namespace phi::dynload #endif diff --git a/paddle/phi/backends/dynload/tensorrt.cc b/paddle/phi/backends/dynload/tensorrt.cc index ff4217ce02054..9d21b70e3be01 100644 --- a/paddle/phi/backends/dynload/tensorrt.cc +++ b/paddle/phi/backends/dynload/tensorrt.cc @@ -16,8 +16,7 @@ #include -namespace phi { -namespace dynload { +namespace phi::dynload { std::once_flag tensorrt_dso_flag; void* tensorrt_dso_handle; @@ -80,5 +79,4 @@ void* GetTensorRtPluginHandle() { return GetDsoHandle(dso_name); } -} // namespace dynload -} // namespace phi +} // namespace phi::dynload diff --git a/paddle/phi/backends/gpu/cuda/cuda_graph.cc b/paddle/phi/backends/gpu/cuda/cuda_graph.cc index 43ec0a0c89c08..ced9c22816c63 100644 --- a/paddle/phi/backends/gpu/cuda/cuda_graph.cc +++ b/paddle/phi/backends/gpu/cuda/cuda_graph.cc @@ -25,9 +25,7 @@ cudaError_t cudaGetFuncBySymbol(cudaFunction_t *functionPtr, COMMON_DECLARE_bool(use_cuda_malloc_async_allocator); COMMON_DECLARE_bool(auto_free_cudagraph_allocations_on_launch); -namespace phi { -namespace backends { -namespace gpu { +namespace phi::backends::gpu { std::unique_ptr CUDAGraph::capturing_graph_{nullptr}; paddle::optional CUDAGraph::capturing_thread_id_{paddle::none}; @@ -379,6 +377,4 @@ CUDAGraphNodeLauncher::GetParameterSettersForExecGraph(cudaGraph_t graph) { } #endif -} // namespace gpu -} // namespace backends -} // namespace phi +} // namespace phi::backends::gpu diff --git a/paddle/phi/backends/gpu/cuda/cuda_helper.h b/paddle/phi/backends/gpu/cuda/cuda_helper.h index 555cc2357b2ab..4f12ca02e3060 100644 --- a/paddle/phi/backends/gpu/cuda/cuda_helper.h +++ b/paddle/phi/backends/gpu/cuda/cuda_helper.h @@ -43,7 +43,7 @@ namespace gpu { * 
this time, the cycle condition `i < (n)` is still satisfied, so it * will cause illegal access to cuda memory. * - * Here is a real example in ERINE, it will trigger above error. + * Here is a real example in ERNIE, it will trigger above error. * The related data are: * - blockIdx.x = 2172938 * - blockDim.x = 512 diff --git a/paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.cc b/paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.cc index d5b484c8eeb56..b4f71dacb3fcb 100644 --- a/paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.cc +++ b/paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.cc @@ -17,9 +17,7 @@ #include #include -namespace phi { -namespace backends { -namespace gpu { +namespace phi::backends::gpu { static int GetDefaultConvWorkspaceSizeLimitMBImpl() { const char *env_str = std::getenv("FLAGS_conv_workspace_size_limit"); @@ -31,6 +29,4 @@ int GetDefaultConvWorkspaceSizeLimitMB() { static auto workspace_size = GetDefaultConvWorkspaceSizeLimitMBImpl(); return workspace_size; } -} // namespace gpu -} // namespace backends -} // namespace phi +} // namespace phi::backends::gpu diff --git a/paddle/phi/backends/gpu/gpu_info.cc b/paddle/phi/backends/gpu/gpu_info.cc index 32546f762c39e..7f83b5c0e1da3 100644 --- a/paddle/phi/backends/gpu/gpu_info.cc +++ b/paddle/phi/backends/gpu/gpu_info.cc @@ -24,9 +24,7 @@ limitations under the License. 
*/ COMMON_DECLARE_string(selected_gpus); -namespace phi { -namespace backends { -namespace gpu { +namespace phi::backends::gpu { static inline std::vector Split(std::string const& original, char separator) { @@ -83,6 +81,4 @@ size_t GpuMinChunkSize() { return 1 << 8; } -} // namespace gpu -} // namespace backends -} // namespace phi +} // namespace phi::backends::gpu diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index fc0f8ee1e35e1..60a4303c4605a 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -1009,6 +1009,10 @@ XPUOpMap& get_kl2_ops() { {"swish", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"swish_grad", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + {"swiglu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + {"swiglu_grad", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"take_along_axis", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"tanh_grad", @@ -1055,7 +1059,8 @@ XPUOpMap& get_kl2_ops() { phi::DataType::INT64, phi::DataType::BOOL, phi::DataType::FLOAT64, - phi::DataType::FLOAT32})}, + phi::DataType::FLOAT32, + phi::DataType::FLOAT16})}, {"tile_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"transpose2_grad", XPUKernelSet({phi::DataType::FLOAT32, @@ -1248,6 +1253,7 @@ XPUOpMap& get_kl2_ops() { XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, {"sequence_unpad_xpu", XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, + {"block_multihead_attention_xpu", XPUKernelSet({phi::DataType::FLOAT16})}, }; return s_xpu2_kernels; diff --git a/paddle/phi/backends/xpu/xpu_context.cc b/paddle/phi/backends/xpu/xpu_context.cc index 1e06c9358a40a..c16822989c849 100644 --- a/paddle/phi/backends/xpu/xpu_context.cc +++ b/paddle/phi/backends/xpu/xpu_context.cc @@ -307,7 +307,7 @@ static int64_t get_l3_size(int i) { XPUContext::XPUContext() : 
DeviceContext() { if (std::getenv("XPU_CDNN_CLUSTER_PARALLEL") != nullptr) { - int default_num_stream = 4; + int default_num_stream = 2; if (std::getenv("XPU_CDNN_CLUSTER_PARALLEL_STREAM_NUMBER") != nullptr) { default_num_stream = atoi(std::getenv("XPU_CDNN_CLUSTER_PARALLEL_STREAM_NUMBER")); @@ -348,12 +348,31 @@ XPUContext::~XPUContext() = default; const Place& XPUContext::GetPlace() const { return impls_[0]->GetPlace(); } -XPUStream XPUContext::stream(int i) const { return impls_[i]->stream(); } +XPUStream XPUContext::stream(int i) const { + CheckValidStreamId(i); + return impls_[i]->stream(); +} void XPUContext::SetStream(void* stream, int i) { + CheckValidStreamId(i); impls_[i]->SetStream(stream); } +void XPUContext::CheckValidStreamId(int i) const { + PADDLE_ENFORCE_GE( + i, + 0, + errors::InvalidArgument( + "The stream index must be greater than or equal to 0.")); + PADDLE_ENFORCE_LT( + i, + GetStreamNum(), + errors::InvalidArgument("The stream index shoule be less than the number " + "of stream used (%d), but got %d", + GetStreamNum(), + i)); +} + void XPUContext::SetXpuVersion(int version) { impls_[0]->xpu_version_ = static_cast(version); } @@ -371,6 +390,7 @@ backends::xpu::XPUVersion XPUContext::xpu_version() const { } xpu::Context* XPUContext::x_context(int i) const { + CheckValidStreamId(i); return impls_[i]->GetXContext(); } @@ -385,10 +405,12 @@ void XPUContext::Wait() const { } void XPUContext::SetXContext(xpu::Context* context, int i) { + CheckValidStreamId(i); impls_[i]->SetXContext(context); } void XPUContext::SetL3Cache(int64_t l3_size, int i) { + CheckValidStreamId(i); impls_[i]->SetL3Cache(l3_size); } @@ -396,7 +418,36 @@ void XPUContext::SetBkclContext(xpu::BKCLContext_t context) { impls_[0]->SetBkclContext(context); } -void XPUContext::CreateStream(int i) { impls_[i]->CreateStream(); } +void XPUContext::CreateStream(int i) { + CheckValidStreamId(i); + impls_[i]->CreateStream(); +} + +void XPUContext::RecordEvent(XPUEvent event, int s) const { 
+ CheckValidStreamId(s); + int r = xpu_event_record(event, stream(s)); + PADDLE_ENFORCE_XRE_SUCCESS(r); +} + +void XPUContext::StreamWaitEvent(XPUEvent event, int s) const { + CheckValidStreamId(s); + int r = xpu_stream_wait_event(stream(s), event); + PADDLE_ENFORCE_XRE_SUCCESS(r); +} + +void XPUContext::StreamWaitStream(int wait_stream, int record_stream) const { + CheckValidStreamId(wait_stream); + CheckValidStreamId(record_stream); + XPUEvent event; + int r = xpu_event_create(&event); + PADDLE_ENFORCE_XRE_SUCCESS(r); + RecordEvent(event, record_stream); + StreamWaitEvent(event, wait_stream); + r = xpu_event_destroy(event); + PADDLE_ENFORCE_XRE_SUCCESS(r); +} + +int64_t XPUContext::GetStreamNum() const { return impls_.size(); } void XPUContext::Init() { impls_[0]->Init(); } } // namespace phi diff --git a/paddle/phi/backends/xpu/xpu_context.h b/paddle/phi/backends/xpu/xpu_context.h index 1f2aaa1990540..da6e5f10dc362 100644 --- a/paddle/phi/backends/xpu/xpu_context.h +++ b/paddle/phi/backends/xpu/xpu_context.h @@ -53,6 +53,10 @@ class XPUContext : public DeviceContext, xpu::BKCLContext_t bkcl_context() const; void SetBkclContext(xpu::BKCLContext_t context); void CreateStream(int i = 0); + void RecordEvent(XPUEvent event, int s) const; + void StreamWaitEvent(XPUEvent event, int s) const; + void StreamWaitStream(int wait_stream, int record_stream) const; + int64_t GetStreamNum() const; // For share external stream. void SetStream(void* stream, int i = 0); @@ -89,6 +93,8 @@ class XPUContext : public DeviceContext, private: struct Impl; std::vector> impls_; + + void CheckValidStreamId(int i) const; }; // KPS (Kernel PrimitiveS API) needs to exist as a kind of backend, diff --git a/paddle/phi/common/scalar.cc b/paddle/phi/common/scalar.cc index e942e2f18cefa..7bbbe619ab39d 100644 --- a/paddle/phi/common/scalar.cc +++ b/paddle/phi/common/scalar.cc @@ -19,8 +19,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/tensor_utils.h" -namespace paddle { -namespace experimental { +namespace paddle::experimental { // The Tensor must have one dim template <> @@ -54,5 +53,4 @@ bool operator!=(const Scalar& lhs, const Scalar& rhs) { std::ostream& operator<<(std::ostream& os, const Scalar& s) { return os << s.ToString(); } -} // namespace experimental -} // namespace paddle +} // namespace paddle::experimental diff --git a/paddle/phi/core/distributed/auto_parallel/auto_parallel.proto b/paddle/phi/core/distributed/auto_parallel/auto_parallel.proto index 70c9e72aa5fe7..71c18ac426019 100644 --- a/paddle/phi/core/distributed/auto_parallel/auto_parallel.proto +++ b/paddle/phi/core/distributed/auto_parallel/auto_parallel.proto @@ -25,7 +25,7 @@ message ProcessMeshProto { // There are no duplicate process ids within one process mesh. repeated int64 process_ids = 2; - // The name of each dimension. + // The name of each dimension. repeated string dim_names = 3; } @@ -37,17 +37,17 @@ message TensorDistAttrProto { optional ProcessMeshProto process_mesh = 1; // The length of dims_mapping is same as the length of the tensor shape. - // The i-th dimension of the tensor will be sharded by the dims_mapping[i]-th dimension + // The i-th dimension of the tensor will be sharded by the dims_mapping[i]-th dimension // of the above process mesh. If dims_mapping[i] is -1, the i-th dimension of the tensor // will not be sharded. For example, given a tensor shape [2, 6, 12], a process mesh // shape [2, 3] and a dims_mapping [-1, 1, 0], each sharded tensor will have a shape [2, 2, 6]. repeated int64 dims_mapping = 2; - // The batch dimension of the corresponding tensor. + // The batch dimension of the corresponding tensor. optional int64 batch_dim = 3; - // If the dynamic_dims[i] is True, the i-th dimension of the corresponding tensor - // is dynamic changed. 
Otherwise, the i-th dimension of the tensor is static determined. + // If the dynamic_dims[i] is True, the i-th dimension of the corresponding tensor + // is dynamic changed. Otherwise, the i-th dimension of the tensor is static determined. repeated bool dynamic_dims = 4; // This field is used to distinguish vars which are in same process_mesh and in different vpp chunk @@ -60,16 +60,16 @@ message OperatorDistAttrProto { message TensorDistAttrMappingEntryProto { optional string name = 1; optional TensorDistAttrProto tensor_dist_attr = 2; - } + } // The key of this map is the input tensor name and the value is the distributed attribute - // of the input tensor required by this corresponding operator. - // The distributed attribute of the actual tensor may be not the same as that within + // of the input tensor required by this corresponding operator. + // The distributed attribute of the actual tensor may be not the same as that within // the distributed attribute of the operator. repeated TensorDistAttrMappingEntryProto input_dist_attrs = 1; // The key of this map is the output tensor name and the value is the distributed attribute - // of the output tensor required by this corresponding operator. - // The distributed attribute of the actual tensor may be not the same as that within + // of the output tensor required by this corresponding operator. + // The distributed attribute of the actual tensor may be not the same as that within // the distributed attribute of the operator. repeated TensorDistAttrMappingEntryProto output_dist_attrs = 2; @@ -81,7 +81,7 @@ message OperatorDistAttrProto { // may shared the same distributed operator, the field is use for this scenario. optional string impl_type = 4; - // This field tells which distributed implementations of this corresponding operator + // This field tells which distributed implementations of this corresponding operator // will be selected for the actual computation. 
optional int64 impl_idx = 5; @@ -115,13 +115,13 @@ message DeviceProto { optional string type = 4; // The capability of this device. - optional DeviceCapabilityProto capability = 5; + optional DeviceCapabilityProto capability = 5; } -// This proto describes the capability of the link between two devices. -message LinkCapabilityProto { - optional int64 bandwidth = 1; // Bytes/s - optional int64 latency = 2; +// This proto describes the capability of the link between two devices. +message LinkCapabilityProto { + optional int64 bandwidth = 1; // Bytes/s + optional int64 latency = 2; } message LinkProto { @@ -133,14 +133,14 @@ message LinkProto { // Represent the link type. optional string type = 3; - + // The capability of this link. - optional LinkCapabilityProto capability = 4; + optional LinkCapabilityProto capability = 4; } // DeviceMesh is used to organize devices and like n-dimension array. message DeviceMeshProto { - // The global id of this mesh. + // The global id of this mesh. optional string name = 1; // The size of each dimension. @@ -150,13 +150,13 @@ message DeviceMeshProto { // There are no duplicate device ids within one device mesh. repeated int64 device_ids = 3; - // The name of each dimension. + // The name of each dimension. repeated string dim_names = 4; // The devices of this mesh. repeated DeviceProto devices = 5; - // The links are between devices. + // The links are between devices. repeated LinkProto links = 6; } diff --git a/paddle/phi/core/distributed/auto_parallel/dist_attr.cc b/paddle/phi/core/distributed/auto_parallel/dist_attr.cc index 62fbd97c46ab2..98dfa339589a5 100644 --- a/paddle/phi/core/distributed/auto_parallel/dist_attr.cc +++ b/paddle/phi/core/distributed/auto_parallel/dist_attr.cc @@ -21,8 +21,7 @@ limitations under the License. 
*/ #include "glog/logging.h" #include "paddle/phi/core/distributed/auto_parallel/proto_helper.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; using phi::distributed::auto_parallel::TensorDistAttrProto; @@ -450,5 +449,4 @@ bool TensorDistAttr::is_partial(int64_t mesh_axis) const { void TensorDistAttr::set_skip_check_mesh(bool skip) { skip_check_mesh_ = skip; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/proto_helper.cc b/paddle/phi/core/distributed/auto_parallel/proto_helper.cc index fad63c15d63bd..d1cd5b36a5f73 100644 --- a/paddle/phi/core/distributed/auto_parallel/proto_helper.cc +++ b/paddle/phi/core/distributed/auto_parallel/proto_helper.cc @@ -23,8 +23,7 @@ object.to_proto(&proto); \ return proto -namespace phi { -namespace distributed { +namespace phi::distributed { auto_parallel::TensorDistAttrProto to_proto(const TensorDistAttr& dist_attr) { TO_PROTO_HELPER(dist_attr, auto_parallel::TensorDistAttrProto); @@ -61,5 +60,4 @@ auto_parallel::DistributedMapperProto to_proto( const auto_parallel::DistributedMapper& dist_mapper) { TO_PROTO_HELPER(dist_mapper, auto_parallel::DistributedMapperProto); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/nd_mesh_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/nd_mesh_reshard_function.cc index 222e918ae540b..d2498c23e6eb7 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/nd_mesh_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/nd_mesh_reshard_function.cc @@ -27,8 +27,7 @@ #include "paddle/phi/core/distributed/auto_parallel/reshard/same_status_reshard_function.h" #include "paddle/phi/core/distributed/store/store_utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { namespace { 
ProcessMesh GetSubProcessMesh(const ProcessMesh& mesh, int64_t axis) { @@ -326,5 +325,4 @@ void CrossNdMeshReshardFunction::Eval(DeviceContext* dev_ctx, same_status_func.Eval(dev_ctx, tmp_result, out_dist_attr, out); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/p_to_r_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/p_to_r_reshard_function.cc index 1f8bb57293a45..91856cf8e928a 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/p_to_r_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/p_to_r_reshard_function.cc @@ -25,8 +25,7 @@ #include "paddle/phi/kernels/elementwise_divide_kernel.h" #include "paddle/phi/kernels/full_kernel.h" -namespace phi { -namespace distributed { +namespace phi::distributed { bool PToRReshardFunction::IsSuitable(const DistTensor& in, const TensorDistAttr& out_dist_attr) { @@ -142,5 +141,4 @@ void PToRReshardFunctionCrossMesh::Eval(phi::DeviceContext* dev_ctx, } } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/p_to_s_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/p_to_s_reshard_function.cc index faedf01bab140..64757505ac868 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/p_to_s_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/p_to_s_reshard_function.cc @@ -26,8 +26,7 @@ #include "paddle/phi/kernels/split_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h" -namespace phi { -namespace distributed { +namespace phi::distributed { bool PToSReshardFunction::IsSuitable(const DistTensor& in, const TensorDistAttr& out_dist_attr) { @@ -231,5 +230,4 @@ void PToSReshardFunctionCrossMesh::Eval(DeviceContext* dev_ctx, same_status_func.Eval(dev_ctx, tmp_result, out_dist_attr, out); } -} // namespace distributed -} // namespace phi 
+} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/r_to_s_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/r_to_s_reshard_function.cc index ef7208caf34bb..dafdfa48cb800 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/r_to_s_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/r_to_s_reshard_function.cc @@ -22,8 +22,7 @@ #include "paddle/phi/core/distributed/store/store_utils.h" #include "paddle/phi/kernels/split_kernel.h" -namespace phi { -namespace distributed { +namespace phi::distributed { bool RToSReshardFunction::IsSuitable(const DistTensor& in, const TensorDistAttr& out_dist_attr) { @@ -141,5 +140,4 @@ void RToSReshardFunctionCrossMesh::Eval(phi::DeviceContext* dev_ctx, same_status_func.Eval(dev_ctx, tmp_result, out_dist_attr, out); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/r_to_x_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/r_to_x_reshard_function.cc index 396d7fdba8deb..ee179c268bff6 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/r_to_x_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/r_to_x_reshard_function.cc @@ -27,8 +27,7 @@ #include "paddle/phi/kernels/p_send_kernel.h" #include "paddle/phi/kernels/split_kernel.h" -namespace phi { -namespace distributed { +namespace phi::distributed { bool RToXExpandReshardFunction::IsSuitable( const DistTensor& in, const TensorDistAttr& out_dist_attr) { @@ -134,5 +133,4 @@ void RToXExpandReshardFunction::Eval(phi::DeviceContext* dev_ctx, } } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.cc index 99da6feb54eba..400627b0e1737 100644 --- 
a/paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.cc @@ -20,8 +20,7 @@ #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -74,5 +73,4 @@ DenseTensor* ReshardFunction::GetMutableTensor(DistTensor* tensor) { return tensor->value_.get(); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.cc b/paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.cc index 01fbaf99c3c15..88b23e294c339 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.cc @@ -23,8 +23,7 @@ #include "paddle/phi/core/distributed/store/store_utils.h" #include "paddle/phi/core/enforce.h" -namespace phi { -namespace distributed { +namespace phi::distributed { namespace { std::string GenUniqueCommKey(const std::vector& process_ids) { @@ -260,5 +259,4 @@ bool IsSubMesh(const ProcessMesh& global_mesh, const ProcessMesh& sub_mesh) { return false; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/s_to_r_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/s_to_r_reshard_function.cc index dbfbf1df8d284..8b3a1b32808af 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/s_to_r_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/s_to_r_reshard_function.cc @@ -26,8 +26,7 @@ #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/split_kernel.h" -namespace phi { -namespace distributed { +namespace phi::distributed { namespace { @@ -221,5 +220,4 @@ 
void SToRReshardFunctionCrossMesh::Eval(DeviceContext* dev_ctx, } } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/x_to_r_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/x_to_r_reshard_function.cc index bd415480d64e9..947a4b77f6961 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/x_to_r_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/x_to_r_reshard_function.cc @@ -25,8 +25,7 @@ #include "paddle/phi/kernels/p_recv_kernel.h" #include "paddle/phi/kernels/p_send_kernel.h" -namespace phi { -namespace distributed { +namespace phi::distributed { bool XToRShrinkReshardFunction::IsSuitable( const DistTensor& in, const TensorDistAttr& out_dist_attr) { @@ -130,5 +129,4 @@ void XToRShrinkReshardFunction::Eval(phi::DeviceContext* dev_ctx, } } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/check/nccl_dynamic_check.cc b/paddle/phi/core/distributed/check/nccl_dynamic_check.cc index 9307af45bd622..6c5fa3b81dd08 100644 --- a/paddle/phi/core/distributed/check/nccl_dynamic_check.cc +++ b/paddle/phi/core/distributed/check/nccl_dynamic_check.cc @@ -42,8 +42,7 @@ #define gpuFree cudaFree #endif -namespace phi { -namespace distributed { +namespace phi::distributed { void NCCLDynamicCheck::CheckDataType(const phi::DenseTensor& tensor, int64_t dtype) { PADDLE_ENFORCE_EQ( @@ -197,5 +196,4 @@ void NCCLDynamicCheck::CheckGatherShape( } } } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/check/static_check.cc b/paddle/phi/core/distributed/check/static_check.cc index 25cdc8d01262e..16504f23e2a10 100644 --- a/paddle/phi/core/distributed/check/static_check.cc +++ b/paddle/phi/core/distributed/check/static_check.cc @@ -21,8 +21,7 @@ #include "paddle/phi/core/dense_tensor.h" #include 
"paddle/phi/core/enforce.h" -namespace phi { -namespace distributed { +namespace phi::distributed { void CommStaticCheck::CheckRank(int rank, int world_size) { PADDLE_ENFORCE_GE(rank, @@ -163,5 +162,4 @@ void CommStaticCheck::GatherLikeShape(const phi::DenseTensor& out_tensor, place); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/comm_context_manager.cc b/paddle/phi/core/distributed/comm_context_manager.cc index 4c8f500c406f4..ddd9c70001c16 100644 --- a/paddle/phi/core/distributed/comm_context_manager.cc +++ b/paddle/phi/core/distributed/comm_context_manager.cc @@ -43,8 +43,7 @@ #include "paddle/phi/core/distributed/xccl_comm_context.h" #endif -namespace phi { -namespace distributed { +namespace phi::distributed { int CommContextManager::device_id = -1; @@ -295,5 +294,4 @@ std::vector CommContextManager::GetGroupRanks( return pg_key_ranks_.at(pg_key); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/gloo_utils.cc b/paddle/phi/core/distributed/gloo_utils.cc index ed9ec67710618..d8d86f25fa6c7 100644 --- a/paddle/phi/core/distributed/gloo_utils.cc +++ b/paddle/phi/core/distributed/gloo_utils.cc @@ -31,8 +31,7 @@ #include "paddle/phi/core/distributed/store/tcp_utils.h" #include "paddle/phi/core/enforce.h" -namespace phi { -namespace distributed { +namespace phi::distributed { std::shared_ptr CreateDeviceForInterface( const std::string& ifname) { gloo::transport::tcp::attr attr; @@ -106,5 +105,4 @@ void send_recv(SendRecvOptions* opts) { } } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/nccl_comm_context.cc b/paddle/phi/core/distributed/nccl_comm_context.cc index 31740d95b7b24..a3095a2ba5f5c 100644 --- a/paddle/phi/core/distributed/nccl_comm_context.cc +++ b/paddle/phi/core/distributed/nccl_comm_context.cc @@ -24,8 +24,7 @@ #include 
"paddle/phi/core/enforce.h" #include "paddle/phi/core/utils/data_type.h" -namespace phi { -namespace distributed { +namespace phi::distributed { // set this flag to `true` and recompile to enable dynamic checks constexpr bool FLAGS_enable_nccl_dynamic_check = false; @@ -251,5 +250,4 @@ void NCCLCommContext::RedOpDestroy(ncclRedOp_t op) { } #endif -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/distributed/nccl_comm_task.cc b/paddle/phi/core/distributed/nccl_comm_task.cc index c94e7f0a02db0..0cc32e0ad04b3 100644 --- a/paddle/phi/core/distributed/nccl_comm_task.cc +++ b/paddle/phi/core/distributed/nccl_comm_task.cc @@ -21,8 +21,7 @@ #include "paddle/phi/core/distributed/nccl_tools.h" #include "paddle/phi/core/utils/data_type.h" -namespace phi { -namespace distributed { +namespace phi::distributed { NCCLCommTask::NCCLCommTask(const phi::Place& place, const std::string& group_key, @@ -266,5 +265,4 @@ std::string NCCLCommTask::GetTraceMsg() { ",nranks:" + std::to_string(size_); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/core/infermeta_utils.h b/paddle/phi/core/infermeta_utils.h index 494fe160696ff..e63ced99ec539 100644 --- a/paddle/phi/core/infermeta_utils.h +++ b/paddle/phi/core/infermeta_utils.h @@ -49,6 +49,8 @@ class InferMetaContext { void EmplaceBackOutputs( paddle::small_vector outputs); + void UpdataInput(size_t idx, MetaTensor input) { inputs_[idx] = input; } + TEST_API virtual const MetaTensor& InputAt(size_t idx) const; TEST_API virtual std::vector InputsBetween( @@ -68,6 +70,10 @@ class InferMetaContext { const std::pair& InputRangeAt(size_t idx) const; TEST_API const std::pair& OutputRangeAt(size_t idx) const; + size_t InputsSize() const { return inputs_.size(); } + size_t OutputsSize() const { return outputs_.size(); } + size_t AttrsSize() const { return attrs_.size(); } + virtual ~InferMetaContext() = default; protected: diff --git 
a/paddle/phi/core/kernel_context.h b/paddle/phi/core/kernel_context.h index 947af3af1d089..5fa75214fcfb5 100644 --- a/paddle/phi/core/kernel_context.h +++ b/paddle/phi/core/kernel_context.h @@ -75,6 +75,10 @@ class KernelContext { void AssignOutputRange(std::pair&& range, size_t idx); + void UpdataInput(size_t idx, const TensorBase* input) { + inputs_[idx] = input; + } + template const TensorType& InputAt(size_t idx) const { return static_cast(*(inputs_.at(idx))); diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc index e16ec77a3b0e1..4d1d37b541f09 100644 --- a/paddle/phi/infermeta/backward.cc +++ b/paddle/phi/infermeta/backward.cc @@ -128,7 +128,7 @@ void ChannelShuffleGradInferMeta(const MetaTensor& out_grad, "Input should be a 4-D tensor of format [N, C, H, W] " "or [N, H, W, C], but got %u.", do_dims.size())); - auto dx_dims = do_dims; + const auto& dx_dims = do_dims; x_grad->set_dims(dx_dims); x_grad->set_dtype(out_grad.dtype()); } @@ -445,6 +445,34 @@ void CudnnLSTMGradInferMeta( } } +void LSTMGradInferMeta(const MetaTensor& input, + const MetaTensor& h0, + const MetaTensor& c0, + const MetaTensor& weight, + const MetaTensor& bias, + MetaTensor* input_grad, + MetaTensor* h0_grad, + MetaTensor* c0_grad, + MetaTensor* weight_grad, + MetaTensor* bias_grad, + MetaConfig config) { + if (input_grad) { + input_grad->share_meta(input); + } + if (h0_grad) { + h0_grad->share_meta(h0); + } + if (c0_grad) { + c0_grad->share_meta(c0); + } + if (weight_grad) { + weight_grad->share_meta(weight); + } + if (bias_grad) { + bias_grad->share_meta(bias); + } +} + void DeformableConvGradInferMeta(const MetaTensor& x, const MetaTensor& offset, const MetaTensor& filter, diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h index e9971b5042ac0..89795c008d34d 100644 --- a/paddle/phi/infermeta/backward.h +++ b/paddle/phi/infermeta/backward.h @@ -167,6 +167,18 @@ void CudnnLSTMGradInferMeta( MetaTensor* init_c_grad, std::vector 
weight_list_grad); +void LSTMGradInferMeta(const MetaTensor& input, + const MetaTensor& h0, + const MetaTensor& c0, + const MetaTensor& weight, + const MetaTensor& bias, + MetaTensor* input_grad, + MetaTensor* h0_grad, + MetaTensor* c0_grad, + MetaTensor* weight_grad, + MetaTensor* bias_grad, + MetaConfig config = MetaConfig()); + void DeformableConvGradInferMeta(const MetaTensor& x, const MetaTensor& offset, const MetaTensor& filter, diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index 3c3ef874854ab..aa4028efa1a6e 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -3573,6 +3573,45 @@ void TakeAlongAxisInferMeta(const MetaTensor& x, out->set_dtype(x.dtype()); } +void TdmChildInferMeta(const MetaTensor& x, + const MetaTensor& tree_info, + int child_nums, + DataType dtype, + MetaTensor* child, + MetaTensor* leaf_mask) { + PADDLE_ENFORCE_GT( + child_nums, + 0, + phi::errors::InvalidArgument( + "ValueError: The value of the 'child_nums' must greater than 0. " + "But received child_nums value = %d, ", + child_nums)); + + const auto& info_dims = tree_info.dims(); + const auto& input_dims = x.dims(); + + PADDLE_ENFORCE_EQ( + info_dims.size(), + 2, + phi::errors::InvalidArgument( + "ShapeError: The dimensions of the 'tree info' must be 2. 
" + "But received tree info's dimensions = %d, " + "tree info's shape = [%s].", + info_dims.size(), + info_dims)); + + auto output_dims = common::vectorize(input_dims); + output_dims.push_back(child_nums); + if (child != nullptr) { + child->set_dims(common::make_ddim(output_dims)); + leaf_mask->set_dims(common::make_ddim(output_dims)); + child->share_lod(x); + leaf_mask->share_lod(x); + child->set_dtype(x.dtype()); + leaf_mask->set_dtype(x.dtype()); + } +} + void TriangularSolveInferMeta(const MetaTensor& x, const MetaTensor& y, bool upper, diff --git a/paddle/phi/infermeta/binary.h b/paddle/phi/infermeta/binary.h index e166746e3a646..391d01debd7a3 100644 --- a/paddle/phi/infermeta/binary.h +++ b/paddle/phi/infermeta/binary.h @@ -635,6 +635,13 @@ void TakeAlongAxisInferMeta(const MetaTensor& x, int axis, MetaTensor* out); +void TdmChildInferMeta(const MetaTensor& x, + const MetaTensor& tree_info, + int child_nums, + DataType dtype, + MetaTensor* child, + MetaTensor* leaf_mask); + void TriangularSolveInferMeta(const MetaTensor& x, const MetaTensor& y, bool upper, diff --git a/paddle/phi/infermeta/fusion.cc b/paddle/phi/infermeta/fusion.cc index 9987524d4997d..c243f640446a0 100644 --- a/paddle/phi/infermeta/fusion.cc +++ b/paddle/phi/infermeta/fusion.cc @@ -23,6 +23,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/common_shape.h" #include "paddle/phi/kernels/funcs/concat_funcs.h" +#include "paddle/phi/kernels/funcs/fused_elemwise_activation_functor.h" #include "paddle/phi/kernels/funcs/strided_slice.h" namespace phi { @@ -377,6 +378,89 @@ void BlockMultiheadAttentionInferMeta(const MetaTensor& qkv, } } +void BlockMultiheadAttentionInferXPUMeta( + const MetaTensor& qkv, + const MetaTensor& key_cache, + const MetaTensor& value_cache, + const MetaTensor& seq_lens_encoder, + const MetaTensor& seq_lens_decoder, + const MetaTensor& seq_lens_this_time, + const MetaTensor& padding_offsets, + const MetaTensor& cum_offsets, + const MetaTensor& cu_seqlens_q, + const MetaTensor& cu_seqlens_k, + const MetaTensor& cache_k_per_batch_maxs, + const MetaTensor& cache_v_per_batch_maxs, + const MetaTensor& block_tables, + const MetaTensor& pre_key_cache, + const MetaTensor& pre_value_cache, + const MetaTensor& rope_emb, + const MetaTensor& mask, + const MetaTensor& tgt_mask, + const MetaTensor& cache_k_quant_scales, + const MetaTensor& cache_v_quant_scales, + const MetaTensor& cache_k_dequant_scales, + const MetaTensor& cache_v_dequant_scales, + const MetaTensor& qkv_out_scale, + const MetaTensor& qkv_bias, + const MetaTensor& out_shift, + const MetaTensor& out_smooth, + const MetaTensor& max_enc_len_this_time, + const MetaTensor& max_dec_len_this_time, + int max_seq_len, + int block_size, + bool use_neox_style, + bool dynamic_cachekv_quant, + const int quant_round_type, + const float quant_max_bound, + const float quant_min_bound, + const float out_scale, + const std::string& compute_dtype, + MetaTensor* fmha_out, + MetaTensor* qkv_out, + MetaTensor* key_cache_out, + MetaTensor* value_cache_out) { + BlockMultiheadAttentionInferMeta(qkv, + key_cache, + value_cache, + seq_lens_encoder, + seq_lens_decoder, + seq_lens_this_time, + padding_offsets, + cum_offsets, + cu_seqlens_q, + cu_seqlens_k, + block_tables, + 
pre_key_cache, + pre_value_cache, + rope_emb, + mask, + tgt_mask, + cache_k_quant_scales, + cache_v_quant_scales, + cache_k_dequant_scales, + cache_v_dequant_scales, + qkv_out_scale, + qkv_bias, + out_shift, + out_smooth, + max_enc_len_this_time, + max_dec_len_this_time, + max_seq_len, + block_size, + use_neox_style, + dynamic_cachekv_quant, + quant_round_type, + quant_max_bound, + quant_min_bound, + out_scale, + compute_dtype, + fmha_out, + qkv_out, + key_cache_out, + value_cache_out); +} + void Conv1dXPUInferMeta(const MetaTensor& x, const MetaTensor& x_max, const MetaTensor& filter, @@ -4483,4 +4567,120 @@ void FusedTokenPruneInferMeta(const MetaTensor& attn, cls_inds->set_dtype(DataType::INT64); } +void FusedElemwiseActivationInferMeta( + const MetaTensor& x, + const MetaTensor& y, + const std::vector& functor_list, + int axis, + float scale, + bool save_intermediate_out, + MetaTensor* out, + MetaTensor* intermediate_out, + MetaConfig config) { + const auto& x_dim = x.dims(); + const auto& y_dim = y.dims(); + + // Whether the shape of Y is a continuous subsequence of X, + // For more information please refer to the op's introduction. + bool bcast_y = phi::funcs::IsBcastY(x_dim, y_dim); + + const auto& out_dim = bcast_y ? x_dim : y_dim; + const auto& out_lod = bcast_y ? x : y; + auto out_dtype = bcast_y ? x.dtype() : y.dtype(); + + if (save_intermediate_out) { + PADDLE_ENFORCE_EQ( + intermediate_out->initialized(), + true, + phi::errors::InvalidArgument( + "Output(IntermediateOut) of FusedElemwiseActivationOp " + "should not be null.")); + + if (phi::funcs::IsUnaryCompound(functor_list)) { + // for Unary(Binary(X, Y)), the shape and lod of out and + // intermediate_out are the same. + intermediate_out->set_dims(out_dim); + // set the lod of intermediate_out + intermediate_out->share_lod(out_lod); + intermediate_out->set_dtype(out_dtype); + } else { + // for Binary(X, Unary(Y)), the shape and lod of Y and + // intermediate_out are the same. 
+ intermediate_out->set_dims(y_dim); + // set the lod of intermediate_out + intermediate_out->share_lod(y); + intermediate_out->set_dtype(y.dtype()); + } + } + out->set_dims(out_dim); + out->share_lod(out_lod); + out->set_dtype(out_dtype); +} + +void FusedElemwiseActivationGradInferMeta( + const MetaTensor& x, + const MetaTensor& y, + const MetaTensor& out, + const MetaTensor& intermediate_out, + const MetaTensor& out_grad, + const std::vector& functor_list, + int axis, + float scale, + bool save_intermediate_out, + MetaTensor* x_grad, + MetaTensor* y_grad, + MetaConfig config) { + PADDLE_ENFORCE_EQ( + out_grad.initialized(), + true, + phi::errors::InvalidArgument("Input(Out@Grad) should not be null.")); + + if (save_intermediate_out) { + PADDLE_ENFORCE_EQ(intermediate_out.initialized(), + true, + phi::errors::InvalidArgument( + "Input(IntermediateOut) should not be null.")); + } else { + if (!phi::funcs::InputXCanBeAbsent(functor_list)) { + PADDLE_ENFORCE_EQ( + x.initialized(), + true, + phi::errors::InvalidArgument("Input(X) should not be null.")); + } + } + + if (x_grad != nullptr) { + if (x.initialized()) { + x_grad->set_dims(x.dims()); + x_grad->share_lod(x); + x_grad->set_dtype(x.dtype()); + } else { + // Currently, only when Binary is elementwise_add or elementwise_sub, + // the "X" could be absent. + PADDLE_ENFORCE_EQ( + phi::funcs::InputXCanBeAbsent(functor_list), + true, + phi::errors::InvalidArgument( + "Only when BinaryFunctor is elementwise_add, the 'X' " + "could be absent.")); + + // Node: If "X" is absence, the shape of Y should be a continuous + // subsequence of X, otherwise, we could not infer the shape of dx. 
+ x_grad->set_dims(out_grad.dims()); + x_grad->share_lod(out_grad); + x_grad->set_dtype(out_grad.dtype()); + } + } + + if (y_grad != nullptr) { + PADDLE_ENFORCE_EQ( + y.initialized(), + true, + phi::errors::InvalidArgument("Input(Y) should not be null.")); + y_grad->set_dims(y.dims()); + y_grad->share_lod(y); + y_grad->set_dtype(y.dtype()); + } +} + } // namespace phi diff --git a/paddle/phi/infermeta/fusion.h b/paddle/phi/infermeta/fusion.h index aa48f64434ee3..528ce5dff8b7e 100644 --- a/paddle/phi/infermeta/fusion.h +++ b/paddle/phi/infermeta/fusion.h @@ -128,6 +128,49 @@ void BlockMultiheadAttentionInferMeta(const MetaTensor& qkv, MetaTensor* key_cache_out, MetaTensor* value_cache_out); +void BlockMultiheadAttentionInferXPUMeta( + const MetaTensor& qkv, + const MetaTensor& key_cache, + const MetaTensor& value_cache, + const MetaTensor& seq_lens_encoder, + const MetaTensor& seq_lens_decoder, + const MetaTensor& seq_lens_this_time, + const MetaTensor& padding_offsets, + const MetaTensor& cum_offsets, + const MetaTensor& cu_seqlens_q, + const MetaTensor& cu_seqlens_k, + const MetaTensor& cache_k_per_batch_maxs, + const MetaTensor& cache_v_per_batch_maxs, + const MetaTensor& block_tables, + const MetaTensor& pre_key_cache, + const MetaTensor& pre_value_cache, + const MetaTensor& rope_emb, + const MetaTensor& mask, + const MetaTensor& tgt_mask, + const MetaTensor& cache_k_quant_scales, + const MetaTensor& cache_v_quant_scales, + const MetaTensor& cache_k_dequant_scales, + const MetaTensor& cache_v_dequant_scales, + const MetaTensor& qkv_out_scale, + const MetaTensor& qkv_bias, + const MetaTensor& out_shift, + const MetaTensor& out_smooth, + const MetaTensor& max_enc_len_this_time, + const MetaTensor& max_dec_len_this_time, + int max_seq_len, + int block_size, + bool use_neox_style, + bool dynamic_cachekv_quant, + const int quant_round_type, + const float quant_max_bound, + const float quant_min_bound, + const float out_scale, + const std::string& compute_dtype, + 
MetaTensor* fmha_out, + MetaTensor* qkv_out, + MetaTensor* key_cache_out, + MetaTensor* value_cache_out); + void Conv1dXPUInferMeta(const MetaTensor& x, const MetaTensor& x_max, const MetaTensor& filter, @@ -1006,4 +1049,29 @@ void FusedTokenPruneInferMeta(const MetaTensor& attn, MetaTensor* slimmed_x, MetaTensor* cls_inds); +void FusedElemwiseActivationInferMeta( + const MetaTensor& x, + const MetaTensor& y, + const std::vector& functor_list, + int axis, + float scale, + bool save_intermediate_out, + MetaTensor* out, + MetaTensor* intermediate_out, + MetaConfig config = MetaConfig()); + +void FusedElemwiseActivationGradInferMeta( + const MetaTensor& x, + const MetaTensor& y, + const MetaTensor& out, + const MetaTensor& intermediate_out, + const MetaTensor& out_grad, + const std::vector& functor_list, + int axis, + float scale, + bool save_intermediate_out, + MetaTensor* x_grad, + MetaTensor* y_grad, + MetaConfig config = MetaConfig()); + } // namespace phi diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index a80997970f8fb..9b7eaed7c3bf3 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include "glog/logging.h" #include "paddle/common/layout.h" -#include "paddle/phi/backends/device_memory_aligment.h" +#include "paddle/phi/backends/device_memory_alignment.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/infermeta_utils.h" @@ -1349,6 +1349,115 @@ void CudnnLSTMInferMeta( state_out->set_dtype(phi::DataType::UINT8); } +void LSTMInferMeta(const MetaTensor& input, + const MetaTensor& h0, + const MetaTensor& c0, + const MetaTensor& weight, + const MetaTensor& bias, + bool use_peepholes, + bool is_reverse, + bool is_test, + const std::string& gate_activation, + const std::string& cell_activation, + const std::string& candidate_activation, + MetaTensor* hidden, + MetaTensor* cell, + MetaTensor* batch_gate, + MetaTensor* batch_cell_pre_act, + MetaConfig config) { + const auto& in_dims = input.dims(); + PADDLE_ENFORCE_EQ( + in_dims.size(), + 2, + phi::errors::InvalidArgument( + "Input(X)'s rank must be 2, but received %d.", in_dims.size())); + + if (h0) { + PADDLE_ENFORCE_EQ( + c0.initialized(), + true, + phi::errors::NotFound("Input(Cell) and Input(Hidden) of LSTM " + "should not be null at the same time.")); + const auto& h_dims = h0.dims(); + const auto& c_dims = c0.dims(); + PADDLE_ENFORCE_EQ(h_dims, + c_dims, + phi::errors::InvalidArgument( + "The dimension of Input(H0) and Input(C0) should " + "be the same, but received [%s] (H0) vs [%s] (C0).", + h_dims, + c_dims)); + } + + int frame_size = static_cast(in_dims[1] / 4); + const auto& w_dims = weight.dims(); + PADDLE_ENFORCE_EQ( + w_dims.size(), + 2, + phi::errors::InvalidArgument( + "The rank of Input(Weight) should be 2, but received %d.", + w_dims.size())); + PADDLE_ENFORCE_EQ(w_dims[0], + frame_size, + phi::errors::InvalidArgument( + "The first dimension of Input(Weight) should be %d, " + "but received %d.", + frame_size, + w_dims[0])); + PADDLE_ENFORCE_EQ(w_dims[1], + 4 * frame_size, + phi::errors::InvalidArgument( + "The second 
dimension of Input(Weight) should be 4 * " + "%d, but received %d.", + frame_size, + w_dims[1])); + + const auto& b_dims = bias.dims(); + PADDLE_ENFORCE_EQ(b_dims.size(), + 2, + phi::errors::InvalidArgument( + "The rank of Input(Bias) should be 2, but received %d.", + b_dims.size())); + PADDLE_ENFORCE_EQ( + b_dims[0], + 1, + phi::errors::InvalidArgument( + "The first dimension of Input(Bias) should be 1, but received %d.", + b_dims[0])); + + if (use_peepholes) { + PADDLE_ENFORCE_EQ( + b_dims[1], + 7 * frame_size, + phi::errors::InvalidArgument( + "The second dimension of Input(Bias) should be 7 * %d if enable " + "peepholes connection, but received %d.", + frame_size, + b_dims[1])); + } else { + PADDLE_ENFORCE_EQ( + b_dims[1], + 4 * frame_size, + phi::errors::InvalidArgument( + "The second dimension of Input(Bias) should be 4 * %d if disable " + "peepholes connection, but received %d.", + frame_size, + b_dims[1])); + } + + phi::DDim out_dims({in_dims[0], frame_size}); + hidden->set_dims(out_dims); + cell->set_dims(out_dims); + if (!is_test) { + batch_gate->set_dims(in_dims); + batch_cell_pre_act->set_dims(out_dims); + } + hidden->share_lod(input); + cell->share_lod(input); + hidden->set_dtype(input.dtype()); + cell->set_dtype(input.dtype()); +} + void DecayedAdagradInferMeta(const MetaTensor& param, const MetaTensor& grad, const MetaTensor& moment, diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 56dff7422b2cc..a73212505f669 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -292,6 +292,23 @@ void CudnnLSTMInferMeta( MetaTensor* reserve, MetaTensor* state_out); +void LSTMInferMeta(const MetaTensor& input, + const MetaTensor& h0, + const MetaTensor& c0, + const MetaTensor& weight, + const MetaTensor& bias, + bool use_peepholes, + bool is_reverse, + bool is_test, + const std::string& gate_activation, + const std::string& cell_activation, + const std::string& candidate_activation, + MetaTensor* 
hidden, + MetaTensor* cell, + MetaTensor* batch_gate, + MetaTensor* batch_cell_pre_act, + MetaConfig config = MetaConfig()); + void DecayedAdagradInferMeta(const MetaTensor& param, const MetaTensor& grad, const MetaTensor& moment, diff --git a/paddle/phi/infermeta/sparse/unary.cc b/paddle/phi/infermeta/sparse/unary.cc index f624df0d8c55a..106a4a84474e2 100644 --- a/paddle/phi/infermeta/sparse/unary.cc +++ b/paddle/phi/infermeta/sparse/unary.cc @@ -15,8 +15,7 @@ limitations under the License. */ #include "paddle/phi/infermeta/sparse/unary.h" #include "paddle/phi/core/infermeta_utils.h" -namespace phi { -namespace sparse { +namespace phi::sparse { void IndicesInferMeta(const MetaTensor& x, MetaTensor* out) { // TODO(zhangkaihuo) Currently, we cannot get sparse_dim from tensor. @@ -51,5 +50,4 @@ void CastInferMeta(const MetaTensor& x, } } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse diff --git a/paddle/phi/infermeta/spmd_rules/elementwise.cc b/paddle/phi/infermeta/spmd_rules/elementwise.cc index 4e12c994b595b..099f7b7d54fd2 100644 --- a/paddle/phi/infermeta/spmd_rules/elementwise.cc +++ b/paddle/phi/infermeta/spmd_rules/elementwise.cc @@ -21,8 +21,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/distributed/auto_parallel/utils.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -531,5 +530,4 @@ SpmdInfo ElementwiseBinaryGradInferSpmd(const DistMetaTensor& x, info.first.emplace(info.first.begin() + 2, out_grad.dist_attr()); return info; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/expand_as.cc b/paddle/phi/infermeta/spmd_rules/expand_as.cc index 6bd663c826664..ea26fe7b54c26 100644 --- a/paddle/phi/infermeta/spmd_rules/expand_as.cc +++ b/paddle/phi/infermeta/spmd_rules/expand_as.cc @@ -25,7 +25,7 @@ std::tuple AlignExpandAsDistAttrs( auto x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src); auto y_dist_attr_dst = CopyTensorDistAttrForOutput(y_dist_attr_src); auto x_dims_mapping_dst = x_dims_mapping_src; - auto y_dims_mapping_dst = y_dims_mapping_src; + const auto& y_dims_mapping_dst = y_dims_mapping_src; int dims_diff = y_ndim - x_ndim; for (int i = 0; i < y_ndim; ++i) { if (i >= dims_diff) { diff --git a/paddle/phi/infermeta/spmd_rules/flash_attention.cc b/paddle/phi/infermeta/spmd_rules/flash_attention.cc index 737ad4eff03c9..cd2cfacad3d37 100644 --- a/paddle/phi/infermeta/spmd_rules/flash_attention.cc +++ b/paddle/phi/infermeta/spmd_rules/flash_attention.cc @@ -19,8 +19,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/enforce.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { const int kNumHeadsDimIndex = 2; #define LOG_SPMD_INPUT(name) \ @@ -806,5 +805,4 @@ SpmdInfo FlashAttGradInferSpmd(const DistMetaTensor& q, {q_grad, k_grad, v_grad}}; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/flatten.cc b/paddle/phi/infermeta/spmd_rules/flatten.cc index a0f084b491771..0cff9a46c5656 100644 --- a/paddle/phi/infermeta/spmd_rules/flatten.cc +++ b/paddle/phi/infermeta/spmd_rules/flatten.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h" #include "paddle/phi/core/distributed/auto_parallel/utils.h" #include "paddle/phi/infermeta/spmd_rules/dim_trans.h" +#include "paddle/phi/infermeta/spmd_rules/reshape.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" namespace phi { @@ -105,41 +106,31 @@ SpmdInfo FlattenInferSpmd(const DistMetaTensor& x, x_ndim, x_dims_mapping.size())); - // Step1: Build the transformation from - // the original shape to the target shape - + // obtain target shape and use ReshapeInferSpmdDynamic to infer start_axis = PreprocessAxis(start_axis, x_ndim); stop_axis = PreprocessAxis(stop_axis, x_ndim); - std::vector> trans = - MakeFlattenDimTrans(src_shape, start_axis, stop_axis); - - // Step2: Infer the dims mapping of input (if reshard is - // needed) and output from the dimension transformation. - std::vector> dims_mapping_vec = - InferFromDimTrans(x, trans); - - // Step3: Update the dist attributes of input - // and output with the inferred dims mapping. 
- TensorDistAttr x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src); - x_dist_attr_dst.set_dims_mapping(dims_mapping_vec[0]); - TensorDistAttr out_dist_attr = CopyTensorDistAttrForOutput(x_dist_attr_src); - out_dist_attr.set_dims_mapping(dims_mapping_vec[1]); + std::vector dst_shape; + int64_t flatten_size = 1; + for (int64_t i = 0; i < x_ndim; i++) { + if (i < start_axis || i > stop_axis) { + dst_shape.emplace_back(src_shape[i]); + } else { + flatten_size *= src_shape[i]; + if (i == stop_axis) { + dst_shape.emplace_back(flatten_size); + } + } + } VLOG(4) << "FlattenInferSpmd: X shape: [" << str_join(src_shape) << "]"; VLOG(4) << "Start_axis: " << start_axis; - VLOG(4) << "Stop_axis: " << start_axis; - VLOG(4) << "Transformation from input to output:"; - for (int64_t i = 0, n = static_cast(trans.size()); i < n; i++) { - std::shared_ptr t = trans[i]; - VLOG(4) << "\tOut axis[" << i << "]: " << t->to_string(); - } - VLOG(4) << "X dims_mapping_src: [" << str_join(x_dims_mapping) - << "] dims_mapping_dst: [" << str_join(dims_mapping_vec[0]) << "]"; - VLOG(4) << "Out dims_mapping: [" << str_join(dims_mapping_vec[1]) << "]\n\n"; - - return {{x_dist_attr_dst}, {out_dist_attr}}; + VLOG(4) << "Stop_axis: " << stop_axis; + VLOG(4) << "FlattenInferSpmd: output shape: [" << str_join(dst_shape) << "]"; + VLOG(4) << "use ReshapeInferSpmdDynamic to infer distributed attribute"; + return ReshapeInferSpmdDynamic(x, dst_shape); } +// TODO(jeff41404): consider xshape and use ReshapeInferSpmdReverse in future SpmdInfo FlattenInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out, int start_axis, @@ -198,5 +189,16 @@ SpmdInfo FlattenInferSpmdReverse(const DistMetaTensor& x, return {{x_dist_attr}, {out_dist_attr_dst}}; } +SpmdInfo FlattenGradInferSpmd(const DistMetaTensor& xshape, + const DistMetaTensor& out_grad) { + // TODO(jeff41404): when ReshapeInferSpmd and ReshapeGradInferSpmd can deliver + // distributed attribute of xshape, we will use 
ReshapeGradInferSpmd directly + // in future return ReshapeGradInferSpmd(xshape, out_grad); + auto shape = phi::vectorize(xshape.dims()); + shape = std::vector(shape.begin() + 1, shape.end()); + const auto& spmd = ReshapeInferSpmd(out_grad, shape); + return {{xshape.dist_attr(), spmd.first[0]}, {spmd.second[0]}}; +} + } // namespace distributed } // namespace phi diff --git a/paddle/phi/infermeta/spmd_rules/flatten.h b/paddle/phi/infermeta/spmd_rules/flatten.h index bb62d8c0d7b0a..28bf5e56d5256 100644 --- a/paddle/phi/infermeta/spmd_rules/flatten.h +++ b/paddle/phi/infermeta/spmd_rules/flatten.h @@ -30,5 +30,8 @@ SpmdInfo FlattenInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out, int start_axis, int stop_axis); + +SpmdInfo FlattenGradInferSpmd(const DistMetaTensor& xshape, + const DistMetaTensor& out_grad); } // namespace distributed } // namespace phi diff --git a/paddle/phi/infermeta/spmd_rules/full_like.cc b/paddle/phi/infermeta/spmd_rules/full_like.cc index 6089865766b31..0670df21ab153 100644 --- a/paddle/phi/infermeta/spmd_rules/full_like.cc +++ b/paddle/phi/infermeta/spmd_rules/full_like.cc @@ -15,8 +15,7 @@ limitations under the License. 
*/ #include "paddle/phi/infermeta/spmd_rules/full_like.h" #include "paddle/phi/infermeta/spmd_rules/elementwise.h" -namespace phi { -namespace distributed { +namespace phi::distributed { SpmdInfo FullLikeInferSpmd(const DistMetaTensor& x, const Scalar& y, phi::DataType dtype) { @@ -24,5 +23,4 @@ SpmdInfo FullLikeInferSpmd(const DistMetaTensor& x, out_dist_attr.clean_partial_status(); return {{x.dist_attr()}, {out_dist_attr}}; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/fused_linear_param_grad_add.cc b/paddle/phi/infermeta/spmd_rules/fused_linear_param_grad_add.cc index c2afdd86b57a7..295d5fa593da5 100644 --- a/paddle/phi/infermeta/spmd_rules/fused_linear_param_grad_add.cc +++ b/paddle/phi/infermeta/spmd_rules/fused_linear_param_grad_add.cc @@ -17,8 +17,7 @@ limitations under the License. */ #include "paddle/phi/infermeta/spmd_rules/matmul.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { SpmdInfo FusedLinearParamGradAddInferSpmd(const DistMetaTensor& x, const DistMetaTensor& dout, @@ -73,5 +72,4 @@ SpmdInfo FusedLinearParamGradAddInferSpmd(const DistMetaTensor& x, SpmdInfo FusedLinearParamGradAddInferSpmdFakeReverse() { return SpmdInfo(); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/fused_rope.cc b/paddle/phi/infermeta/spmd_rules/fused_rope.cc index 8099c12aa0e1b..f3aa720d61ece 100644 --- a/paddle/phi/infermeta/spmd_rules/fused_rope.cc +++ b/paddle/phi/infermeta/spmd_rules/fused_rope.cc @@ -563,6 +563,7 @@ SpmdInfo FusedRopeGradInferSpmd(const DistMetaTensor& sin, time_major); std::vector dist_attrs; std::vector order = {3, 4, 5, 0, 1, 2}; + dist_attrs.reserve(order.size()); for (int ind : order) { dist_attrs.emplace_back(spmd_info.first[ind]); } diff --git a/paddle/phi/infermeta/spmd_rules/gather.cc 
b/paddle/phi/infermeta/spmd_rules/gather.cc index 30cb413ba1ddf..c2376d8545170 100644 --- a/paddle/phi/infermeta/spmd_rules/gather.cc +++ b/paddle/phi/infermeta/spmd_rules/gather.cc @@ -22,8 +22,7 @@ limitations under the License. */ #include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -215,5 +214,4 @@ SpmdInfo GatherGradInferSpmd(const DistMetaTensor& x, {x_grad_dist_attr}}; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/numel.cc b/paddle/phi/infermeta/spmd_rules/numel.cc index ca0678b773163..7dcc78fbe0f50 100644 --- a/paddle/phi/infermeta/spmd_rules/numel.cc +++ b/paddle/phi/infermeta/spmd_rules/numel.cc @@ -21,7 +21,6 @@ limitations under the License. */ namespace phi { namespace distributed { -using phi::distributed::auto_parallel::str_join; SpmdInfo NumelInferSpmd(const DistMetaTensor& x) { std::string alphabet = "abcdefghijklmnopqrstuvwxyz"; diff --git a/paddle/phi/infermeta/spmd_rules/one_hot.cc b/paddle/phi/infermeta/spmd_rules/one_hot.cc index dc90684dde1ef..bc7f0e32ba043 100644 --- a/paddle/phi/infermeta/spmd_rules/one_hot.cc +++ b/paddle/phi/infermeta/spmd_rules/one_hot.cc @@ -22,8 +22,7 @@ limitations under the License. 
*/ #include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -90,5 +89,4 @@ SpmdInfo OneHotInferSpmdDynamic(const DistMetaTensor& x, return OneHotInferSpmd(x, num_classes.to()); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/pow.cc b/paddle/phi/infermeta/spmd_rules/pow.cc index 59112010e5998..0a73d5706b1a0 100644 --- a/paddle/phi/infermeta/spmd_rules/pow.cc +++ b/paddle/phi/infermeta/spmd_rules/pow.cc @@ -13,8 +13,7 @@ limitations under the License. */ #include "paddle/phi/infermeta/spmd_rules/pow.h" #include "paddle/phi/infermeta/spmd_rules/elementwise.h" -namespace phi { -namespace distributed { +namespace phi::distributed { SpmdInfo PowInferSpmd(const DistMetaTensor& x, const Scalar& y) { return ElementwiseUnaryInferSpmd(x); } @@ -23,5 +22,4 @@ SpmdInfo PowGradInferSpmd(const DistMetaTensor& x, const Scalar y) { return ElementwiseUnaryGradInferSpmd(x, out_grad); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/slice.cc b/paddle/phi/infermeta/spmd_rules/slice.cc index cde458df747e2..01587cd4dad12 100644 --- a/paddle/phi/infermeta/spmd_rules/slice.cc +++ b/paddle/phi/infermeta/spmd_rules/slice.cc @@ -21,8 +21,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/distributed/auto_parallel/utils.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -379,5 +378,4 @@ SpmdInfo StridedSliceGradInferSpmdDynamic(const DistMetaTensor& input, return SliceGradInferBase(input, out_grad, axes_bridge, {}); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/softmax.cc b/paddle/phi/infermeta/spmd_rules/softmax.cc index b6f886a49468a..542050f15ef50 100644 --- a/paddle/phi/infermeta/spmd_rules/softmax.cc +++ b/paddle/phi/infermeta/spmd_rules/softmax.cc @@ -23,8 +23,7 @@ limitations under the License. */ #include "paddle/phi/infermeta/spmd_rules/utils.h" #include "paddle/phi/infermeta/unary.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -206,5 +205,4 @@ SpmdInfo SoftmaxGradInferSpmd(const DistMetaTensor& out, DistMetaTensor(out_grad.dims(), out_grad_dist_attr)); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/split.cc b/paddle/phi/infermeta/spmd_rules/split.cc index e1769392f7238..779f98fea9d24 100644 --- a/paddle/phi/infermeta/spmd_rules/split.cc +++ b/paddle/phi/infermeta/spmd_rules/split.cc @@ -227,6 +227,7 @@ SpmdInfo SplitWithNumInferSpmdDynamic(const DistMetaTensor& x, SpmdInfo ret; ret.first = tmp.first; std::vector out_dist_attrs; + out_dist_attrs.reserve(tmp.second.size()); for (const auto& out : tmp.second) { out_dist_attrs.push_back(PADDLE_GET_CONST(TensorDistAttr, out)); } diff --git a/paddle/phi/infermeta/spmd_rules/swiglu.cc b/paddle/phi/infermeta/spmd_rules/swiglu.cc index df6ee24733597..88466785ef3bc 100644 --- a/paddle/phi/infermeta/spmd_rules/swiglu.cc +++ b/paddle/phi/infermeta/spmd_rules/swiglu.cc @@ -21,8 +21,7 @@ limitations under the 
License. */ #include "paddle/phi/core/distributed/auto_parallel/utils.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { SpmdInfo SwiGLUInferSpmd(const DistMetaTensor& x, const DistMetaTensor& y) { // y.dist_attr() is empty means y is None @@ -75,5 +74,4 @@ SpmdInfo SwiGLUGradInferSpmd(const DistMetaTensor& x, } } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/triu.cc b/paddle/phi/infermeta/spmd_rules/triu.cc index ed98889de4ea7..24e7770869a22 100644 --- a/paddle/phi/infermeta/spmd_rules/triu.cc +++ b/paddle/phi/infermeta/spmd_rules/triu.cc @@ -26,8 +26,8 @@ using phi::distributed::auto_parallel::str_join; SpmdInfo TriuInferSpmdBase(const DistMetaTensor& x) { auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); - auto x_dist_attr_src = x.dist_attr(); - std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); + const auto& x_dist_attr_src = x.dist_attr(); + const std::vector& x_dims_mapping = x_dist_attr_src.dims_mapping(); PADDLE_ENFORCE_EQ( x_ndim, x_dims_mapping.size(), @@ -73,8 +73,9 @@ SpmdInfo TriuInferSpmdReverseBase(const DistMetaTensor& x, const DistMetaTensor& out) { auto out_shape = common::vectorize(out.dims()); int out_ndim = out_shape.size(); - auto out_dist_attr_src = out.dist_attr(); - std::vector out_dims_mapping = out_dist_attr_src.dims_mapping(); + const auto& out_dist_attr_src = out.dist_attr(); + const std::vector& out_dims_mapping = + out_dist_attr_src.dims_mapping(); PADDLE_ENFORCE_EQ( out_ndim, out_dims_mapping.size(), @@ -119,7 +120,7 @@ SpmdInfo TriuInferSpmdReverse(const DistMetaTensor& x, SpmdInfo TriuGradInferSpmdBase(const DistMetaTensor& out_grad) { auto out_shape = common::vectorize(out_grad.dims()); int out_ndim = out_shape.size(); - auto out_dist_attr_src = out_grad.dist_attr(); + const auto& out_dist_attr_src = out_grad.dist_attr(); const std::vector& 
out_dims_mapping = out_dist_attr_src.dims_mapping(); PADDLE_ENFORCE_EQ(out_ndim, diff --git a/paddle/phi/infermeta/spmd_rules/unbind.cc b/paddle/phi/infermeta/spmd_rules/unbind.cc index 79634e8076771..bc0a13cf1761e 100644 --- a/paddle/phi/infermeta/spmd_rules/unbind.cc +++ b/paddle/phi/infermeta/spmd_rules/unbind.cc @@ -22,8 +22,7 @@ limitations under the License. */ #include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -171,6 +170,7 @@ SpmdInfo UnbindInferSpmdDynamic(const DistMetaTensor& x, int axis) { SpmdInfo ret; ret.first = tmp.first; std::vector out_dist_attrs; + out_dist_attrs.reserve(tmp.second.size()); for (const auto& out : tmp.second) { out_dist_attrs.push_back(PADDLE_GET_CONST(TensorDistAttr, out)); } @@ -178,5 +178,4 @@ SpmdInfo UnbindInferSpmdDynamic(const DistMetaTensor& x, int axis) { return ret; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/unsqueeze.cc b/paddle/phi/infermeta/spmd_rules/unsqueeze.cc index 9aab6676bd383..dc3b76bd05b6e 100644 --- a/paddle/phi/infermeta/spmd_rules/unsqueeze.cc +++ b/paddle/phi/infermeta/spmd_rules/unsqueeze.cc @@ -25,8 +25,7 @@ #include "paddle/phi/infermeta/spmd_rules/reshape.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -239,5 +238,4 @@ SpmdInfo UnsqueezeGradInferSpmd(const DistMetaTensor& xshape, return {{xshape.dist_attr(), spmd.first[0]}, {spmd.second[0]}}; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/utils.cc b/paddle/phi/infermeta/spmd_rules/utils.cc index 336924dd5e951..995f152777655 100644 --- 
a/paddle/phi/infermeta/spmd_rules/utils.cc +++ b/paddle/phi/infermeta/spmd_rules/utils.cc @@ -22,8 +22,7 @@ limitations under the License. */ #include "paddle/phi/core/distributed/auto_parallel/utils.h" #include "paddle/phi/core/enforce.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; @@ -605,5 +604,4 @@ TensorDistAttr ReduceGradBroadCastDims(const TensorDistAttr& input, return grad_out; } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/spmd_rules/where.cc b/paddle/phi/infermeta/spmd_rules/where.cc index 6499d3f37635f..d823ae4ce75cb 100644 --- a/paddle/phi/infermeta/spmd_rules/where.cc +++ b/paddle/phi/infermeta/spmd_rules/where.cc @@ -19,8 +19,7 @@ limitations under the License. */ #include "paddle/phi/core/enforce.h" #include "paddle/phi/infermeta/spmd_rules/utils.h" -namespace phi { -namespace distributed { +namespace phi::distributed { using phi::distributed::auto_parallel::str_join; SpmdInfo WhereInferSpmd(const DistMetaTensor& condition, @@ -397,5 +396,4 @@ SpmdInfo WhereGradInferSpmd(const DistMetaTensor& condition, {cond_dist_attr, x_dist_attr, y_dist_attr, out_grad_dist_attr}, {x_grad, y_grad}); } -} // namespace distributed -} // namespace phi +} // namespace phi::distributed diff --git a/paddle/phi/infermeta/strings/nullary.cc b/paddle/phi/infermeta/strings/nullary.cc index 80f75c0e06721..ce7eb4a0e5233 100644 --- a/paddle/phi/infermeta/strings/nullary.cc +++ b/paddle/phi/infermeta/strings/nullary.cc @@ -13,8 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/phi/infermeta/strings/nullary.h" -namespace phi { -namespace strings { +namespace phi::strings { void CreateInferMeta(const IntArray& shape, MetaTensor* out) { const auto& out_dims = common::make_ddim(shape.GetData()); @@ -23,5 +22,4 @@ void CreateInferMeta(const IntArray& shape, MetaTensor* out) { out->set_layout(DataLayout::PSTRING_UNION); } -} // namespace strings -} // namespace phi +} // namespace phi::strings diff --git a/paddle/phi/infermeta/strings/unary.cc b/paddle/phi/infermeta/strings/unary.cc index c4c1aa5c990eb..d9ff624ebd995 100644 --- a/paddle/phi/infermeta/strings/unary.cc +++ b/paddle/phi/infermeta/strings/unary.cc @@ -16,8 +16,7 @@ limitations under the License. */ #include "paddle/phi/core/infermeta_utils.h" -namespace phi { -namespace strings { +namespace phi::strings { void UnchangedInferMeta(const StringTensorMeta& x_meta, MetaTensor* out) { out->set_dims(x_meta.dims); @@ -31,5 +30,4 @@ void CreateLikeInferMeta(const MetaTensor& x, MetaTensor* out) { out->set_layout(x.layout()); } -} // namespace strings -} // namespace phi +} // namespace phi::strings diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc index 6c278867d9ac3..8e9cebf46ac0f 100644 --- a/paddle/phi/infermeta/ternary.cc +++ b/paddle/phi/infermeta/ternary.cc @@ -645,6 +645,141 @@ void GlobalScatterInferMeta(const MetaTensor& x, out->set_dtype(x.dtype()); } +void AddGroupNormSiluInferMeta(const MetaTensor& x, + const MetaTensor& residual, + const MetaTensor& scale, + const MetaTensor& bias, + float epsilon, + int groups, + const std::string& data_layout_str, + const std::string& activation, + MetaTensor* y, + MetaTensor* residual_out, + MetaTensor* mean, + MetaTensor* variance) { + PADDLE_ENFORCE_NE(y, + nullptr, + phi::errors::InvalidArgument( + "The y in GroupNormInferMeta can't be nullptr.")); + PADDLE_ENFORCE_NE(mean, + nullptr, + phi::errors::InvalidArgument( + "The mean in GroupNormInferMeta can't be nullptr.")); + 
PADDLE_ENFORCE_NE( + variance, + nullptr, + phi::errors::InvalidArgument( + "The variance in GroupNormInferMeta can't be nullptr.")); + + auto x_dim = x.dims(); + PADDLE_ENFORCE_GE( + x_dim.size(), + 2, + phi::errors::InvalidArgument( + "The Input(X)'s dimension of Op(group_norm) must be " + "greater than 1. But received: %u-D Tensor, which shape is [%s].", + x_dim.size(), + x_dim)); + + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); + const int64_t channel_num = + (data_layout == DataLayout::kNCHW ? x_dim[1] : x_dim[x_dim.size() - 1]); + auto batch_size = x_dim[0]; + PADDLE_ENFORCE_LE( + groups, + channel_num, + phi::errors::InvalidArgument( + "The Attr(groups) of Op(group_norm) must be less than or " + "equal to the number of channels. But received: groups " + "is [%s], channels is [%s], the Attr(data_layout) " + "is [%s]. The error may come from wrong data_layout setting.", + groups, + channel_num, + data_layout_str)); + PADDLE_ENFORCE_GE( + groups, + 1, + phi::errors::InvalidArgument( + "The Attr(groups) of Op(group_norm) must be " + "greater than or equal to 1. But received: groups is [%s].", + groups)); + PADDLE_ENFORCE_EQ( + channel_num % groups, + 0, + phi::errors::InvalidArgument( + "Expected number of channels in input to be divisible by " + "num_groups, but got input channel is %d and num_groups is %d", + channel_num, + groups)); + + if (scale) { + PADDLE_ENFORCE_EQ( + scale.dims().size(), + 1UL, + phi::errors::InvalidArgument( + "The Input(Scale) of Op(group_norm) should be 1-D Tensor. " + "But received: %u-D Tensor, the shape of Input(Scale) is [%s].", + scale.dims().size(), + scale.dims())); + PADDLE_ENFORCE_EQ( + scale.dims()[0], + channel_num, + phi::errors::InvalidArgument( + "The Input(Scale)'s first dimension size of Op(group_norm) must " + "be equal to the number of channels. But received: the " + "Input(Scale)'s first dimension size is [%s], the channels is " + "[%s], the Attr(data_layout) is [%s]. 
The error may come " + "from wrong data_layout setting.", + scale.dims()[0], + channel_num, + data_layout_str)); + } + if (bias) { + PADDLE_ENFORCE_EQ( + bias.dims().size(), + 1UL, + phi::errors::InvalidArgument( + "The Input(Bias) of Op(group_norm) should be 1-D Tensor. " + "But received: %u-D Tensor, the shape of Input(Bias) is [%s].", + bias.dims().size(), + bias.dims())); + PADDLE_ENFORCE_EQ( + bias.dims()[0], + channel_num, + phi::errors::InvalidArgument( + "The Input(Bias)'s first dimension size of " + "Op(group_norm) must be equal to the number of channels. " + "But received: the Input(Bias)'s first dimension size is [%s], " + "the channels is [%s], the Attr(data_layout) is [%s]. The " + "error may come from wrong data_layout setting.", + bias.dims()[0], + channel_num, + data_layout_str)); + } + y->set_dims(x_dim); + y->set_dtype(x.dtype()); + y->share_lod(x); + + phi::DataType x_dtype = x.dtype(); + phi::DataType param_type = + (x_dtype == phi::DataType::BFLOAT16 || x_dtype == phi::DataType::FLOAT16) + ? phi::DataType::FLOAT32 + : x_dtype; + if (mean) { + mean->set_dims({batch_size, groups}); + mean->set_dtype(param_type); + } + if (variance) { + variance->set_dims({batch_size, groups}); + variance->set_dtype(param_type); + } + if (residual_out) { + residual_out->set_dims(x_dim); + residual_out->set_dtype(x.dtype()); + residual_out->share_lod(x); + } +} + void GroupNormInferMeta(const MetaTensor& x, const MetaTensor& scale, const MetaTensor& bias, @@ -1746,7 +1881,7 @@ void SparseMomentumInferMeta(const MetaTensor& param, MetaTensor* velocity_out, MetaTensor* master_param_out) { auto lr_dims = common::product(learning_rate.dims()); - PADDLE_ENFORCE_EQ(lr_dims != 0 && lr_dims == 1, + PADDLE_ENFORCE_EQ(lr_dims == 1, true, phi::errors::InvalidArgument( "Learning_rate should be a scalar. 
But Received " diff --git a/paddle/phi/infermeta/ternary.h b/paddle/phi/infermeta/ternary.h index 8732a87c55cd6..1b276846619e6 100644 --- a/paddle/phi/infermeta/ternary.h +++ b/paddle/phi/infermeta/ternary.h @@ -144,6 +144,19 @@ void GlobalScatterInferMeta(const MetaTensor& x, bool use_calc_stream, MetaTensor* out); +void AddGroupNormSiluInferMeta(const MetaTensor& x, + const MetaTensor& residual, + const MetaTensor& scale, + const MetaTensor& bias, + float epsilon, + int groups, + const std::string& data_layout, + const std::string& activation, + MetaTensor* y, + MetaTensor* residual_out, + MetaTensor* mean, + MetaTensor* variance); + void GroupNormInferMeta(const MetaTensor& x, const MetaTensor& scale, const MetaTensor& bias, diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 0aca647dd6a49..6670361ad4f83 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -29,8 +29,6 @@ file(GLOB kernel_primitive_h "primitive/*.h") file( GLOB kernel_cu RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" - "gpu/*.cu" - "gpu/*.cu.cc" "gpudnn/*.cu" "kps/*.cu" "legacy/kps/*.cu" @@ -40,21 +38,42 @@ file( "strings/gpu/*.cu" "fusion/gpu/*.cu") +file( + GLOB kernel_gpu + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "gpu/*.cu" "gpu/*.cu.cc") + if(APPLE OR WIN32) list(REMOVE_ITEM kernel_cu "fusion/gpu/fusion_group_kernel.cu") list(REMOVE_ITEM kernel_cu "sparse/gpu/conv_kernel_igemm.cu") endif() if(NOT WITH_DGC) - list(REMOVE_ITEM kernel_cu "gpu/dgc_kernel.cu") + list(REMOVE_ITEM kernel_gpu "gpu/dgc_kernel.cu") endif() if(DEFINED REDUCE_INFERENCE_LIB_SIZE) - list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cc$") list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cu$") + list(FILTER kernel_gpu EXCLUDE REGEX ".*_grad_kernel\\.cc$") + list(FILTER kernel_gpu EXCLUDE REGEX ".*_grad_kernel\\.cu$") endif() if(WITH_CUTLASS) + add_custom_target( + gemm_epilogue_compile_script ALL + COMMAND bash compile.sh "${PYTHON_EXECUTABLE}" 
"${CUDA_TOOLKIT_ROOT_DIR}" + \"${NVCC_ARCH_BIN}\" "${CMAKE_COMMAND}" + WORKING_DIRECTORY + ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/gemm_epilogue + COMMENT "GemmEpilogue compile script") + add_custom_target( + fused_conv2d_add_act_compile_script ALL + COMMAND bash compile.sh "${PYTHON_EXECUTABLE}" "${CUDA_TOOLKIT_ROOT_DIR}" + \"${NVCC_ARCH_BIN}\" "${CMAKE_COMMAND}" + WORKING_DIRECTORY + ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/conv2d + COMMENT "FusedConv2dAddAct compile script") + execute_process( COMMAND ${PYTHON_EXECUTABLE} @@ -201,6 +220,15 @@ if(WITH_ROCM) list( REMOVE_ITEM kernel_cu + "gpudnn/mha_cudnn_frontend.cu" + "fusion/gpu/blha_get_max_len.cu" + "fusion/gpu/block_multi_head_attention_kernel.cu" + "fusion/gpu/fused_bn_add_activation_grad_kernel.cu" + "fusion/gpu/fused_bn_add_activation_kernel.cu" + "fusion/gpu/fusion_transpose_flatten_concat_kernel.cu") + list( + REMOVE_ITEM + kernel_gpu "gpu/affine_grid_grad_kernel.cu" "gpu/apply_per_channel_scale_kernel.cu" "gpu/cholesky_solve_kernel.cu" @@ -213,13 +241,7 @@ if(WITH_ROCM) "gpu/put_along_axis_grad_kernel.cu" "gpu/put_along_axis_kernel.cu" "gpu/qr_kernel.cu" - "gpu/svd_kernel.cu" - "gpudnn/mha_cudnn_frontend.cu" - "fusion/gpu/blha_get_max_len.cu" - "fusion/gpu/block_multi_head_attention_kernel.cu" - "fusion/gpu/fused_bn_add_activation_grad_kernel.cu" - "fusion/gpu/fused_bn_add_activation_kernel.cu" - "fusion/gpu/fusion_transpose_flatten_concat_kernel.cu") + "gpu/svd_kernel.cu") endif() set(cc_search_pattern @@ -276,6 +298,8 @@ file( if(WITH_GPU OR WITH_ROCM) collect_srcs(kernels_srcs SRCS ${kernel_cu}) kernel_declare("${kernel_cu}") + collect_srcs(kernels_gpu_srcs SRCS ${kernel_gpu}) + kernel_declare("${kernel_gpu}") endif() if(WITH_XPU) diff --git a/paddle/phi/kernels/activation_grad_kernel.h b/paddle/phi/kernels/activation_grad_kernel.h index b2fae7b0406e0..8aed27bb59ea9 100644 --- a/paddle/phi/kernels/activation_grad_kernel.h +++ b/paddle/phi/kernels/activation_grad_kernel.h 
@@ -307,7 +307,6 @@ DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Floor); DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Ceil); DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, alpha); -DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu, threshold); DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink, lambda); DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink, threshold); DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Logit, eps); @@ -318,5 +317,6 @@ DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(HardTanh, t_min, t_max); DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh, scale_a, scale_b); DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus, beta, threshold); DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid, slope, offset); +DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(ThresholdedRelu, threshold, value); } // namespace phi diff --git a/paddle/phi/kernels/activation_kernel.h b/paddle/phi/kernels/activation_kernel.h index 70c0187e68865..bf3cb325160d3 100644 --- a/paddle/phi/kernels/activation_kernel.h +++ b/paddle/phi/kernels/activation_kernel.h @@ -74,7 +74,6 @@ DECLARE_ACTIVATION_KERNEL(Ceil) DECLARE_ACTIVATION_KERNEL(Negative) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha) -DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold) @@ -87,6 +86,7 @@ DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardTanh, t_min, t_max) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(STanh, scale_a, scale_b) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus, beta, threshold) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid, slope, offset) +DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(ThresholdedRelu, threshold, value) template void HardSwishKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/check_memory_continue_kernel.cc b/paddle/phi/kernels/check_memory_continue_kernel.cc 
index ed2722f9d8411..29149ae0f768a 100644 --- a/paddle/phi/kernels/check_memory_continue_kernel.cc +++ b/paddle/phi/kernels/check_memory_continue_kernel.cc @@ -20,7 +20,7 @@ #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/backends/device_memory_aligment.h" +#include "paddle/phi/backends/device_memory_alignment.h" namespace phi { diff --git a/paddle/phi/kernels/coalesce_tensor_kernel.cc b/paddle/phi/kernels/coalesce_tensor_kernel.cc index a60369af449f4..fea53f55ce8df 100644 --- a/paddle/phi/kernels/coalesce_tensor_kernel.cc +++ b/paddle/phi/kernels/coalesce_tensor_kernel.cc @@ -20,7 +20,7 @@ #include "glog/logging.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/backends/device_memory_aligment.h" +#include "paddle/phi/backends/device_memory_alignment.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/cpu/activation_grad_kernel.cc b/paddle/phi/kernels/cpu/activation_grad_kernel.cc index 3f26f8c388e66..b8ced8d4defe2 100644 --- a/paddle/phi/kernels/cpu/activation_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/activation_grad_kernel.cc @@ -155,9 +155,6 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_NODEP(Ceil, ZeroGradFunctor); DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, LeakyReluGradFunctor, alpha); -DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu, - ThresholdedReluGradFunctor, - threshold); DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink, SoftShrinkGradFunctor, lambda); @@ -188,6 +185,10 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid, HardSigmoidGradFunctor, slope, offset); +DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(ThresholdedRelu, + ThresholdedReluGradFunctor, + threshold, + value); template void SiluGradKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/cpu/activation_kernel.cc b/paddle/phi/kernels/cpu/activation_kernel.cc index 
92acf104fedcf..fda8493c9f452 100644 --- a/paddle/phi/kernels/cpu/activation_kernel.cc +++ b/paddle/phi/kernels/cpu/activation_kernel.cc @@ -106,9 +106,6 @@ DEFINE_CPU_ACTIVATION_KERNEL_WITH_INT_IN_FLOAT_OUT(Exp, ExpFunctor) DEFINE_CPU_ACTIVATION_KERNEL_WITH_INT_IN_FLOAT_OUT(Expm1, Expm1Functor) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha) -DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, - ThresholdedReluFunctor, - threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda) @@ -122,6 +119,10 @@ DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid, HardSigmoidFunctor, slope, offset) +DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(ThresholdedRelu, + ThresholdedReluFunctor, + threshold, + value) template void HardSwishKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/cpu/cumprod_kernel.cc b/paddle/phi/kernels/cpu/cumprod_kernel.cc index f39bddbb443ba..422f566c6612e 100644 --- a/paddle/phi/kernels/cpu/cumprod_kernel.cc +++ b/paddle/phi/kernels/cpu/cumprod_kernel.cc @@ -32,8 +32,16 @@ void CumprodKernel(const Context& dev_ctx, DenseTensor* out) { const DenseTensor* x = &input; auto* x_data = x->data(); - auto* out_data = dev_ctx.template Alloc(out); + auto* out_ptr = dev_ctx.template Alloc(out); DDim shape = x->dims(); + DenseTensor out_tmp; + T* out_data = nullptr; + if (x_data == out_ptr) { + out_tmp.Resize(shape); + out_data = dev_ctx.template Alloc(&out_tmp); + } else { + out_data = out_ptr; + } size_t outer_dim = 1; size_t mid_dim = 1; @@ -88,6 +96,9 @@ void CumprodKernel(const Context& dev_ctx, } } } + if (x_data == out_ptr) { + memcpy(out_ptr, out_data, out->numel() * sizeof(T)); + } } } // namespace phi diff --git a/paddle/phi/kernels/cpu/fill_diagonal_grad_kernel.cc b/paddle/phi/kernels/cpu/fill_diagonal_grad_kernel.cc index 204c544e2d95f..1e2954cfeac91 100644 
--- a/paddle/phi/kernels/cpu/fill_diagonal_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/fill_diagonal_grad_kernel.cc @@ -35,7 +35,7 @@ void FillDiagonalGradKernel(const Context& ctx, auto size = x_grad->numel(); auto wrapsize = std::min(size, dx_dims[1] * dx_dims[1]); - // The wrap mode supported only the dims equels to 2; In wrap mode, the + // The wrap mode supported only the dims equals to 2; In wrap mode, the // value will be filled in cycles if (wrap) { wrapsize = size; diff --git a/paddle/phi/kernels/cpu/fill_diagonal_kernel.cc b/paddle/phi/kernels/cpu/fill_diagonal_kernel.cc index c5888f5d30ed2..b38f3403df1b7 100644 --- a/paddle/phi/kernels/cpu/fill_diagonal_kernel.cc +++ b/paddle/phi/kernels/cpu/fill_diagonal_kernel.cc @@ -36,7 +36,7 @@ void FillDiagonalKernel(const Context& ctx, auto strides = funcs::CalStride(out_dims); auto size = out->numel(); - // The wrap mode supported only the dims equels to 2; In wrap mode, the + // The wrap mode supported only the dims equals to 2; In wrap mode, the // value will be filled in cycles if (!wrap) { size = std::min(size, out_dims[1] * out_dims[1]); diff --git a/paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc b/paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc index 5d0fa3c8b5753..0d43f5dec05d7 100644 --- a/paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc +++ b/paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc @@ -85,7 +85,7 @@ void FillDiagonalTensorKernel(const Context &ctx, phi::Copy(ctx, x, ctx.GetPlace(), false, out); auto out_dims = out->dims(); - auto matdims = y.dims(); + const auto &matdims = y.dims(); auto fill_dims = common::flatten_to_2d(matdims, matdims.size() - 1); std::array new_dims = {}; diff --git a/paddle/phi/kernels/cpu/inverse_grad_kernel.cc b/paddle/phi/kernels/cpu/inverse_grad_kernel.cc index 97c10e69c8eab..5014cfd0f95c7 100644 --- a/paddle/phi/kernels/cpu/inverse_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/inverse_grad_kernel.cc @@ -16,5 +16,11 @@ #include 
"paddle/phi/core/kernel_registry.h" -PD_REGISTER_KERNEL( - inverse_grad, CPU, ALL_LAYOUT, phi::InverseGradKernel, float, double) {} +PD_REGISTER_KERNEL(inverse_grad, + CPU, + ALL_LAYOUT, + phi::InverseGradKernel, + float, + double, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/inverse_kernel.cc b/paddle/phi/kernels/cpu/inverse_kernel.cc index 4b21718eca3f2..6fecef6f888dc 100644 --- a/paddle/phi/kernels/cpu/inverse_kernel.cc +++ b/paddle/phi/kernels/cpu/inverse_kernel.cc @@ -16,5 +16,11 @@ #include "paddle/phi/core/kernel_registry.h" -PD_REGISTER_KERNEL( - inverse, CPU, ALL_LAYOUT, phi::InverseKernel, float, double) {} +PD_REGISTER_KERNEL(inverse, + CPU, + ALL_LAYOUT, + phi::InverseKernel, + float, + double, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/fluid/operators/ops_signature/number_count_sig.cc b/paddle/phi/kernels/cpu/lstm_grad_kernel.cc similarity index 58% rename from paddle/fluid/operators/ops_signature/number_count_sig.cc rename to paddle/phi/kernels/cpu/lstm_grad_kernel.cc index 48e0b4fce9ac1..ddaa85c8bdce1 100644 --- a/paddle/fluid/operators/ops_signature/number_count_sig.cc +++ b/paddle/phi/kernels/cpu/lstm_grad_kernel.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,15 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/phi/core/compat/op_utils.h" +#include +#include +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/lstm_kernel_impl.h" -namespace phi { - -KernelSignature NumberCountOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("number_count", {"numbers"}, {"upper_range"}, {"Out"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(number_count, phi::NumberCountOpArgumentMapping); +PD_REGISTER_KERNEL( + lstm_grad, CPU, ALL_LAYOUT, phi::LSTMGradKernel, float, double) {} diff --git a/test/deprecated/cpp_extension/custom_power.h b/paddle/phi/kernels/cpu/lstm_kernel.cc similarity index 55% rename from test/deprecated/cpp_extension/custom_power.h rename to paddle/phi/kernels/cpu/lstm_kernel.cc index f2cf8acb9cd52..848ba68bb3b76 100644 --- a/test/deprecated/cpp_extension/custom_power.h +++ b/paddle/phi/kernels/cpu/lstm_kernel.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,18 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#pragma once -#include "paddle/extension.h" +#include +#include +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/lstm_kernel_impl.h" -struct Power { - Power(int A, int B) { - tensor_ = paddle::ones({A, B}, phi::DataType::FLOAT32, phi::CPUPlace()); - } - explicit Power(paddle::Tensor x) { tensor_ = x; } - paddle::Tensor forward() { return paddle::experimental::pow(tensor_, 2); } - paddle::Tensor get() const { return tensor_; } - - private: - paddle::Tensor tensor_; -}; +PD_REGISTER_KERNEL(lstm, CPU, ALL_LAYOUT, phi::LSTMKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/meshgrid_grad_kernel.cc b/paddle/phi/kernels/cpu/meshgrid_grad_kernel.cc index 5b43fb02b5117..9d1319e0b5e4a 100644 --- a/paddle/phi/kernels/cpu/meshgrid_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/meshgrid_grad_kernel.cc @@ -25,4 +25,6 @@ PD_REGISTER_KERNEL(meshgrid_grad, float, double, int, - int64_t) {} + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/meshgrid_kernel.cc b/paddle/phi/kernels/cpu/meshgrid_kernel.cc index 35e43f7bbc85e..a0239da6bb128 100644 --- a/paddle/phi/kernels/cpu/meshgrid_kernel.cc +++ b/paddle/phi/kernels/cpu/meshgrid_kernel.cc @@ -25,4 +25,6 @@ PD_REGISTER_KERNEL(meshgrid, float, double, int, - int64_t) {} + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/pool_grad_kernel.cc b/paddle/phi/kernels/cpu/pool_grad_kernel.cc index 4511d9164f002..f262c046e1687 100644 --- a/paddle/phi/kernels/cpu/pool_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/pool_grad_kernel.cc @@ -19,6 +19,8 @@ PD_REGISTER_KERNEL( pool2d_grad, CPU, ALL_LAYOUT, phi::Pool2dGradKernel, float, double) {} +PD_REGISTER_KERNEL( + lp_pool2d_grad, CPU, ALL_LAYOUT, phi::LPPool2dGradKernel, float, double) {} PD_REGISTER_KERNEL(pool2d_double_grad, CPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/cpu/pool_kernel.cc b/paddle/phi/kernels/cpu/pool_kernel.cc index e606173919d74..d4c66eedc54ff 
100644 --- a/paddle/phi/kernels/cpu/pool_kernel.cc +++ b/paddle/phi/kernels/cpu/pool_kernel.cc @@ -18,6 +18,8 @@ #include "paddle/phi/kernels/impl/pool_kernel_impl.h" PD_REGISTER_KERNEL(pool2d, CPU, ALL_LAYOUT, phi::Pool2dKernel, float, double) {} +PD_REGISTER_KERNEL( + lp_pool2d, CPU, ALL_LAYOUT, phi::LPPool2dKernel, float, double) {} PD_REGISTER_KERNEL(max_pool2d_with_index, CPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/cpu/random_routing_kernel.cc b/paddle/phi/kernels/cpu/random_routing_kernel.cc index cdeab98f4c1ab..f236ad4d9370c 100644 --- a/paddle/phi/kernels/cpu/random_routing_kernel.cc +++ b/paddle/phi/kernels/cpu/random_routing_kernel.cc @@ -16,8 +16,7 @@ #include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace fusion { +namespace phi::fusion { template void RandomRoutingKernel(const Context& dev_ctx, @@ -29,8 +28,7 @@ void RandomRoutingKernel(const Context& dev_ctx, "Do not support expert count op for cpu kernel now.")); } -} // namespace fusion -} // namespace phi +} // namespace phi::fusion PD_REGISTER_KERNEL(random_routing, CPU, diff --git a/paddle/phi/kernels/cpu/tdm_child_kernel.cc b/paddle/phi/kernels/cpu/tdm_child_kernel.cc index 246f2113d65e8..3fabbba572f7e 100644 --- a/paddle/phi/kernels/cpu/tdm_child_kernel.cc +++ b/paddle/phi/kernels/cpu/tdm_child_kernel.cc @@ -104,7 +104,7 @@ void TDMChildKernel(const Context &dev_ctx, const phi::DenseTensor &x, const phi::DenseTensor &tree_info, int child_nums, - int dtype, + phi::DataType dtype, phi::DenseTensor *child, phi::DenseTensor *leaf_mask) { const auto &input_type = x.dtype(); @@ -132,7 +132,7 @@ void TDMChildKernel(const Context &dev_ctx, DataTypeToString(DataType::INT32), DataTypeToString(DataType::INT64))); - auto output_type = phi::TransToPhiDataType(dtype); + auto output_type = dtype; bool out_type_match = output_type == DataType::INT32 || output_type == DataType::INT64; PADDLE_ENFORCE_EQ(out_type_match, diff --git 
a/paddle/phi/kernels/cpu/tile_kernel.cc b/paddle/phi/kernels/cpu/tile_kernel.cc index 2320c30310a64..30eb1d5cd6c47 100644 --- a/paddle/phi/kernels/cpu/tile_kernel.cc +++ b/paddle/phi/kernels/cpu/tile_kernel.cc @@ -27,5 +27,6 @@ PD_REGISTER_KERNEL(tile, double, int, int64_t, + phi::dtype::float16, phi::dtype::complex, phi::dtype::complex) {} diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h index ba1d9873ec2a4..27223dad0c1de 100644 --- a/paddle/phi/kernels/funcs/activation_functor.h +++ b/paddle/phi/kernels/funcs/activation_functor.h @@ -1825,22 +1825,25 @@ struct LeakyReluGradGradFunctor : public BaseActivationFunctor { template struct ThresholdedReluFunctor : public BaseActivationFunctor { float threshold; + float value; typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"threshold", &threshold}}; + return {{"threshold", &threshold}, {"value", &value}}; } template void operator()(Device d, X x, Out out) const { auto th = static_cast(threshold); // NOLINT - out.device(d) = (x > th).template cast() * x; + out.device(d) = (x > th).template cast() * x + + (x <= th).template cast() * static_cast(value); } }; template struct ThresholdedReluGradFunctor : public BaseActivationFunctor { float threshold; + float value; typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"threshold", &threshold}}; + return {{"threshold", &threshold}, {"value", &value}}; } template { template struct CudaThresholdedReluFunctor : public BaseActivationFunctor { - T zero = static_cast(0.0f); float threshold; + float value; typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"threshold", &threshold}}; + return {{"threshold", &threshold}, {"value", &value}}; } - // thresholded_relu(x) = x > threshold ? x : 0 + // thresholded_relu(x, threshold, value) = x > threshold ? x : value __device__ __forceinline__ T operator()(const T x) const { - return x > static_cast(threshold) ? 
x : zero; + return x > static_cast(threshold) ? x : static_cast(value); } }; @@ -4247,9 +4250,10 @@ template struct CudaThresholdedReluGradFunctor : public BaseActivationFunctor { T zero = static_cast(0.0f); float threshold; + float value; typename BaseActivationFunctor::AttrPair GetAttrs() { - return {{"threshold", &threshold}}; + return {{"threshold", &threshold}, {"value", &value}}; } // dx = x > threshold ? dout : 0 diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.cu.h b/paddle/phi/kernels/funcs/blas/blas_impl.cu.h index 96b2128eee16c..a58b5998a6703 100644 --- a/paddle/phi/kernels/funcs/blas/blas_impl.cu.h +++ b/paddle/phi/kernels/funcs/blas/blas_impl.cu.h @@ -685,6 +685,63 @@ struct CUBlas> { ldb, batch_size)); } + + static void GETRF_BATCH(cublasHandle_t handle, + int n, + phi::dtype::complex **A, + int lda, + int *ipiv, + int *info, + int batch_size) { + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCgetrfBatched( + handle, + n, + reinterpret_cast(A), + lda, + ipiv, + info, + batch_size)); + } + + static void GETRI_BATCH(cublasHandle_t handle, + int n, + const phi::dtype::complex **A, + int lda, + const int *ipiv, + phi::dtype::complex **Ainv, + int ldc, + int *info, + int batch_size) { + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCgetriBatched( + handle, + n, + reinterpret_cast(A), + lda, + ipiv, + reinterpret_cast(Ainv), + ldc, + info, + batch_size)); + } + + static void MATINV_BATCH(cublasHandle_t handle, + int n, + const phi::dtype::complex **A, + int lda, + phi::dtype::complex **Ainv, + int lda_inv, + int *info, + int batch_size) { + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCmatinvBatched( + handle, + n, + reinterpret_cast(A), + lda, + reinterpret_cast(Ainv), + lda_inv, + info, + batch_size)); + } }; template <> @@ -923,6 +980,63 @@ struct CUBlas> { "cublasGemmEx is not supported on cuda <= 7.5")); #endif } + + static void GETRF_BATCH(cublasHandle_t handle, + int n, + phi::dtype::complex **A, + int lda, + int *ipiv, + int *info, + int 
batch_size) { + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZgetrfBatched( + handle, + n, + reinterpret_cast(A), + lda, + ipiv, + info, + batch_size)); + } + + static void GETRI_BATCH(cublasHandle_t handle, + int n, + const phi::dtype::complex **A, + int lda, + const int *ipiv, + phi::dtype::complex **Ainv, + int ldc, + int *info, + int batch_size) { + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZgetriBatched( + handle, + n, + reinterpret_cast(A), + lda, + ipiv, + reinterpret_cast(Ainv), + ldc, + info, + batch_size)); + } + + static void MATINV_BATCH(cublasHandle_t handle, + int n, + const phi::dtype::complex **A, + int lda, + phi::dtype::complex **Ainv, + int lda_inv, + int *info, + int batch_size) { + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZmatinvBatched( + handle, + n, + reinterpret_cast(A), + lda, + reinterpret_cast(Ainv), + lda_inv, + info, + batch_size)); + } }; template <> diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cc b/paddle/phi/kernels/funcs/concat_and_split_functor.cc index fd49748666a6e..c42bbbd3a5318 100644 --- a/paddle/phi/kernels/funcs/concat_and_split_functor.cc +++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cc @@ -14,8 +14,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" -namespace phi { -namespace funcs { +namespace phi::funcs { /* * All tensors' dimension should be the same and the values of @@ -132,5 +131,4 @@ struct SplitFunctor { FOR_ALL_TYPES(DEFINE_FUNCTOR); -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/data_layout_transform.cc b/paddle/phi/kernels/funcs/data_layout_transform.cc index 383881c6cc3c9..fc67ef927f4cc 100644 --- a/paddle/phi/kernels/funcs/data_layout_transform.cc +++ b/paddle/phi/kernels/funcs/data_layout_transform.cc @@ -28,8 +28,7 @@ #include "paddle/phi/backends/onednn/onednn_reuse.h" #endif -namespace phi { -namespace funcs { +namespace phi::funcs { #ifdef PADDLE_WITH_DNNL @@ -131,5 +130,4 @@ void TransDataLayoutFromOneDNN(DataLayout in_layout, #endif -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/eigen/erf.cc b/paddle/phi/kernels/funcs/eigen/erf.cc index 63d3bba30f99a..5734c6eed61e5 100644 --- a/paddle/phi/kernels/funcs/eigen/erf.cc +++ b/paddle/phi/kernels/funcs/eigen/erf.cc @@ -16,8 +16,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/eigen/extensions.h" -namespace phi { -namespace funcs { +namespace phi::funcs { template struct EigenErf { @@ -56,5 +55,4 @@ INSTANTIATION(EigenErf); INSTANTIATION(EigenErfGrad); #undef INSTANTIATION -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/eigen/pad.cc b/paddle/phi/kernels/funcs/eigen/pad.cc index 946bff40544ee..c51cd25e45c29 100644 --- a/paddle/phi/kernels/funcs/eigen/pad.cc +++ b/paddle/phi/kernels/funcs/eigen/pad.cc @@ -15,8 +15,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/complex.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" -namespace phi { -namespace funcs { +namespace phi::funcs { template struct EigenPad { @@ -72,5 +71,4 @@ INSTANTIATION(EigenPad, dtype::complex); INSTANTIATION(EigenPad, dtype::complex); #undef INSTANTIATION -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/eigen/reverse.cc b/paddle/phi/kernels/funcs/eigen/reverse.cc index bd1996956cd38..7b37d56b79e0e 100644 --- a/paddle/phi/kernels/funcs/eigen/reverse.cc +++ b/paddle/phi/kernels/funcs/eigen/reverse.cc @@ -13,8 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" -namespace phi { -namespace funcs { +namespace phi::funcs { template struct EigenReverse { @@ -46,5 +45,4 @@ INSTANTIATION(EigenReverse, float); INSTANTIATION(EigenReverse, double); #undef INSTANTIATION -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/elementwise/elementwise_op_function.h b/paddle/phi/kernels/funcs/elementwise/elementwise_op_function.h index 0326881940aaa..82516cd3c6d07 100644 --- a/paddle/phi/kernels/funcs/elementwise/elementwise_op_function.h +++ b/paddle/phi/kernels/funcs/elementwise/elementwise_op_function.h @@ -1165,7 +1165,7 @@ void FusedElemwiseAndActGradComputeWithBroadcast( UseIntermediateOut, BcastY, SameShapeOfIntermediateOutAndOut>( - dev_ctx, + reinterpret_cast(dev_ctx), x_data, y_data, intermediate_out == nullptr ? nullptr : intermediate_out->data(), @@ -1214,7 +1214,7 @@ void FusedElemwiseAndActGradComputeWithBroadcast( UseIntermediateOut, BcastY, SameShapeOfIntermediateOutAndOut>( - dev_ctx.stream(), + reinterpret_cast(dev_ctx).stream(), x_data, y_data, intermediate_out == nullptr ? 
nullptr : intermediate_out->data(), diff --git a/paddle/phi/kernels/funcs/fft.cc b/paddle/phi/kernels/funcs/fft.cc index beb0a98636039..13a3822b26005 100644 --- a/paddle/phi/kernels/funcs/fft.cc +++ b/paddle/phi/kernels/funcs/fft.cc @@ -25,11 +25,11 @@ #include "extern_pocketfft/pocketfft_hdronly.h" #endif -namespace phi { -namespace funcs { +namespace phi::funcs { #if defined(PADDLE_WITH_ONEMKL) -namespace detail { +} // namespace phi::funcs +namespace phi::funcs::detail { // Execute a general fft operation (can be c2c, onesided r2c or onesided c2r) template void exec_fft(const phi::CPUContext& ctx, @@ -141,7 +141,8 @@ void exec_fft(const phi::CPUContext& ctx, TransposeKernel( ctx, transposed_output, reverse_dim_permute, out); } -} // namespace detail +} // namespace phi::funcs::detail +namespace phi::funcs { template struct FFTC2CFunctor { @@ -192,7 +193,8 @@ struct FFTC2RFunctor { }; #elif defined(PADDLE_WITH_POCKETFFT) -namespace detail { +} // namespace phi::funcs +namespace phi::funcs::detail { template static T compute_factor(size_t size, FFTNormMode normalization) { constexpr auto one = static_cast(1); @@ -206,7 +208,8 @@ static T compute_factor(size_t size, FFTNormMode normalization) { } PADDLE_THROW(phi::errors::InvalidArgument("Unsupported normalization type")); } -} // namespace detail +} // namespace phi::funcs::detail +namespace phi::funcs { template struct FFTC2CFunctor { @@ -374,5 +377,4 @@ template struct FFTC2RFunctor; template struct FFTC2RFunctor; template struct FFTR2CFunctor; template struct FFTR2CFunctor; -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/fft_fill_conj.h b/paddle/phi/kernels/funcs/fft_fill_conj.h index c47257818f3a3..594dccd99db23 100644 --- a/paddle/phi/kernels/funcs/fft_fill_conj.h +++ b/paddle/phi/kernels/funcs/fft_fill_conj.h @@ -189,26 +189,23 @@ template struct FFTFillConjGradFunctor { T* input_; const size_t axis_; - const int64_t* strides_; + const int64_t 
stride_to_last_axis; + const int64_t stride_second_to_last_axis; const size_t double_length_; FFTFillConjGradFunctor(T* input, size_t axis, - const int64_t* strides, + int64_t stride_second_to_last_axis, + int64_t stride_to_last_axis, size_t double_length) : input_(input), axis_(axis), - strides_(strides), + stride_to_last_axis(stride_to_last_axis), + stride_second_to_last_axis(stride_second_to_last_axis), double_length_(double_length) {} HOSTDEVICE void operator()(size_t index) { - size_t offtset = index; // back - size_t index_i; - for (size_t i = 0; i <= axis_; i++) { - index_i = offtset / strides_[i]; - offtset %= strides_[i]; - } - + size_t index_i = (index % stride_second_to_last_axis) / stride_to_last_axis; if ((0 < index_i) && (index_i < double_length_ + 1)) { input_[index] *= static_cast(2); } diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/phi/kernels/funcs/fused_elemwise_activation_functor.h similarity index 62% rename from paddle/fluid/operators/fused/fused_elemwise_activation_op.h rename to paddle/phi/kernels/funcs/fused_elemwise_activation_functor.h index a271a87d9eb35..5568611708339 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h +++ b/paddle/phi/kernels/funcs/fused_elemwise_activation_functor.h @@ -1,69 +1,158 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #pragma once +#include #include +#include #include -#include "paddle/fluid/framework/op_desc.h" -#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/compound_functors.h" #include "paddle/phi/kernels/funcs/elementwise/elementwise_op_function.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/funcs/functors.h" -namespace paddle { -namespace operators { +namespace phi { +namespace funcs { + +static inline bool IsBcastY(const phi::DDim &x_dim, const phi::DDim &y_dim) { + bool bcast_y = x_dim.size() >= y_dim.size(); + if (x_dim.size() == y_dim.size()) { + for (int i = 0; i < x_dim.size(); ++i) { + if (x_dim[i] < y_dim[i]) { + bcast_y = false; + break; + } + } + } + return bcast_y; +} /** * Whether the compound function is Unary(Binary(X, Y)). * For Unary(Binary(X, Y)), the intermediate_out's shape is the same the final * out. 
*/ -bool IsUnaryCompound(const std::vector &functor_list); +static inline bool IsUnaryCompound( + const std::vector &functor_list) { + PADDLE_ENFORCE_EQ( + functor_list.size(), + 2, + phi::errors::InvalidArgument( + "Invalid functor list size %d, which should be equal to %d.", + functor_list.size(), + 2)); + static std::unordered_set binary_fun = {"elementwise_add", + "elementwise_mul", + "elementwise_add_grad", + "elementwise_mul_grad"}; + return binary_fun.count(functor_list[1]) != 0; +} /** * For the in-place unary functor, the inputs of op_desc only have Out and * Out@Grad. */ -bool HasInPlaceUnary(const std::vector &functor_list); +static inline bool HasInPlaceUnary( + const std::vector &functor_list) { + PADDLE_ENFORCE_EQ( + functor_list.size(), + 2, + phi::errors::InvalidArgument( + "Invalid functor list size %d, which should be equal to %d.", + functor_list.size(), + 2)); + static std::unordered_set InplaceOpSet = {"relu", "relu_grad"}; + bool is_in_place = false; + for (auto &func_name : functor_list) { + is_in_place |= (InplaceOpSet.count(func_name) == 1); + } + return is_in_place; +} /** * Whether the Input(X) could be absent. */ -bool InputXCanBeAbsent(const std::vector &functor_list); +static inline bool InputXCanBeAbsent( + const std::vector &functor_list) { + PADDLE_ENFORCE_EQ( + functor_list.size(), + 2, + phi::errors::InvalidArgument( + "Invalid functor list size %d, which should be equal to %d.", + functor_list.size(), + 2)); + static std::unordered_set binary_fun = {"elementwise_add_grad"}; + return binary_fun.count(functor_list[0]) != 0 || + binary_fun.count(functor_list[1]) != 0; +} + +/* + * Whether the compound function is supported. + * For Unary(Binary(X, Y)), the intermediate_out's shape is the same the final + * out. 
+ */ +static bool IsSupportedCompound(const std::vector &functors) { + PADDLE_ENFORCE_EQ( + functors.size(), + 2UL, + phi::errors::InvalidArgument( + "Invalid functor list size %d, which should be equal to %d.", + functors.size(), + 2)); + + static std::unordered_set unary_fun = { + "scale", "relu", "tanh", "sigmoid", "gelu"}; + static std::unordered_set binary_fun = {"elementwise_add", + "elementwise_mul"}; + + std::string unary_fun_str; + if (binary_fun.count(functors[0])) { + unary_fun_str = functors[1]; + } else if (binary_fun.count(functors[1])) { + unary_fun_str = functors[0]; + } else { + PADDLE_THROW(phi::errors::InvalidArgument( + "%s and %s are not included in fused_list.", functors[0], functors[1])); + } + PADDLE_ENFORCE_EQ(unary_fun.count(unary_fun_str), + 1, + phi::errors::InvalidArgument( + "%s is not included in fused_list.", unary_fun_str)); + return true; +} template -static void RunBinaryCompoundFunctor(const framework::ExecutionContext &ctx, - const BinaryFunctor &binary_functor, - const UnaryFunctor &unary_functor, - const phi::DenseTensor &in_x, - const phi::DenseTensor &in_y, - std::vector *outputs) { +void RunBinaryCompoundFunctor(const DeviceContext &dev_ctx, + const BinaryFunctor &binary_functor, + const UnaryFunctor &unary_functor, + const phi::DenseTensor &in_x, + const phi::DenseTensor &in_y, + std::vector *outputs, + int axis, + bool save_intermediate_out) { // Z = Binary(X, Unary(Y)) // intermediate_out = Unary(Y) // out = Binary(X, Unary(Y)) // In this case, the shape of intermediate_out and out are different. 
phi::funcs::BinaryCompoundFunctor compound_func(binary_functor, unary_functor); - int axis = ctx.Attr("axis"); - auto &dev_ctx = ctx.template device_context(); - if (ctx.Attr("save_intermediate_out")) { + if (save_intermediate_out) { phi::funcs::FusedElemwiseAndActComputeEx< DeviceContext, T, @@ -86,22 +175,23 @@ template -static void RunUnaryCompoundFunctors(const framework::ExecutionContext &ctx, - const UnaryFunctor &unary_functor, - const BinaryFunctor &binary_functor, - const phi::DenseTensor &in_x, - const phi::DenseTensor &in_y, - std::vector *outputs) { +void RunUnaryCompoundFunctors(const DeviceContext &dev_ctx, + const UnaryFunctor &unary_functor, + const BinaryFunctor &binary_functor, + const phi::DenseTensor &in_x, + const phi::DenseTensor &in_y, + std::vector *outputs, + int axis, + bool save_intermediate_out) { // Z = Unary(Binary(X, Y)) // intermediate_out = Binary(X, Y) // out = Unary(Binary(X, Y)) // In this case, the shape of intermediate_out and out are the same. - int axis = ctx.Attr("axis"); phi::funcs::UnaryCompoundFunctor compound_func(unary_functor, binary_functor); - auto &dev_ctx = ctx.template device_context(); - if (ctx.Attr("save_intermediate_out")) { + + if (save_intermediate_out) { phi::funcs::FusedElemwiseAndActComputeEx< DeviceContext, T, @@ -126,21 +216,20 @@ template -static void RunBinaryCompoundGradFunctors( - const framework::ExecutionContext &ctx, - const BinaryGradFunctor &binary_grad_functor, - const UnaryFunctor &unary_functor, - const UnaryGradFunctor &unary_grad_functor, - const phi::DenseTensor *in_x, - const phi::DenseTensor *in_y, - const phi::DenseTensor *in_out, - const phi::DenseTensor *in_intermediate_out, - const phi::DenseTensor *in_out_grad, - phi::DenseTensor *x_grad, - phi::DenseTensor *y_grad, - phi::DenseTensor *d_intermediate_out) { +void RunBinaryCompoundGradFunctors(const DeviceContext &dev_ctx, + const BinaryGradFunctor &binary_grad_functor, + const UnaryFunctor &unary_functor, + const UnaryGradFunctor 
&unary_grad_functor, + const phi::DenseTensor *in_x, + const phi::DenseTensor *in_y, + const phi::DenseTensor *in_out, + const phi::DenseTensor *in_intermediate_out, + const phi::DenseTensor *in_out_grad, + phi::DenseTensor *x_grad, + phi::DenseTensor *y_grad, + phi::DenseTensor *d_intermediate_out, + int axis) { // Z = Binary(X, Unary(Y)) - int axis = ctx.Attr("axis"); using BinaryCompoundDxFunctor = phi::funcs:: BinaryCompoundGradDxFunctor; @@ -155,7 +244,6 @@ static void RunBinaryCompoundGradFunctors( BinaryGradFunctor, UnaryFunctor>; - auto &dev_ctx = ctx.template device_context(); if (in_intermediate_out) { phi::funcs::FusedElemwiseAndActGradComputeEx< DeviceContext, @@ -213,21 +301,20 @@ template -static void RunUnaryCompoundGradFunctors( - const framework::ExecutionContext &ctx, - const UnaryGradFunctor &unary_grad_functor, - const BinaryFunctor &binary_functor, - const BinaryGradFunctor &binary_grad_functor, - const phi::DenseTensor *in_x, - const phi::DenseTensor *in_y, - const phi::DenseTensor *in_out, - const phi::DenseTensor *in_intermediate_out, - const phi::DenseTensor *in_out_grad, - phi::DenseTensor *x_grad, - phi::DenseTensor *y_grad, - phi::DenseTensor *d_intermediate_out) { +void RunUnaryCompoundGradFunctors(const DeviceContext &dev_ctx, + const UnaryGradFunctor &unary_grad_functor, + const BinaryFunctor &binary_functor, + const BinaryGradFunctor &binary_grad_functor, + const phi::DenseTensor *in_x, + const phi::DenseTensor *in_y, + const phi::DenseTensor *in_out, + const phi::DenseTensor *in_intermediate_out, + const phi::DenseTensor *in_out_grad, + phi::DenseTensor *x_grad, + phi::DenseTensor *y_grad, + phi::DenseTensor *d_intermediate_out, + int axis) { // Z = Unary(Binary(X, Y)) - int axis = ctx.Attr("axis"); using UnaryCompoundDxFunctor = phi::funcs::UnaryCompoundGradDxFunctor; - auto &dev_ctx = ctx.template device_context(); if (in_intermediate_out) { phi::funcs::FusedElemwiseAndActGradComputeEx< DeviceContext, @@ -300,125 +386,147 @@ 
static void RunUnaryCompoundGradFunctors( } template -static void RunFunctors(const framework::ExecutionContext &ctx, - const phi::DenseTensor &in_x, - const phi::DenseTensor &in_y, - std::vector *outputs) { - auto &functors = ctx.Attr>("functor_list"); +void RunFunctors(const DeviceContext &dev_ctx, + const phi::DenseTensor &in_x, + const phi::DenseTensor &in_y, + std::vector *outputs, + std::vector functor_list, + float in_scale, + int axis, + bool save_intermediate_out) { + auto &functors = functor_list; // TODO(zcd): The following code can be refined. auto funcs_str = functors[0] + "," + functors[1]; if (funcs_str == "elementwise_add,scale") { // Z = Binary(X, Unary(Y)) - T scale = static_cast(ctx.Attr("scale")); + T scale = static_cast(in_scale); RunBinaryCompoundFunctor, phi::funcs::ScaleFunctor>( - ctx, + dev_ctx, phi::funcs::AddFunctor(), phi::funcs::ScaleFunctor(scale), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else if (funcs_str == "scale,elementwise_add") { // Z = Unary(Binary(X, Y)) - T scale = static_cast(ctx.Attr("scale")); + T scale = static_cast(in_scale); RunUnaryCompoundFunctors, phi::funcs::AddFunctor>( - ctx, + dev_ctx, phi::funcs::ScaleFunctor(scale), phi::funcs::AddFunctor(), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else if (funcs_str == "elementwise_add,relu") { // Z = Binary(X, Unary(Y)) RunBinaryCompoundFunctor, phi::funcs::ReluFunctor>( - ctx, + dev_ctx, phi::funcs::AddFunctor(), phi::funcs::ReluFunctor(), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else if (funcs_str == "relu,elementwise_add") { // Z = Unary(Binary(X, Y)) RunUnaryCompoundFunctors, phi::funcs::AddFunctor>( - ctx, + dev_ctx, phi::funcs::ReluFunctor(), phi::funcs::AddFunctor(), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else if (funcs_str == "elementwise_mul,scale") { // Z = Binary(X, Unary(Y)) - T scale = static_cast(ctx.Attr("scale")); + T scale = 
static_cast(in_scale); RunBinaryCompoundFunctor, phi::funcs::ScaleFunctor>( - ctx, + dev_ctx, phi::funcs::MultiplyFunctor(), phi::funcs::ScaleFunctor(scale), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else if (funcs_str == "tanh,elementwise_add") { // Z = Unary(Binary(X, Y)) RunUnaryCompoundFunctors, phi::funcs::AddFunctor>( - ctx, + dev_ctx, phi::funcs::TanhFunctor(), phi::funcs::AddFunctor(), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else if (funcs_str == "elementwise_mul,tanh") { // Z = Binary(X, Unary(Y)) RunBinaryCompoundFunctor, phi::funcs::TanhFunctor>( - ctx, + dev_ctx, phi::funcs::MultiplyFunctor(), phi::funcs::TanhFunctor(), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else if (funcs_str == "elementwise_mul,sigmoid") { // Z = Binary(X, Unary(Y)) RunBinaryCompoundFunctor, phi::funcs::SigmoidFunctor>( - ctx, + dev_ctx, phi::funcs::MultiplyFunctor(), phi::funcs::SigmoidFunctor(), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else if (funcs_str == "gelu,elementwise_add") { // Z = Unary(Binary(X, Y)) RunUnaryCompoundFunctors, phi::funcs::AddFunctor>( - ctx, + dev_ctx, phi::funcs::GeluFunctor(), phi::funcs::AddFunctor(), in_x, in_y, - outputs); + outputs, + axis, + save_intermediate_out); } else { PADDLE_THROW(phi::errors::InvalidArgument("%s has not been implemented.", funcs_str)); @@ -426,28 +534,31 @@ static void RunFunctors(const framework::ExecutionContext &ctx, } template -static void RunGradFunctors(const framework::ExecutionContext &ctx, - const phi::DenseTensor *in_x, - const phi::DenseTensor *in_y, - const phi::DenseTensor *in_out, - const phi::DenseTensor *in_intermediate_out, - const phi::DenseTensor *in_out_grad, - phi::DenseTensor *x_grad, - phi::DenseTensor *y_grad, - phi::DenseTensor *d_intermediate_out) { - auto &functors = ctx.Attr>("functor_list"); +void RunGradFunctors(const DeviceContext &dev_ctx, + const phi::DenseTensor *in_x, + 
const phi::DenseTensor *in_y, + const phi::DenseTensor *in_out, + const phi::DenseTensor *in_intermediate_out, + const phi::DenseTensor *in_out_grad, + phi::DenseTensor *x_grad, + phi::DenseTensor *y_grad, + phi::DenseTensor *d_intermediate_out, + std::vector functor_list, + float in_scale, + int axis) { + auto &functors = functor_list; auto funcs_str = functors[0] + "," + functors[1]; if (funcs_str == "elementwise_add_grad,scale_grad") { // The backward of Z = Binary(X, Unary(Y)) - T scale = static_cast(ctx.Attr("scale")); + T scale = static_cast(in_scale); RunBinaryCompoundGradFunctors, phi::funcs::ScaleFunctor, phi::funcs::ScaleGradFunctor, InPlace>( - ctx, + dev_ctx, phi::funcs::AddGradFunctor(), phi::funcs::ScaleFunctor(scale), phi::funcs::ScaleGradFunctor(scale), @@ -458,17 +569,18 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + axis); } else if (funcs_str == "scale_grad,elementwise_add_grad") { // The backward of Z = Unary(Binary(X, Y)) - T scale = static_cast(ctx.Attr("scale")); + T scale = static_cast(in_scale); RunUnaryCompoundGradFunctors, phi::funcs::AddFunctor, phi::funcs::AddGradFunctor, InPlace>( - ctx, + dev_ctx, phi::funcs::ScaleGradFunctor(scale), phi::funcs::AddFunctor(), phi::funcs::AddGradFunctor(), @@ -479,7 +591,8 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + axis); } else if (funcs_str == "elementwise_add_grad,relu_grad") { // The backward of Z = Binary(X, Unary(Y)) RunBinaryCompoundGradFunctors, phi::funcs::ReluFunctor, phi::funcs::ReluGradFunctor, - InPlace>(ctx, + InPlace>(dev_ctx, phi::funcs::AddGradFunctor(), phi::funcs::ReluFunctor(), phi::funcs::ReluGradFunctor(), @@ -498,7 +611,8 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + axis); } else 
if (funcs_str == "relu_grad,elementwise_add_grad") { // The backward of Z = Unary(Binary(X, Y)) RunUnaryCompoundGradFunctors, phi::funcs::AddFunctor, phi::funcs::AddGradFunctor, - InPlace>(ctx, + InPlace>(dev_ctx, phi::funcs::ReluGradFunctor(), phi::funcs::AddFunctor(), phi::funcs::AddGradFunctor(), @@ -517,17 +631,18 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + axis); } else if (funcs_str == "elementwise_mul_grad,scale_grad") { // The backward of Z = Binary(X, Unary(Y)) - T scale = static_cast(ctx.Attr("scale")); + T scale = static_cast(in_scale); RunBinaryCompoundGradFunctors, phi::funcs::ScaleFunctor, phi::funcs::ScaleGradFunctor, InPlace>( - ctx, + dev_ctx, phi::funcs::MulGradFunctor(), phi::funcs::ScaleFunctor(scale), phi::funcs::ScaleGradFunctor(scale), @@ -538,7 +653,8 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + axis); } else if (funcs_str == "tanh_grad,elementwise_add_grad") { // The backward of Z = Unary(Binary(X, Y)) RunUnaryCompoundGradFunctors, phi::funcs::AddFunctor, phi::funcs::AddGradFunctor, - InPlace>(ctx, + InPlace>(dev_ctx, phi::funcs::TanhGradFunctor(), phi::funcs::AddFunctor(), phi::funcs::AddGradFunctor(), @@ -557,7 +673,8 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + axis); } else if (funcs_str == "elementwise_mul_grad,tanh_grad") { // The backward of Z = Binary(X, Unary(Y)) RunBinaryCompoundGradFunctors, phi::funcs::TanhFunctor, phi::funcs::TanhGradFunctor, - InPlace>(ctx, + InPlace>(dev_ctx, phi::funcs::MulGradFunctor(), phi::funcs::TanhFunctor(), phi::funcs::TanhGradFunctor(), @@ -576,7 +693,8 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + 
axis); } else if (funcs_str == "elementwise_mul_grad,sigmoid_grad") { // The backward of Z = Binary(X, Unary(Y)) RunBinaryCompoundGradFunctors, phi::funcs::SigmoidFunctor, phi::funcs::SigmoidGradFunctor, - InPlace>(ctx, + InPlace>(dev_ctx, phi::funcs::MulGradFunctor(), phi::funcs::SigmoidFunctor(), phi::funcs::SigmoidGradFunctor(), @@ -595,7 +713,8 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + axis); } else if (funcs_str == "gelu_grad,elementwise_add_grad") { // The backward of Z = Unary(Binary(X, Y)) RunUnaryCompoundGradFunctors, phi::funcs::AddFunctor, phi::funcs::AddGradFunctor, - InPlace>(ctx, + InPlace>(dev_ctx, phi::funcs::GeluGradFunctor(), phi::funcs::AddFunctor(), phi::funcs::AddGradFunctor(), @@ -614,170 +733,13 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, in_out_grad, x_grad, y_grad, - d_intermediate_out); + d_intermediate_out, + axis); } else { PADDLE_THROW(phi::errors::InvalidArgument("%s has not been implemented.", funcs_str)); } } -template -class FusedElemwiseActivationKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto &in_x = GET_DATA_SAFELY(ctx.Input("X"), - "Input", - "X", - "FusedElemwiseActivation"); - auto &in_y = GET_DATA_SAFELY(ctx.Input("Y"), - "Input", - "Y", - "FusedElemwiseActivation"); - - PADDLE_ENFORCE_EQ( - ctx.HasOutput("Out"), - true, - phi::errors::InvalidArgument("The output(Out) should not be empty")); - auto output = ctx.Output("Out"); - - std::vector outputs; - outputs.emplace_back(output); - - if (ctx.Attr("save_intermediate_out")) { - PADDLE_ENFORCE_EQ(ctx.HasOutput("IntermediateOut"), - true, - phi::errors::InvalidArgument( - "The save_intermediate_out is enable, so the " - "IntermediateOut should not be empty.")); - - auto intermediate_out = ctx.Output("IntermediateOut"); - outputs.emplace_back(intermediate_out); - } else { 
- outputs.emplace_back(nullptr); - } - - RunFunctors(ctx, in_x, in_y, &outputs); - } -}; - -template -class FusedElemwiseActivationGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto in_y = ctx.Input("Y"); - PADDLE_ENFORCE_NE( - in_y, - nullptr, - phi::errors::InvalidArgument("Input(Y) should not be nullptr.")); - phi::DenseTensor *in_out = - const_cast(ctx.Input("Out")); - - auto in_out_grad = - ctx.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE_NE( - in_out_grad, - nullptr, - phi::errors::InvalidArgument("Input(Out@Grad) should not be nullptr.")); - - phi::DenseTensor *in_x = - const_cast(ctx.Input("X")); - phi::DenseTensor *x_grad = - ctx.Output(framework::GradVarName("X")); - phi::DenseTensor *y_grad = - ctx.Output(framework::GradVarName("Y")); - phi::DenseTensor *d_intermediate_out = - ctx.Output(framework::GradVarName("IntermediateOut")); - - auto functor_list = ctx.Attr>("functor_list"); - - // Get intermediate_out - phi::DenseTensor *in_intermediate_out = nullptr; - if (ctx.Attr("save_intermediate_out")) { - // if save_intermediate_out is true, for Unary(Binary(x, y)) and - // Binary(x, Unary(y)), the Binary(x, y) and Unary(y) not need to - // recompute. - in_intermediate_out = const_cast( - ctx.Input("IntermediateOut")); - PADDLE_ENFORCE_NE(in_intermediate_out, - nullptr, - phi::errors::InvalidArgument( - "The option of 'save_intermediate_out' is opened," - " so the number of 'Out' should be two.")); - } else { - if (!InputXCanBeAbsent(functor_list)) { - PADDLE_ENFORCE_NE( - in_x, - nullptr, - phi::errors::InvalidArgument("Input(X) should not be null.")); - } - } - - // Get in_x - if (ctx.HasInput("X")) { - PADDLE_ENFORCE_NE( - in_x, - nullptr, - phi::errors::InvalidArgument("Input(X) should not be null.")); - } else { - // If functor_list contains elementwise_add, the backward doesn't use - // in_x, in_y and in_out. 
- PADDLE_ENFORCE_EQ(InputXCanBeAbsent(functor_list), - true, - phi::errors::InvalidArgument( - "Only when the compoundfunctor contains " - "elementwise_add_grad, the 'X' could be absent.")); - in_x = const_cast(in_out_grad); - } - - // Get in_Out - if (ctx.HasInput("Out")) { - PADDLE_ENFORCE_NE( - in_out, - nullptr, - phi::errors::InvalidArgument("Input(X) should not be null.")); - } else { - // If functor_list contains elementwise_add, the backward doesn't use - // in_x, in_y and in_out. - PADDLE_ENFORCE_EQ(InputXCanBeAbsent(functor_list), - true, - phi::errors::InvalidArgument( - "Only when the compoundfunctor contains " - "elementwise_add_grad, the 'X' could be absent.")); - in_out = const_cast(in_out_grad); - } - - bool has_in_place = HasInPlaceUnary(functor_list); - if (has_in_place) { - RunGradFunctors(ctx, - in_x, - in_y, - in_out, - in_intermediate_out, - in_out_grad, - x_grad, - y_grad, - d_intermediate_out); - } else { - RunGradFunctors(ctx, - in_x, - in_y, - in_out, - in_intermediate_out, - in_out_grad, - x_grad, - y_grad, - d_intermediate_out); - } - } -}; - -template -class FusedElemwiseAddActivationKernel - : public FusedElemwiseActivationKernel {}; - -template -class FusedElemwiseAddActivationGradKernel - : public FusedElemwiseActivationGradKernel {}; - -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/funcs/gather_scatter_functor.cc b/paddle/phi/kernels/funcs/gather_scatter_functor.cc index ca6c44dbdbd76..df0cf3ac9be33 100644 --- a/paddle/phi/kernels/funcs/gather_scatter_functor.cc +++ b/paddle/phi/kernels/funcs/gather_scatter_functor.cc @@ -18,8 +18,7 @@ limitations under the License. 
*/ #include "paddle/common/macros.h" -namespace phi { -namespace funcs { +namespace phi::funcs { class TensorAssign { public: @@ -705,5 +704,4 @@ Instantiate_Template_Function(cpu_gather_kernel) // NOLINT Instantiate_Template_Function_With_Out( cpu_scatter_mul_min_max_value_grad_kernel) // NOLINT -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/gpc.cc b/paddle/phi/kernels/funcs/gpc.cc index ba24dbb442dfa..397f0d23ab12f 100644 --- a/paddle/phi/kernels/funcs/gpc.cc +++ b/paddle/phi/kernels/funcs/gpc.cc @@ -28,8 +28,7 @@ #include "paddle/phi/core/enforce.h" -namespace phi { -namespace funcs { +namespace phi::funcs { typedef struct lmt_shape { /* Local minima table */ double y; /* Y coordinate at local minimum */ @@ -2263,5 +2262,4 @@ void gpc_tristrip_clip(gpc_op op, gpc_free(sbt); } // NOLINT -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/gru_compute.cc b/paddle/phi/kernels/funcs/gru_compute.cc index f0c946134906b..563c5a2d34fe2 100644 --- a/paddle/phi/kernels/funcs/gru_compute.cc +++ b/paddle/phi/kernels/funcs/gru_compute.cc @@ -15,8 +15,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h" #include "paddle/phi/kernels/funcs/detail/gru_kernel.h" -namespace phi { -namespace funcs { +namespace phi::funcs { template struct GRUUnitFunctor { @@ -364,5 +363,4 @@ template struct GRUUnitFunctorV2; template struct GRUUnitGradFunctorV2; template struct GRUUnitGradFunctorV2; -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/jit/gen/adamw.cc b/paddle/phi/kernels/funcs/jit/gen/adamw.cc index 9426ea16a88fb..4a8545c24f964 100644 --- a/paddle/phi/kernels/funcs/jit/gen/adamw.cc +++ b/paddle/phi/kernels/funcs/jit/gen/adamw.cc @@ -19,9 +19,7 @@ #include "paddle/phi/backends/cpu/cpu_info.h" #include "paddle/phi/kernels/funcs/jit/registry.h" -namespace phi { -namespace jit { -namespace gen { +namespace phi::jit::gen { void AdamWJitCode::loadArgs() { static constexpr int32_t one_as_float = 0x3f800000; @@ -155,9 +153,7 @@ class AdamWCreator : public JitCodeCreator { } }; -} // namespace gen -} // namespace jit -} // namespace phi +} // namespace phi::jit::gen namespace gen = phi::jit::gen; diff --git a/paddle/phi/kernels/funcs/jit/gen/blas.cc b/paddle/phi/kernels/funcs/jit/gen/blas.cc index 1e29b7f4953fe..f9de8a6d01f45 100644 --- a/paddle/phi/kernels/funcs/jit/gen/blas.cc +++ b/paddle/phi/kernels/funcs/jit/gen/blas.cc @@ -18,9 +18,7 @@ #include "paddle/phi/kernels/funcs/jit/macro.h" #include "paddle/phi/kernels/funcs/jit/registry.h" -namespace phi { -namespace jit { -namespace gen { +namespace phi::jit::gen { void VXXJitCode::genCode() { // do not need push stack, and do not need save avx512reg if do not use avx512 @@ -134,9 +132,7 @@ DECLARE_BLAS_CREATOR(VAddBias); #undef DECLARE_BLAS_CREATOR -} // namespace gen -} // namespace jit -} // namespace phi +} // namespace phi::jit::gen namespace gen = phi::jit::gen; diff --git a/paddle/phi/kernels/funcs/jit/gen/gru.cc b/paddle/phi/kernels/funcs/jit/gen/gru.cc index 33dfaa6cd097c..69f1e62fbfcf0 100644 --- 
a/paddle/phi/kernels/funcs/jit/gen/gru.cc +++ b/paddle/phi/kernels/funcs/jit/gen/gru.cc @@ -20,9 +20,7 @@ #include "paddle/phi/kernels/funcs/jit/macro.h" #include "paddle/phi/kernels/funcs/jit/registry.h" -namespace phi { -namespace jit { -namespace gen { +namespace phi::jit::gen { void GRUJitCode::genCode() { reg64_t reg_ptr_gates = rax; @@ -107,9 +105,7 @@ DECLARE_GRU_CREATOR(GRUHtPart2); #undef DECLARE_GRU_CREATOR -} // namespace gen -} // namespace jit -} // namespace phi +} // namespace phi::jit::gen namespace gen = phi::jit::gen; diff --git a/paddle/phi/kernels/funcs/jit/gen/seqpool.cc b/paddle/phi/kernels/funcs/jit/gen/seqpool.cc index fca00feb5c49b..5f1edf194d252 100644 --- a/paddle/phi/kernels/funcs/jit/gen/seqpool.cc +++ b/paddle/phi/kernels/funcs/jit/gen/seqpool.cc @@ -18,9 +18,7 @@ #include "paddle/phi/kernels/funcs/jit/gen/act.h" // for exp_float_consts ones #include "paddle/phi/kernels/funcs/jit/registry.h" -namespace phi { -namespace jit { -namespace gen { +namespace phi::jit::gen { void SeqPoolJitCode::genCode() { constexpr int block = YMM_FLOAT_BLOCK; @@ -85,9 +83,7 @@ class SeqPoolCreator : public JitCodeCreator { } }; -} // namespace gen -} // namespace jit -} // namespace phi +} // namespace phi::jit::gen namespace gen = phi::jit::gen; diff --git a/paddle/phi/kernels/funcs/jit/gen/vbroadcast.cc b/paddle/phi/kernels/funcs/jit/gen/vbroadcast.cc index c52d7d50379b4..4b9944fb8b2ba 100644 --- a/paddle/phi/kernels/funcs/jit/gen/vbroadcast.cc +++ b/paddle/phi/kernels/funcs/jit/gen/vbroadcast.cc @@ -17,9 +17,7 @@ #include "paddle/phi/backends/cpu/cpu_info.h" #include "paddle/phi/kernels/funcs/jit/registry.h" -namespace phi { -namespace jit { -namespace gen { +namespace phi::jit::gen { void VBroadcastJitCode::genCode() { preCode(); @@ -85,9 +83,7 @@ class VBroadcastCreator : public JitCodeCreator { } }; -} // namespace gen -} // namespace jit -} // namespace phi +} // namespace phi::jit::gen namespace gen = phi::jit::gen; diff --git 
a/paddle/phi/kernels/funcs/jit/kernel_pool.cc b/paddle/phi/kernels/funcs/jit/kernel_pool.cc index 97a09bf48ba50..e850626101130 100644 --- a/paddle/phi/kernels/funcs/jit/kernel_pool.cc +++ b/paddle/phi/kernels/funcs/jit/kernel_pool.cc @@ -14,8 +14,7 @@ #include "paddle/phi/kernels/funcs/jit/kernel_pool.h" -namespace phi { -namespace jit { +namespace phi::jit { std::map>& GetJITCodesMap() { static thread_local std::map> g_jit_codes_map; @@ -37,5 +36,4 @@ ReferKernelPool& ReferKernelPool::Instance() { return g_refer_kernel_pool; } -} // namespace jit -} // namespace phi +} // namespace phi::jit diff --git a/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc b/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc index c36ca0d7360cc..43a011277cb5f 100644 --- a/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc +++ b/paddle/phi/kernels/funcs/jit/more/intrinsic/crf_decoding.cc @@ -19,10 +19,7 @@ #include "paddle/phi/backends/cpu/cpu_info.h" #include "paddle/phi/kernels/funcs/jit/registry.h" -namespace phi { -namespace jit { -namespace more { -namespace intrinsic { +namespace phi::jit::more::intrinsic { // Note: intrinsic code is not runtime build. 
// For example, if you build code on AVX, and run on AVX512 it can only use AVX @@ -174,10 +171,7 @@ bool CRFDecodingKernel::CanBeUsed(const int& d) const { return phi::backends::cpu::MayIUse(phi::backends::cpu::avx) && d >= block; } -} // namespace intrinsic -} // namespace more -} // namespace jit -} // namespace phi +} // namespace phi::jit::more::intrinsic namespace intrinsic = phi::jit::more::intrinsic; diff --git a/paddle/phi/kernels/funcs/jit/more/mix/mix.cc b/paddle/phi/kernels/funcs/jit/more/mix/mix.cc index 7bb58a8b2463a..2c659111d435e 100644 --- a/paddle/phi/kernels/funcs/jit/more/mix/mix.cc +++ b/paddle/phi/kernels/funcs/jit/more/mix/mix.cc @@ -17,10 +17,7 @@ #include "paddle/phi/kernels/funcs/jit/kernels.h" #include "paddle/phi/kernels/funcs/jit/registry.h" -namespace phi { -namespace jit { -namespace more { -namespace mix { +namespace phi::jit::more::mix { using CPUPlace = phi::CPUPlace; @@ -196,10 +193,7 @@ bool GRUHtPart1Kernel::CanBeUsed(const gru_attr_t& attr) const { return true; } bool GRUHtPart2Kernel::CanBeUsed(const gru_attr_t& attr) const { return true; } -} // namespace mix -} // namespace more -} // namespace jit -} // namespace phi +} // namespace phi::jit::more::mix namespace mix = phi::jit::more::mix; diff --git a/paddle/phi/kernels/funcs/lapack/lapack_function.cc b/paddle/phi/kernels/funcs/lapack/lapack_function.cc index 09d45fcf24be9..ebfd53291c36f 100644 --- a/paddle/phi/kernels/funcs/lapack/lapack_function.cc +++ b/paddle/phi/kernels/funcs/lapack/lapack_function.cc @@ -17,8 +17,7 @@ #include "paddle/phi/backends/dynload/lapack.h" #include "paddle/phi/common/complex.h" -namespace phi { -namespace funcs { +namespace phi::funcs { // LU (for example) template <> @@ -537,5 +536,4 @@ void lapackSvd(char jobz, &jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, iwork, info); } -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/lstm_utils.h 
b/paddle/phi/kernels/funcs/lstm_utils.h new file mode 100644 index 0000000000000..4a02b097fd340 --- /dev/null +++ b/paddle/phi/kernels/funcs/lstm_utils.h @@ -0,0 +1,36 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +#include "paddle/phi/core/enforce.h" +#include "paddle/phi/core/mixed_vector.h" +#include "paddle/phi/kernels/funcs/detail/activation_functions.h" +#include "paddle/phi/kernels/funcs/sequence2batch.h" + +namespace phi { + +template +inline void ReorderInitState(const Context& dev_ctx, + const phi::DenseTensor& src, + phi::Vector index_lod, + phi::DenseTensor* dst, + bool indexed_src) { + phi::funcs::CopyMatrixRowsFunctor row_shuffle; + dst->Resize(src.dims()); + dev_ctx.template Alloc(dst); + row_shuffle(dev_ctx, src, index_lod, dst, indexed_src); +} +} // namespace phi diff --git a/paddle/phi/kernels/funcs/math/sampler.cc b/paddle/phi/kernels/funcs/math/sampler.cc index b225674274a7b..46c20dc5a4727 100644 --- a/paddle/phi/kernels/funcs/math/sampler.cc +++ b/paddle/phi/kernels/funcs/math/sampler.cc @@ -18,8 +18,7 @@ #include "paddle/phi/core/generator.h" -namespace phi { -namespace math { +namespace phi::math { Sampler::~Sampler() = default; @@ -93,5 +92,4 @@ int64_t CustomSampler::Sample() const { float CustomSampler::Probability(int64_t value) const { return probs_[value]; } -} // namespace math -} // namespace phi +} // namespace phi::math diff 
--git a/paddle/phi/kernels/funcs/matrix_inverse.cc b/paddle/phi/kernels/funcs/matrix_inverse.cc index c316970e6a560..2a3749ef36b81 100644 --- a/paddle/phi/kernels/funcs/matrix_inverse.cc +++ b/paddle/phi/kernels/funcs/matrix_inverse.cc @@ -16,8 +16,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/blas/blas.h" -namespace phi { -namespace funcs { +namespace phi::funcs { template void MatrixInverseFunctor::operator()(const Context& dev_ctx, @@ -28,6 +27,7 @@ void MatrixInverseFunctor::operator()(const Context& dev_ctx, template class MatrixInverseFunctor; template class MatrixInverseFunctor; +template class MatrixInverseFunctor>; +template class MatrixInverseFunctor>; -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/matrix_inverse.cu b/paddle/phi/kernels/funcs/matrix_inverse.cu index c0ea7ad84c41b..f46dd714c9f55 100644 --- a/paddle/phi/kernels/funcs/matrix_inverse.cu +++ b/paddle/phi/kernels/funcs/matrix_inverse.cu @@ -131,6 +131,8 @@ void MatrixInverseFunctor::operator()(const Context& dev_ctx, template class MatrixInverseFunctor; template class MatrixInverseFunctor; +template class MatrixInverseFunctor>; +template class MatrixInverseFunctor>; } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/funcs/matrix_inverse.h b/paddle/phi/kernels/funcs/matrix_inverse.h index f0cd265a54648..d45f7d8863a63 100644 --- a/paddle/phi/kernels/funcs/matrix_inverse.h +++ b/paddle/phi/kernels/funcs/matrix_inverse.h @@ -25,14 +25,69 @@ limitations under the License. 
*/ namespace phi { namespace funcs { +template +struct MapMatrixInverseFunctor { + void operator()( + const Context& dev_ctx, const T* a_ptr, T* a_inv_ptr, int offset, int n) { + using Matrix = + Eigen::Matrix; + using EigenMatrixMap = Eigen::Map; + using ConstEigenMatrixMap = Eigen::Map; + + ConstEigenMatrixMap mat(a_ptr + offset, n, n); + EigenMatrixMap mat_inv(a_inv_ptr + offset, n, n); + Eigen::PartialPivLU lu; + lu.compute(mat); + + const T min_abs_pivot = lu.matrixLU().diagonal().cwiseAbs().minCoeff(); + PADDLE_ENFORCE_GT(min_abs_pivot, + static_cast(0), + errors::InvalidArgument("Input is not invertible.")); + mat_inv.noalias() = lu.inverse(); + } +}; + +template +struct MapMatrixInverseFunctor> { + void operator()(const Context& dev_ctx, + const phi::dtype::complex* a_ptr, + phi::dtype::complex* a_inv_ptr, + int offset, + int n) { + using Matrix = Eigen::Matrix, + Eigen::Dynamic, + Eigen::Dynamic, + Eigen::RowMajor>; + using EigenMatrixMap = Eigen::Map; + using ConstEigenMatrixMap = Eigen::Map; + std::complex* std_ptr = new std::complex[n * n]; + std::complex* std_inv_ptr = new std::complex[n * n]; + for (int i = 0; i < n * n; i++) { + *(std_ptr + i) = static_cast>(*(a_ptr + offset + i)); + } + ConstEigenMatrixMap mat(std_ptr, n, n); + EigenMatrixMap mat_inv(std_inv_ptr, n, n); + Eigen::PartialPivLU lu; + lu.compute(mat); + + const T min_abs_pivot = lu.matrixLU().diagonal().cwiseAbs().minCoeff(); + PADDLE_ENFORCE_NE(min_abs_pivot, + static_cast>(0), + errors::InvalidArgument("Input is not invertible.")); + mat_inv.noalias() = lu.inverse(); + for (int i = 0; i < n * n; i++) { + *(a_inv_ptr + offset + i) = + static_cast>(*(std_inv_ptr + i)); + } + delete[] std_ptr; + delete[] std_inv_ptr; + } +}; + template void ComputeInverseEigen(const Context& dev_ctx, const DenseTensor& a, DenseTensor* a_inv) { - using Matrix = - Eigen::Matrix; - using EigenMatrixMap = Eigen::Map; - using ConstEigenMatrixMap = Eigen::Map; const auto& mat_dims = a.dims(); const int rank = 
mat_dims.size(); int n = mat_dims[rank - 1]; @@ -41,17 +96,13 @@ void ComputeInverseEigen(const Context& dev_ctx, const T* a_ptr = a.data(); T* a_inv_ptr = dev_ctx.template Alloc(a_inv); + // Putting phi::dtype::complex into eigen::matrix has a problem, + // it's not going to get the right result, + // so we're going to convert it to std::complex and + // then we're going to put it into eigen::matrix. for (int i = 0; i < batch_size; ++i) { - ConstEigenMatrixMap mat(a_ptr + i * n * n, n, n); - EigenMatrixMap mat_inv(a_inv_ptr + i * n * n, n, n); - Eigen::PartialPivLU lu; - lu.compute(mat); - - const T min_abs_pivot = lu.matrixLU().diagonal().cwiseAbs().minCoeff(); - PADDLE_ENFORCE_GT(min_abs_pivot, - static_cast(0), - errors::InvalidArgument("Input is not invertible.")); - mat_inv.noalias() = lu.inverse(); + MapMatrixInverseFunctor functor; + functor(dev_ctx, a_ptr, a_inv_ptr, i * n * n, n); } } diff --git a/paddle/phi/kernels/funcs/pooling.cc b/paddle/phi/kernels/funcs/pooling.cc index 99281f62cef37..3c93a8341d411 100644 --- a/paddle/phi/kernels/funcs/pooling.cc +++ b/paddle/phi/kernels/funcs/pooling.cc @@ -684,12 +684,16 @@ template class MaxPool2dGradFunctor; template class Pool2dFunctor, float>; template class Pool2dFunctor, float>; +template class Pool2dFunctor, float>; template class Pool2dGradFunctor, float>; template class Pool2dGradFunctor, float>; +template class Pool2dGradFunctor, float>; template class Pool2dFunctor, double>; template class Pool2dFunctor, double>; +template class Pool2dFunctor, double>; template class Pool2dGradFunctor, double>; template class Pool2dGradFunctor, double>; +template class Pool2dGradFunctor, double>; /* * Tensors are in NCDHW or NDHWC format. 
diff --git a/paddle/phi/kernels/funcs/pooling.cu b/paddle/phi/kernels/funcs/pooling.cu index 3d69d11c4f839..62537d5488e23 100644 --- a/paddle/phi/kernels/funcs/pooling.cu +++ b/paddle/phi/kernels/funcs/pooling.cu @@ -1005,12 +1005,16 @@ template class MaxPool2dGradFunctor; template class Pool2dFunctor, float>; template class Pool2dFunctor, float>; +template class Pool2dFunctor, float>; template class Pool2dGradFunctor, float>; template class Pool2dGradFunctor, float>; +template class Pool2dGradFunctor, float>; template class Pool2dFunctor, double>; template class Pool2dFunctor, double>; +template class Pool2dFunctor, double>; template class Pool2dGradFunctor, double>; template class Pool2dGradFunctor, double>; +template class Pool2dGradFunctor, double>; template class Pool2dFunctor, @@ -1018,24 +1022,36 @@ template class Pool2dFunctor, dtype::float16>; +template class Pool2dFunctor, + dtype::float16>; template class Pool2dGradFunctor, dtype::float16>; template class Pool2dGradFunctor, dtype::float16>; +template class Pool2dGradFunctor, + dtype::float16>; template class Pool2dFunctor, dtype::bfloat16>; template class Pool2dFunctor, dtype::bfloat16>; +template class Pool2dFunctor, + dtype::bfloat16>; template class Pool2dGradFunctor, dtype::bfloat16>; template class Pool2dGradFunctor, dtype::bfloat16>; +template class Pool2dGradFunctor, + dtype::bfloat16>; template __global__ void KernelPool3D(const int nthreads, diff --git a/paddle/phi/kernels/funcs/pooling.h b/paddle/phi/kernels/funcs/pooling.h index 3e91175e8a392..325116ce0cf7e 100644 --- a/paddle/phi/kernels/funcs/pooling.h +++ b/paddle/phi/kernels/funcs/pooling.h @@ -68,6 +68,27 @@ class AvgPool { } }; +template +class LPPool { + using MT = typename dtype::MPTypeTrait::Type; + MT intermediate_res; + float norm_type; + + public: + HOSTDEVICE inline void setNormType(float ntype) { norm_type = ntype; } + DEVICE inline T initial() { + intermediate_res = static_cast(0.0f); + return static_cast(0); + } + DEVICE inline 
void compute(const T& x, T* y UNUSED) { + intermediate_res += static_cast(powf(x, norm_type)); + } + + DEVICE inline void finalize(const T& pool_field UNUSED, T* y) { + *y = static_cast(powf(intermediate_res, 1.0 / norm_type)); + } +}; + template class MaxPoolGrad { public: @@ -88,6 +109,21 @@ class AvgPoolGrad { } }; +template +class LPPoolGrad { + float norm_type; + + public: + static constexpr bool use_x = true; + HOSTDEVICE inline void setNormType(float ntype) { norm_type = ntype; } + HOSTDEVICE inline void compute( + const T& x, const T& y, const T& dy, T scale UNUSED, T* dx) { + *dx += static_cast(static_cast(dy) * + powf(static_cast(x) / static_cast(y), + norm_type - 1.0f)); + } +}; + /* used for adaptive pool to calculate start and end index of each divided grid */ HOSTDEVICE inline int AdaptStartIndex(int ph, int input_size, int output_size) { diff --git a/paddle/phi/kernels/funcs/segment_pooling.cc b/paddle/phi/kernels/funcs/segment_pooling.cc index 9af1211b9a144..31c67cace95d6 100644 --- a/paddle/phi/kernels/funcs/segment_pooling.cc +++ b/paddle/phi/kernels/funcs/segment_pooling.cc @@ -19,8 +19,7 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/kernels/funcs/eigen/common.h" -namespace phi { -namespace funcs { +namespace phi::funcs { using Tensor = DenseTensor; @@ -168,5 +167,4 @@ template class SegmentPoolGradFunctor; template class SegmentPoolGradFunctor; template class SegmentPoolGradFunctor; -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/sequence2batch.cc b/paddle/phi/kernels/funcs/sequence2batch.cc index 3e30bca02d8a4..924fb15c77218 100644 --- a/paddle/phi/kernels/funcs/sequence2batch.cc +++ b/paddle/phi/kernels/funcs/sequence2batch.cc @@ -14,8 +14,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/sequence2batch.h" -namespace phi { -namespace funcs { +namespace phi::funcs { template class CopyMatrixRowsFunctor { @@ -76,5 +75,4 @@ template class LoDTensor2BatchFunctor; template class Batch2LoDTensorFunctor; template class Batch2LoDTensorFunctor; -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/sequence_pooling.cc b/paddle/phi/kernels/funcs/sequence_pooling.cc index f4ee9c323366e..1fdaadfea01a1 100644 --- a/paddle/phi/kernels/funcs/sequence_pooling.cc +++ b/paddle/phi/kernels/funcs/sequence_pooling.cc @@ -21,8 +21,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/jit/kernels.h" #include "paddle/phi/kernels/funcs/math_function.h" -namespace phi { -namespace funcs { +namespace phi::funcs { template ; template class SequencePoolGradFunctor; template class SequencePoolGradFunctor; -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/funcs/softmax.cc b/paddle/phi/kernels/funcs/softmax.cc index 2d8dffc3aec6d..ce41590b84420 100644 --- a/paddle/phi/kernels/funcs/softmax.cc +++ b/paddle/phi/kernels/funcs/softmax.cc @@ -17,13 +17,11 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/kernels/funcs/softmax_impl.h" -namespace phi { -namespace funcs { +namespace phi::funcs { template class SoftmaxFunctor; template class SoftmaxFunctor; template class SoftmaxGradFunctor; template class SoftmaxGradFunctor; -} // namespace funcs -} // namespace phi +} // namespace phi::funcs diff --git a/paddle/phi/kernels/fusion/cpu/fused_elemwise_activation_grad_kernel.cc b/paddle/phi/kernels/fusion/cpu/fused_elemwise_activation_grad_kernel.cc new file mode 100644 index 0000000000000..818722a224867 --- /dev/null +++ b/paddle/phi/kernels/fusion/cpu/fused_elemwise_activation_grad_kernel.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/fused_elemwise_activation_kernel_impl.h"
// CPU registration of the backward kernel `fused_elemwise_activation_grad`
// for float and double, for every layout. The kernel template is presumably
// provided by the impl header included above - confirm. The trailing `{}`
// leaves the registration body empty.
PD_REGISTER_KERNEL(fused_elemwise_activation_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::FusedElemwiseActivationGradKernel,
                   float,
                   double) {}

// Same registration for the `fused_elemwise_add_activation_grad` kernel.
PD_REGISTER_KERNEL(fused_elemwise_add_activation_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::FusedElemwiseAddActivationGradKernel,
                   float,
                   double) {}
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/fused_elemwise_activation_kernel_impl.h"
// CPU registration of the forward kernel `fused_elemwise_activation` for
// float and double, for every layout. The kernel template is presumably
// provided by the impl header included above - confirm. The trailing `{}`
// leaves the registration body empty.
PD_REGISTER_KERNEL(fused_elemwise_activation,
                   CPU,
                   ALL_LAYOUT,
                   phi::FusedElemwiseActivationKernel,
                   float,
                   double) {}

// Same registration for the `fused_elemwise_add_activation` kernel.
PD_REGISTER_KERNEL(fused_elemwise_add_activation,
                   CPU,
                   ALL_LAYOUT,
                   phi::FusedElemwiseAddActivationKernel,
                   float,
                   double) {}
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/blas/blas.h" +#include "paddle/phi/kernels/funcs/fc_functor.h" +#include "paddle/phi/kernels/funcs/jit/kernels.h" +#include "paddle/phi/kernels/funcs/sequence2batch.h" + +namespace phi { + +#define INIT_BASE_DEFINES \ + auto *x = &x_in; \ + auto *h0 = h0_in.get_ptr(); \ + auto *c0 = c0_in.get_ptr(); \ + auto *wx = &weight_x_in; \ + auto *wh = &weight_h_in; \ + auto *bias = &bias_in; \ + auto *hidden_out = hidden; \ + auto *cell_out = cell; \ + auto x_dims = x->dims(); /* T x M*/ \ + auto wh_dims = wh->dims(); /* D x 4D*/ \ + const int M = x_dims[1]; \ + const int D = wh_dims[0]; \ + const int D4 = wh_dims[1] + +#define INIT_OTHER_DEFINES \ + const T *x_data = x->data(); \ + const T *wx_data = wx->data(); \ + const T *wh_data = wh->data(); \ + /* diagonal weight*/ \ + const T *wp_data = bias->data() + D4; \ + /* for peephole only*/ \ + T *checked_cell_data = nullptr; \ + if (use_peepholes) { \ + /* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/ \ + checked_cell_data = dev_ctx.template Alloc(checked_cell); \ + } \ + const phi::jit::lstm_attr_t attr( \ + D, \ + phi::jit::to_kerneltype(gate_activation), \ + phi::jit::to_kerneltype(candidate_activation), \ + phi::jit::to_kerneltype(cell_activation), \ + use_peepholes); \ + phi::jit::lstm_t one_step; \ + one_step.wp = wp_data; \ + one_step.checked = checked_cell_data; \ + auto ComputeC1H1 = phi::jit::KernelFuncs, \ + phi::CPUPlace>::Cache() \ + .At(attr); \ + auto ComputeCtHt = phi::jit::KernelFuncs, \ + phi::CPUPlace>::Cache() \ + .At(attr) + +// Wh GEMM +#define GEMM_WH_ADDON(bs, prev, out) \ + blas.GEMM(CblasNoTrans, \ + CblasNoTrans, \ + bs, \ + D4, \ + D, \ + static_cast(1), \ + prev, \ + D, \ + wh_data, \ + D4, \ + static_cast(1), \ + out, \ + D4) + +template +void SeqCompute(const Context &dev_ctx, + const 
DenseTensor &x_in, + const DenseTensor &weight_x_in, + const DenseTensor &weight_h_in, + const DenseTensor &bias_in, + const paddle::optional &h0_in, + const paddle::optional &c0_in, + bool use_peepholes, + bool is_reverse, + bool use_seq, + const std::string &gate_activation, + const std::string &cell_activation, + const std::string &candidate_activation, + float scale_data, + float shift_data, + const std::vector &scale_weights, + bool force_fp32_output, + DenseTensor *hidden, + DenseTensor *cell, + DenseTensor *xx, + DenseTensor *batched_input, + DenseTensor *batched_hidden, + DenseTensor *batched_cell, + DenseTensor *reordered_h0, + DenseTensor *reordered_c0, + DenseTensor *checked_cell) { + INIT_BASE_DEFINES; + INIT_OTHER_DEFINES; + auto x_lod = x->lod(); + const int total_T = static_cast(x_dims[0]); + const int N = static_cast(x_lod[0].size() - 1); + const T *h0_data = h0 ? h0->data() : nullptr; + const T *c0_data = c0 ? c0->data() : nullptr; + T *xx_data = dev_ctx.template Alloc(xx); + T *h_out_data = dev_ctx.template Alloc(hidden_out); + T *c_out_data = dev_ctx.template Alloc(cell_out); + auto blas = phi::funcs::GetBlas(dev_ctx); + + phi::funcs::FCFunctor fc; + fc(dev_ctx, total_T, D4, M, x_data, wx_data, xx_data, bias->data()); + + int xx_offset = D4; + int gate_offset = D; + if (is_reverse) { + const int offset = (total_T - 1) * D; + xx_data = xx_data + offset * 4; + h_out_data = h_out_data + offset; + c_out_data = c_out_data + offset; + xx_offset = -D4; + gate_offset = -D; + } + + for (int i = 0; i < N; ++i) { + int bid = is_reverse ? 
N - 1 - i : i; + int seq_len = static_cast(x_lod[0][bid + 1] - x_lod[0][bid]); + const T *prev_c_data = nullptr; + const T *prev_h_data = nullptr; + int tstart = 0; + if (h0_data) { + prev_h_data = h0_data + bid * D; + prev_c_data = c0_data + bid * D; + } else { + one_step.gates = xx_data; + one_step.ct = c_out_data; + one_step.ht = h_out_data; + ComputeC1H1(&one_step, &attr); + tstart = 1; + // move one step + prev_h_data = h_out_data; + prev_c_data = c_out_data; + xx_data = xx_data + xx_offset; + h_out_data = h_out_data + gate_offset; + c_out_data = c_out_data + gate_offset; + } + for (int step = tstart; step < seq_len; ++step) { + GEMM_WH_ADDON(1, prev_h_data, xx_data); + + one_step.gates = xx_data; + one_step.ct_1 = prev_c_data; + one_step.ct = c_out_data; + one_step.ht = h_out_data; + ComputeCtHt(&one_step, &attr); + // move one step + prev_h_data = h_out_data; + prev_c_data = c_out_data; + xx_data = xx_data + xx_offset; + h_out_data = h_out_data + gate_offset; + c_out_data = c_out_data + gate_offset; + } + } +} + +template +void BatchCompute(const Context &dev_ctx, + const DenseTensor &x_in, + const DenseTensor &weight_x_in, + const DenseTensor &weight_h_in, + const DenseTensor &bias_in, + const paddle::optional &h0_in, + const paddle::optional &c0_in, + bool use_peepholes, + bool is_reverse, + bool use_seq, + const std::string &gate_activation, + const std::string &cell_activation, + const std::string &candidate_activation, + float scale_data, + float shift_data, + const std::vector &scale_weights, + bool force_fp32_output, + DenseTensor *hidden, + DenseTensor *cell, + DenseTensor *xx, + DenseTensor *batched_input, + DenseTensor *batched_hidden, + DenseTensor *batched_cell, + DenseTensor *reordered_h0, + DenseTensor *reordered_c0, + DenseTensor *checked_cell) { + INIT_BASE_DEFINES; + if (x->lod()[0].size() == 2) { + xx->Resize({x_dims[0], D4}); + SeqCompute(dev_ctx, + x_in, + weight_x_in, + weight_h_in, + bias_in, + h0_in, + c0_in, + use_peepholes, + 
is_reverse, + use_seq, + gate_activation, + cell_activation, + candidate_activation, + scale_data, + shift_data, + scale_weights, + force_fp32_output, + hidden, + cell, + xx, + batched_input, + batched_hidden, + batched_cell, + reordered_h0, + reordered_c0, + checked_cell); + return; + } + INIT_OTHER_DEFINES; + + auto *batched_c_out = batched_cell; + auto *batched_h_out = batched_hidden; + T *xx_data = dev_ctx.template Alloc(xx); + T *batched_input_data = dev_ctx.template Alloc(batched_input); + T *batched_c_out_data = dev_ctx.template Alloc(batched_c_out); + T *batched_h_out_data = dev_ctx.template Alloc(batched_h_out); + dev_ctx.template Alloc(hidden_out); + dev_ctx.template Alloc(cell_out); + + phi::funcs::LoDTensor2BatchFunctor to_batch; + auto blas = phi::funcs::GetBlas(dev_ctx); + phi::funcs::FCFunctor fc; + if (M > D4) { + fc(dev_ctx, x_dims[0], D4, M, x_data, wx_data, xx_data, bias->data()); + to_batch(dev_ctx, *xx, batched_input, true, is_reverse); + } else { + to_batch(dev_ctx, *x, xx, true, is_reverse); + batched_input->set_lod(xx->lod()); + fc(dev_ctx, + x_dims[0], + D4, + M, + xx_data, + wx_data, + batched_input_data, + bias->data()); + } + + auto batched_lod = batched_input->lod(); + const auto &seq_order = batched_lod[2]; + const int max_bs = static_cast(seq_order.size()); + reordered_h0->Resize({max_bs, D}); + reordered_c0->Resize({max_bs, D}); + + int tstart = 0; + T *prev_h_data = nullptr; + T *prev_c_data = nullptr; + if (h0) { + // reorder h0, c0 + T *reordered_h0_data = dev_ctx.template Alloc(reordered_h0); + T *reordered_c0_data = dev_ctx.template Alloc(reordered_c0); + const T *h0_data = h0->data(); + const T *c0_data = c0->data(); + prev_h_data = reordered_h0_data; + prev_c_data = reordered_c0_data; + size_t sz = D; + for (int i = 0; i < max_bs; ++i) { + blas.VCOPY(sz, h0_data + seq_order[i] * D, reordered_h0_data); + blas.VCOPY(sz, c0_data + seq_order[i] * D, reordered_c0_data); + reordered_h0_data += D; + reordered_c0_data += D; + } + } 
else { + // compute without h0, c0 + T *cur_in_data = batched_input_data; + T *cur_h_out_data = batched_h_out_data; + T *cur_c_out_data = batched_c_out_data; + for (int i = 0; i < max_bs; ++i) { + one_step.gates = cur_in_data; + one_step.ct = cur_c_out_data; + one_step.ht = cur_h_out_data; + ComputeC1H1(&one_step, &attr); + + cur_in_data += D4; + cur_c_out_data += D; + cur_h_out_data += D; + } + tstart = 1; + prev_h_data = batched_h_out_data; + prev_c_data = batched_c_out_data; + } + + // compute kernel part + const auto &batch_starts = batched_lod[0]; + const int max_seq_len = static_cast(batch_starts.size() - 1); + const int offset = tstart * max_bs * D; + batched_input_data = batched_input_data + offset * 4; + batched_h_out_data = batched_h_out_data + offset; + batched_c_out_data = batched_c_out_data + offset; + for (int step = tstart; step < max_seq_len; ++step) { + const int cur_bs = + static_cast(batch_starts[step + 1] - batch_starts[step]); + GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data); + T *cur_in_data = batched_input_data; + T *cur_prev_c_data = prev_c_data; + T *cur_c_out_data = batched_c_out_data; + T *cur_h_out_data = batched_h_out_data; + for (int i = 0; i < cur_bs; ++i) { + one_step.gates = cur_in_data; + one_step.ct_1 = cur_prev_c_data; + one_step.ct = cur_c_out_data; + one_step.ht = cur_h_out_data; + ComputeCtHt(&one_step, &attr); + + // move one batch + cur_in_data += D4; + cur_prev_c_data += D; + cur_c_out_data += D; + cur_h_out_data += D; + } + // move one step + prev_c_data = batched_c_out_data; + prev_h_data = batched_h_out_data; + batched_c_out_data = cur_c_out_data; + batched_h_out_data = cur_h_out_data; + batched_input_data = cur_in_data; + } + + phi::funcs::Batch2LoDTensorFunctor to_seq; + batched_h_out->set_lod(batched_lod); + to_seq(dev_ctx, *batched_h_out, hidden_out); + batched_c_out->set_lod(batched_lod); + to_seq(dev_ctx, *batched_c_out, cell_out); +} + +template +void FusionLSTMKernel(const Context &dev_ctx, + const 
DenseTensor &x_in, + const DenseTensor &weight_x_in, + const DenseTensor &weight_h_in, + const DenseTensor &bias_in, + const paddle::optional &h0_in, + const paddle::optional &c0_in, + bool use_peepholes, + bool is_reverse, + bool use_seq, + const std::string &gate_activation, + const std::string &cell_activation, + const std::string &candidate_activation, + float scale_data, + float shift_data, + const std::vector &scale_weights, + bool force_fp32_output, + DenseTensor *hidden, + DenseTensor *cell, + DenseTensor *xx, + DenseTensor *batched_input, + DenseTensor *batched_hidden, + DenseTensor *batched_cell, + DenseTensor *reordered_h0, + DenseTensor *reordered_c0, + DenseTensor *checked_cell) { + if (use_seq) { + SeqCompute(dev_ctx, + x_in, + weight_x_in, + weight_h_in, + bias_in, + h0_in, + c0_in, + use_peepholes, + is_reverse, + use_seq, + gate_activation, + cell_activation, + candidate_activation, + scale_data, + shift_data, + scale_weights, + force_fp32_output, + hidden, + cell, + xx, + batched_input, + batched_hidden, + batched_cell, + reordered_h0, + reordered_c0, + checked_cell); + } else { + BatchCompute(dev_ctx, + x_in, + weight_x_in, + weight_h_in, + bias_in, + h0_in, + c0_in, + use_peepholes, + is_reverse, + use_seq, + gate_activation, + cell_activation, + candidate_activation, + scale_data, + shift_data, + scale_weights, + force_fp32_output, + hidden, + cell, + xx, + batched_input, + batched_hidden, + batched_cell, + reordered_h0, + reordered_c0, + checked_cell); + } +} + +#undef GEMM_WH_ADDON +#undef INIT_OTHER_DEFINES +#undef INIT_BASE_DEFINES + +} // namespace phi + +PD_REGISTER_KERNEL( + fusion_lstm, CPU, ALL_LAYOUT, phi::FusionLSTMKernel, float, double) {} diff --git a/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc b/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc index 4ff18849316d8..456d3370990cb 100644 --- a/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc +++ 
set -e

# Usage: compile.sh [python_exe] [cuda_root] [gpu_cc] [cmake_command]
#
# Builds libCutlassConv2d.so inside ./build. Every argument is optional and
# falls back to the matching default_* value defined below.

build_directory="build"
# -p: succeed when the directory is already there.
mkdir -p "$build_directory"

# Nothing to do when an earlier run already produced the library.
libname="$build_directory/libCutlassConv2d.so"
if [ -e "$libname" ]; then
    exit 0
fi

default_python_exe_path="/usr/bin/python"
default_cuda_root_path="/usr/local/cuda"
default_gpu_cc="80"
default_cmake_command="cmake"

python_exe_path="${1:-$default_python_exe_path}"
cuda_root_path="${2:-$default_cuda_root_path}"
gpu_cc="${3:-$default_gpu_cc}"
cmake_command="${4:-$default_cmake_command}"

# Only these compute capabilities are built; any other value ends the script
# successfully without building anything.
case "$gpu_cc" in
    75|80|86|89) ;;
    *) exit 0 ;;
esac

cutlass_repo_directory="cutlass"
if [ ! -d "$cutlass_repo_directory" ]; then
    git clone --branch v3.0.0 https://github.com/NVIDIA/cutlass
fi

cd "$build_directory"
# The path arguments are quoted so that values containing spaces or glob
# characters reach cmake as a single argument each. $cmake_command itself is
# left unquoted so that a multi-word value keeps being split into command plus
# arguments, exactly as before.
$cmake_command .. \
    -DPYTHON_EXECUTABLE="$python_exe_path" \
    -DCUDA_TOOLKIT_ROOT_DIR="$cuda_root_path" \
    -DCOMPUTE_CAPABILITY="$gpu_cc"
make -j8
cd -
set -e

# Usage: compile.sh [python_exe] [cuda_root] [gpu_cc] [cmake_command]
#
# Builds libCutlassGemmEpilogue.so inside ./build. Every argument is optional
# and falls back to the matching default_* value defined below.

build_directory="build"
# -p: succeed when the directory is already there.
mkdir -p "$build_directory"

# Nothing to do when an earlier run already produced the library.
libname="$build_directory/libCutlassGemmEpilogue.so"
if [ -e "$libname" ]; then
    exit 0
fi

default_python_exe_path="/usr/bin/python"
default_cuda_root_path="/usr/local/cuda"
default_gpu_cc="80"
default_cmake_command="cmake"

python_exe_path="${1:-$default_python_exe_path}"
cuda_root_path="${2:-$default_cuda_root_path}"
gpu_cc="${3:-$default_gpu_cc}"
cmake_command="${4:-$default_cmake_command}"

# Only these compute capabilities are built; any other value ends the script
# successfully without building anything.
case "$gpu_cc" in
    80|86|89) ;;
    *) exit 0 ;;
esac

cutlass_repo_directory="cutlass"
if [ ! -d "$cutlass_repo_directory" ]; then
    git clone --branch v2.11.0 https://github.com/NVIDIA/cutlass
fi

cd "$build_directory"
# The path arguments are quoted so that values containing spaces or glob
# characters reach cmake as a single argument each. $cmake_command itself is
# left unquoted so that a multi-word value keeps being split into command plus
# arguments, exactly as before.
$cmake_command .. \
    -DPYTHON_EXECUTABLE="$python_exe_path" \
    -DCUDA_TOOLKIT_ROOT_DIR="$cuda_root_path" \
    -DCOMPUTE_CAPABILITY="$gpu_cc"
make -j8
cd -
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/fused_elemwise_activation_kernel_impl.h"

// GPU registration of the backward kernel `fused_elemwise_activation_grad`
// for float, double and float16, for every layout. The kernel template is
// presumably provided by the impl header included above - confirm. The
// trailing `{}` leaves the registration body empty.
PD_REGISTER_KERNEL(fused_elemwise_activation_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::FusedElemwiseActivationGradKernel,
                   float,
                   double,
                   phi::dtype::float16) {}

// Same registration for the `fused_elemwise_add_activation_grad` kernel.
PD_REGISTER_KERNEL(fused_elemwise_add_activation_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::FusedElemwiseAddActivationGradKernel,
                   float,
                   double,
                   phi::dtype::float16) {}
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/fused_elemwise_activation_kernel_impl.h"

// GPU registration of the forward kernel `fused_elemwise_activation` for
// float, double and float16, for every layout. The kernel template is
// presumably provided by the impl header included above - confirm. The
// trailing `{}` leaves the registration body empty.
PD_REGISTER_KERNEL(fused_elemwise_activation,
                   GPU,
                   ALL_LAYOUT,
                   phi::FusedElemwiseActivationKernel,
                   float,
                   double,
                   phi::dtype::float16) {}

// Same registration for the `fused_elemwise_add_activation` kernel.
PD_REGISTER_KERNEL(fused_elemwise_add_activation,
                   GPU,
                   ALL_LAYOUT,
                   phi::FusedElemwiseAddActivationKernel,
                   float,
                   double,
                   phi::dtype::float16) {}
FusedSoftplusKernel(const Context& dev_ctx, out->set_mem_desc(dst_memory_p->get_desc()); } -} // namespace fusion -} // namespace phi +} // namespace phi::fusion PD_REGISTER_KERNEL(fused_softplus, OneDNN, diff --git a/paddle/phi/kernels/fusion/onednn/fusion_lstm_kernel.cc b/paddle/phi/kernels/fusion/onednn/fusion_lstm_kernel.cc new file mode 100644 index 0000000000000..02a3fd7fc3fb9 --- /dev/null +++ b/paddle/phi/kernels/fusion/onednn/fusion_lstm_kernel.cc @@ -0,0 +1,573 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/core/compat/convert_utils.h" +#include "paddle/phi/core/expect.h" +#include "paddle/phi/kernels/fusion/onednn/fusion_rnn_onednn.h" +#include "paddle/utils/optional.h" + +namespace phi { +namespace fusion { + +using phi::OneDNNContext; +using phi::funcs::OneDNNGetDataType; +using phi::funcs::OneDNNMemDesc; +using phi::funcs::RNNReorderType; +using OneDNNMemoryFormat = dnnl::memory::format_tag; + +template +class LSTMMKLDNNHandler + : public RNNMKLDNNHandler { + public: + LSTMMKLDNNHandler(const OneDNNContext& dev_ctx, + const dnnl::engine onednn_engine, + phi::Place cpu_place UNUSED, + const phi::DenseTensor* input, + const phi::DenseTensor* weight_h, + const phi::DenseTensor* h0, + const phi::DenseTensor* c0 UNUSED, + const bool is_reverse, + const int64_t N, + const int64_t Ti, + const int64_t IC, + const int64_t OC, + const std::string& unique_name UNUSED, + float scale_data, + float shift_data, + std::vector scale_weights, + bool use_peepholes, + std::string gate_activation, + std::string cell_activation, + std::string candidate_activation) + : RNNMKLDNNHandler(dev_ctx, + onednn_engine, + dev_ctx.GetPlace(), + input, + weight_h, + h0, + is_reverse, + N, + Ti, + IC, + OC, + 4, + "x_weight_h", + scale_data, + shift_data, + scale_weights) { + if (unlikely(!this->isCached())) { + const bool is_INT8 = std::is_same::value; + // oneDNN kernel has hardcoded activation functions + PADDLE_ENFORCE_EQ( + gate_activation, + "sigmoid", + phi::errors::Unimplemented("oneDNN fusion_lstm supports only " + "sigmoid as a gate activation.")); + PADDLE_ENFORCE_EQ( + cell_activation, + "tanh", + phi::errors::Unimplemented( + "oneDNN fusion_lstm supports only tanh as a cell activation.")); + PADDLE_ENFORCE_EQ( + candidate_activation, + "tanh", + phi::errors::Unimplemented( + "oneDNN fusion_lstm supports only tanh a candidate activation.")); + + // Weights for int8 kernel are of a type s8 + const auto weights_dt = + is_INT8 ? 
dnnl::memory::data_type::s8 : OneDNNGetDataType(); + + // oneDNN RNN dimensions + const int64_t D = 1; // Directions + const int64_t L = 1; // Layers (PP supports only 1 stacked layer) + const int64_t G = 4; // Number of Gates, 4 for LSTM + + // Create memory descriptors + auto input_md = OneDNNMemDesc( + {Ti, N, IC}, OneDNNGetDataType(), OneDNNMemoryFormat::tnc); + auto weight_x_md = + OneDNNMemDesc({L, D, IC, G, OC}, weights_dt, OneDNNMemoryFormat::any); + auto weight_h_md = + OneDNNMemDesc({L, D, OC, G, OC}, weights_dt, OneDNNMemoryFormat::any); + auto bias_md = OneDNNMemDesc( + {L, D, G, OC}, OneDNNGetDataType(), OneDNNMemoryFormat::ldgo); + auto hidden_md = OneDNNMemDesc( + {Ti, N, OC}, OneDNNGetDataType(), OneDNNMemoryFormat::any); + + auto h0_md = OneDNNMemDesc( + {L, D, N, OC}, OneDNNGetDataType(), OneDNNMemoryFormat::any); + auto c0_md = OneDNNMemDesc( + {L, D, N, OC}, OneDNNGetDataType(), OneDNNMemoryFormat::any); + + // Create LSTM oneDNN primitive + const auto direction = + is_reverse ? 
dnnl::rnn_direction::unidirectional_right2left + : dnnl::rnn_direction::unidirectional_left2right; + if (!use_peepholes) { + this->AcquireForwardPrimitiveDescriptor( + this->attr_, + dnnl::prop_kind::forward_inference, + direction, + input_md, + h0_md, + c0_md, + weight_x_md, + weight_h_md, + bias_md, + hidden_md, + dnnl::memory::desc(), + dnnl::memory::desc()); + } else { + auto weight_peephole_md = OneDNNMemDesc({L, D, 3, OC}, + OneDNNGetDataType(), + OneDNNMemoryFormat::ldgo); + this->AcquireForwardPrimitiveDescriptor( + this->attr_, + dnnl::prop_kind::forward_inference, + direction, + input_md, + h0_md, + c0_md, + weight_x_md, + weight_h_md, + weight_peephole_md, + bias_md, + hidden_md, + dnnl::memory::desc(), + dnnl::memory::desc()); + } + } + } + + // PaddlePaddle has different order of weights than oneDNN, so a reorder is + // needed + // PaddlePaddle: {c, i, f, o} + // oneDNN: {i, f, c, o} + template + void ReorderGates(U* weights, int64_t I) { + size_t inner_block_size = this->OC; + size_t block_size = inner_block_size * this->G; + for (size_t i = 0; i < (size_t)I; ++i) { // NOLINT + size_t offset = i * block_size; + + U* base_pos = weights + offset; + std::swap_ranges(base_pos, + base_pos + inner_block_size, + base_pos + inner_block_size); // c <-> i + std::swap_ranges(base_pos + inner_block_size, + base_pos + 2 * inner_block_size, + base_pos + 2 * inner_block_size); // c <-> f + } + } + + template + std::shared_ptr AcquireWeightXMemory( + const phi::DenseTensor* weight_x) { + const std::string wx_key = this->memory_key_ + "@weight_x"; + auto memory_p = + std::static_pointer_cast(this->dev_ctx_.GetBlob(wx_key)); + + if (!memory_p) { + auto user_md = OneDNNMemDesc({1, 1, this->IC, this->G, this->OC}, + OneDNNGetDataType(), + OneDNNMemoryFormat::ldigo); + auto user_memory = dnnl::memory(user_md, this->engine_); + + auto* weight_x_data = reinterpret_cast(user_memory.get_data_handle()); + memcpy(weight_x_data, + weight_x->data(), + sizeof(U) * this->IC * 
this->G * this->OC); + + ReorderGates(weight_x_data, this->IC); + + memory_p = std::make_shared( + this->fwd_pd_->weights_layer_desc(), this->engine_); + + auto& astream = OneDNNContext::tls().get_stream(); + dnnl::reorder(user_memory, *memory_p, this->attr_) + .execute(astream, user_memory, *memory_p); + + this->dev_ctx_.SetBlob(wx_key, memory_p); + } + return memory_p; + } + + template + std::shared_ptr AcquireWeightHMemory( + const phi::DenseTensor* weight_h) { + const std::string wh_key = this->memory_key_ + "@weight_h"; + auto memory_p = + std::static_pointer_cast(this->dev_ctx_.GetBlob(wh_key)); + + if (!memory_p) { + auto user_md = OneDNNMemDesc({1, 1, this->OC, this->G, this->OC}, + OneDNNGetDataType(), + OneDNNMemoryFormat::ldigo); + auto user_memory = dnnl::memory(user_md, this->engine_); + + auto* weight_h_data = reinterpret_cast(user_memory.get_data_handle()); + memcpy(weight_h_data, + weight_h->data(), + sizeof(U) * this->OC * this->G * this->OC); + + ReorderGates(weight_h_data, this->OC); + + memory_p = std::make_shared( + this->fwd_pd_->weights_iter_desc(), this->engine_); + + auto& astream = OneDNNContext::tls().get_stream(); + dnnl::reorder(user_memory, *memory_p, this->attr_) + .execute(astream, user_memory, *memory_p); + + this->dev_ctx_.SetBlob(wh_key, memory_p); + } + return memory_p; + } + + std::shared_ptr AcquireBiasMemory( + const phi::DenseTensor* bias) { + const std::string bias_key = this->memory_key_ + "@bias"; + auto memory_p = std::static_pointer_cast( + this->dev_ctx_.GetBlob(bias_key)); + + if (!memory_p) { + memory_p = std::make_shared(this->fwd_pd_->bias_desc(), + this->engine_); + auto* bias_data = reinterpret_cast(memory_p->get_data_handle()); + if (bias) { + const float* user_bias_data = + bias->data(); // Bias in oneDNN is always float + + memcpy(bias_data, user_bias_data, sizeof(float) * this->G * this->OC); + + ReorderGates(bias_data, 1); + } else { + // oneDNN always need bias memory, if it's not provided in PP, let + // 
oneDNN allocate memory and set it to 0 + memset(bias_data, 0, sizeof(float) * this->G * this->OC); + } + + this->dev_ctx_.SetBlob(bias_key, memory_p); + } + return memory_p; + } + + std::shared_ptr AcquirePeepholeWeights( + const phi::DenseTensor* bias) { + const std::string peepholes_key = this->memory_key_ + "@peepholes_weights"; + auto memory_p = std::static_pointer_cast( + this->dev_ctx_.GetBlob(peepholes_key)); + + if (!memory_p) { + auto user_md = OneDNNMemDesc({1, 1, 3, this->OC}, + OneDNNGetDataType(), + OneDNNMemoryFormat::ldgo); + auto user_memory = dnnl::memory(user_md, this->engine_); + memory_p = std::make_shared( + this->fwd_pd_->weights_peephole_desc(), this->engine_); + auto* peephole_weights_data = + reinterpret_cast(memory_p->get_data_handle()); + + const float* user_bias_data = + bias->data(); // Bias in oneDNN is always float + memcpy(peephole_weights_data, + user_bias_data + 4 * this->OC, + sizeof(float) * 3 * this->OC); + + this->dev_ctx_.SetBlob(peepholes_key, memory_p); + } + return memory_p; + } + + std::shared_ptr AcquireC0Memory(const phi::DenseTensor* c0) { + const std::string c0_key = this->memory_key_ + "@c0"; + auto memory_p = + std::static_pointer_cast(this->dev_ctx_.GetBlob(c0_key)); + + if (!memory_p) { + auto user_c0_memory = dnnl::memory(); + if (c0) { + user_c0_memory = + dnnl::memory({{1, 1, this->N, this->OC}, + OneDNNGetDataType(), + OneDNNMemoryFormat::ldnc}, + this->engine_, + phi::funcs::to_void_cast(c0->data())); + } else { + user_c0_memory = dnnl::memory({{1, 1, this->N, this->OC}, + OneDNNGetDataType(), + OneDNNMemoryFormat::ldnc}, + this->engine_); + memset(user_c0_memory.get_data_handle(), + 0, + sizeof(float) * this->N * this->OC); + } + memory_p = std::make_shared( + this->fwd_pd_->src_iter_c_desc(), this->engine_); + + auto& astream = OneDNNContext::tls().get_stream(); + dnnl::reorder(user_c0_memory, *memory_p) + .execute(astream, user_c0_memory, *memory_p); + + this->dev_ctx_.SetBlob(c0_key, memory_p); + } + 
return memory_p; + } +}; + +template +void RunKernel(const Context& dev_ctx, + const DenseTensor& x_in, + const DenseTensor& weight_x_in, + const DenseTensor& weight_h_in, + const DenseTensor& bias_in, + const paddle::optional& h0_in, + const paddle::optional& c0_in, + bool use_peepholes, + bool is_reverse, + bool use_seq, + const std::string& gate_activation, + const std::string& cell_activation, + const std::string& candidate_activation, + float scale_data, + float shift_data, + const std::vector& scale_weights, + bool force_fp32_output, + DenseTensor* hidden, + DenseTensor* cell, + DenseTensor* xx, + DenseTensor* batched_input, + DenseTensor* batched_hidden, + DenseTensor* batched_cell, + DenseTensor* reordered_h0, + DenseTensor* reordered_c0, + DenseTensor* checked_cell) { + const auto& onednn_engine = dev_ctx.GetEngine(); + + // Get Tensors + const auto* input = &x_in; + const auto* h0 = h0_in.get_ptr(); + const auto* c0 = c0_in.get_ptr(); + const auto* weight_x = &weight_x_in; + const auto* weight_h = &weight_h_in; + const auto* bias = &bias_in; + + auto x_dims = input->dims(); + auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) + ? 
common::flatten_to_2d(x_dims, 1) + : x_dims; + + // Get tensor dimensions + const auto x_mat_dims_vec = common::vectorize(x_mat_dims); + const auto weight_h_dims = common::vectorize(weight_h->dims()); + const auto& input_lod = input->lod()[0]; + + // Calculate RNN dimensions + const int64_t N = input_lod.size() - 1; // Number of sentences (batches) + const int64_t Ti = // Max length of the sentence in a batch + [&input_lod]() { + size_t res = 0; + for (size_t i = 0; i < (input_lod.size() - 1); ++i) { + res = std::max(res, input_lod[i + 1] - input_lod[i]); + } + return res; + }(); + const int64_t IC = x_mat_dims_vec[1]; // Input channels + const int64_t OC = weight_h_dims[0]; // Output channels + + LSTMMKLDNNHandler handler(dev_ctx, + onednn_engine, + dev_ctx.GetPlace(), + input, + weight_h, + h0, + c0, + is_reverse, + N, + Ti, + IC, + OC, + "x_weight_h", + scale_data, + shift_data, + scale_weights, + use_peepholes, + gate_activation, + cell_activation, + candidate_activation); + + auto input_memory_p = + handler.AcquireInputMemoryWithReorder(input, is_reverse); + auto c0_memory_p = handler.AcquireC0Memory(c0); + + std::shared_ptr h0_memory_p, weight_h_memory_p, + weight_x_memory_p; + + if (weight_h->dtype() == phi::DataType::FLOAT32) { + h0_memory_p = handler.template AcquireH0Memory(h0); + weight_x_memory_p = handler.template AcquireWeightXMemory(weight_x); + weight_h_memory_p = handler.template AcquireWeightHMemory(weight_h); + } else if (weight_h->dtype() == phi::DataType::BFLOAT16) { + h0_memory_p = handler.template AcquireH0Memory(h0); + weight_x_memory_p = + handler.template AcquireWeightXMemory(weight_x); + weight_h_memory_p = + handler.template AcquireWeightHMemory(weight_h); + } else { + h0_memory_p = handler.template AcquireH0Memory(h0); + weight_x_memory_p = handler.template AcquireWeightXMemory(weight_x); + weight_h_memory_p = handler.template AcquireWeightHMemory(weight_h); + } + + auto bias_memory_p = handler.AcquireBiasMemory(bias); + auto 
hidden_onednn_memory_p = handler.AcquireOutputMemory(); + + std::unordered_map lstm_args = { + {DNNL_ARG_SRC_LAYER, *input_memory_p}, + {DNNL_ARG_SRC_ITER, *h0_memory_p}, + {DNNL_ARG_SRC_ITER_C, *c0_memory_p}, + {DNNL_ARG_WEIGHTS_LAYER, *weight_x_memory_p}, + {DNNL_ARG_WEIGHTS_ITER, *weight_h_memory_p}, + {DNNL_ARG_BIAS, *bias_memory_p}, + {DNNL_ARG_DST_LAYER, *hidden_onednn_memory_p}}; + + if (use_peepholes) { + auto peephole_weight_p = handler.AcquirePeepholeWeights(bias); + std::pair peepholes_weights(DNNL_ARG_WEIGHTS_PEEPHOLE, + *peephole_weight_p); + lstm_args.insert(peepholes_weights); + } + + auto lstm_forward_p = handler.AcquireForwardPrimitive(); + + auto& astream = OneDNNContext::tls().get_stream(); + lstm_forward_p->execute(astream, lstm_args); + astream.wait(); + + auto* hidden_onednn_data = hidden_onednn_memory_p->get_data_handle(); + auto* hidden_data = + phi::funcs::to_void_cast(dev_ctx.template Alloc(hidden)); + if (handler.is_NTC()) { + handler.reorderRNNdata(hidden_onednn_data, + hidden_data, + input_lod, + is_reverse, + RNNReorderType::NTC_PP); + } else { + handler.reorderRNNdata(hidden_onednn_data, + hidden_data, + input_lod, + is_reverse, + RNNReorderType::TNC_PP); + } +} + +template +void FusionLSTMMKLDNNKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& weight_x, + const DenseTensor& weight_h, + const DenseTensor& bias, + const paddle::optional& h0, + const paddle::optional& c0, + bool use_peepholes, + bool is_reverse, + bool use_seq, + const std::string& gate_activation, + const std::string& cell_activation, + const std::string& candidate_activation, + float scale_data, + float shift_data, + const std::vector& scale_weights, + bool force_fp32_output, + DenseTensor* hidden, + DenseTensor* cell, + DenseTensor* xx, + DenseTensor* batched_input, + DenseTensor* batched_hidden, + DenseTensor* batched_cell, + DenseTensor* reordered_h0, + DenseTensor* reordered_c0, + DenseTensor* checked_cell) { + const bool is_bf16 = 
std::is_same::value; + + // BF16 does not support force output + if (!is_bf16 && force_fp32_output) { // NOLINT + RunKernel(dev_ctx, + x, + weight_x, + weight_h, + bias, + h0, + c0, + use_peepholes, + is_reverse, + use_seq, + gate_activation, + cell_activation, + candidate_activation, + scale_data, + shift_data, + scale_weights, + force_fp32_output, + hidden, + cell, + xx, + batched_input, + batched_hidden, + batched_cell, + reordered_h0, + reordered_c0, + checked_cell); + } else { + RunKernel(dev_ctx, + x, + weight_x, + weight_h, + bias, + h0, + c0, + use_peepholes, + is_reverse, + use_seq, + gate_activation, + cell_activation, + candidate_activation, + scale_data, + shift_data, + scale_weights, + force_fp32_output, + hidden, + cell, + xx, + batched_input, + batched_hidden, + batched_cell, + reordered_h0, + reordered_c0, + checked_cell); + } +} + +} // namespace fusion +} // namespace phi + +PD_REGISTER_KERNEL(fusion_lstm, + OneDNN, + ONEDNN, + phi::fusion::FusionLSTMMKLDNNKernel, + float, + uint8_t, + phi::dtype::bfloat16) {} diff --git a/paddle/fluid/operators/fused/onednn/fusion_rnn_onednn.h b/paddle/phi/kernels/fusion/onednn/fusion_rnn_onednn.h similarity index 87% rename from paddle/fluid/operators/fused/onednn/fusion_rnn_onednn.h rename to paddle/phi/kernels/fusion/onednn/fusion_rnn_onednn.h index c04dd0cebeec0..d429f0b3944bb 100644 --- a/paddle/fluid/operators/fused/onednn/fusion_rnn_onednn.h +++ b/paddle/phi/kernels/fusion/onednn/fusion_rnn_onednn.h @@ -1,24 +1,24 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #pragma once -#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/kernel_registry.h" -namespace paddle { -namespace operators { +namespace phi { +namespace fusion { using phi::funcs::CreateKey; using phi::funcs::OneDNNGetDataType; @@ -28,8 +28,7 @@ using OneDNNMemoryFormat = dnnl::memory::format_tag; template class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { public: - RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const phi::OneDNNContext& dev_ctx, + RNNMKLDNNHandler(const phi::OneDNNContext& dev_ctx, const dnnl::engine onednn_engine UNUSED, phi::Place cpu_place, const phi::DenseTensor* input UNUSED, @@ -41,7 +40,10 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { const int64_t IC, const int64_t OC, const int64_t G, - const std::string& unique_name) + const std::string& unique_name, + float scale_data, + float shift_data, + std::vector scale_weights) : phi::funcs::OneDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), @@ -62,9 +64,6 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { if (is_INT8) { // Int8 attributes - const float scale_data = ctx.Attr("Scale_data"); - const float shift_data = ctx.Attr("Shift_data"); - 
const auto scale_weights = ctx.Attr>("Scale_weights"); const int weights_scale_mask = 0 + @@ -237,5 +236,5 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { std::string memory_key_; dnnl::primitive_attr attr_; }; -} // namespace operators -} // namespace paddle +} // namespace fusion +} // namespace phi diff --git a/paddle/phi/kernels/fusion/xpu/block_multi_head_attention_kernel.cc b/paddle/phi/kernels/fusion/xpu/block_multi_head_attention_kernel.cc new file mode 100644 index 0000000000000..42c58f60bb654 --- /dev/null +++ b/paddle/phi/kernels/fusion/xpu/block_multi_head_attention_kernel.cc @@ -0,0 +1,610 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include "glog/logging.h" +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/common/memory_utils.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/flash_attn_kernel.h" +#include "xpu/xdnn.h" + +namespace phi { +namespace fusion { + +template +int GetMaxLen(const Context& dev_ctx, + const phi::DenseTensor& seq_lens_tensor, + phi::DenseTensor* max_len_tensor, + const int batch_size) { + int max_len_cpu = 0; + int r = baidu::xpu::api::reduce_max(dev_ctx.x_context(), + seq_lens_tensor.data(), + max_len_tensor->data(), + {batch_size}, + {0}); + PD_CHECK(r == 0, "baidu::xpu::api::reduce_max failed."); + xpu_wait(dev_ctx.x_context()->xpu_stream); + r = xpu_memcpy(&max_len_cpu, + max_len_tensor->data(), + sizeof(int), + XPUMemcpyKind::XPU_DEVICE_TO_HOST); + PD_CHECK(r == 0, "xpu_memcpy failed."); + return max_len_cpu; +} + +template +void qkv_split_rope_kernel( + const Context& xpu_ctx, + const DenseTensor& qkv_input, + const DenseTensor& rotary_emb, + const DenseTensor& seq_lens, + const baidu::xpu::api::VectorParam& lods, + const baidu::xpu::api::VectorParam& pos_emb_offset, + int bsz, + int max_seq_len, + int token_num, + int num_head, + int dim_head, + DenseTensor* q_out, + DenseTensor* k_out, + DenseTensor* v_out) { + xpu::ctx_guard RAII_GUARD(xpu_ctx.x_context()); + using XPUType = typename XPUTypeTrait::Type; + auto q_data = reinterpret_cast(q_out->data()); + auto k_data = reinterpret_cast(k_out->data()); + auto v_data = reinterpret_cast(v_out->data()); + int r = baidu::xpu::api::split( + xpu_ctx.x_context(), + reinterpret_cast(qkv_input.data()), + {q_data, k_data, v_data}, + {token_num, 3, num_head * dim_head}, + {1, 1, 1}, + 1); + const_cast(&qkv_input)->clear(); + PD_CHECK(r == 0, "baidu::xpu::api::split failed."); + r = baidu::xpu::api::vsl_rotary_neox_embedding( + xpu_ctx.x_context(), + q_data, + k_data, + rotary_emb.data(), + q_data, + k_data, + lods, + 1, + max_seq_len, + num_head, + dim_head, + 
"BLHD", + pos_emb_offset, + "NORMAL", + -1); + PD_CHECK(r == 0, "baidu::xpu::api::vsl_rotary_neox_embedding failed."); +} + +template +void BlockMultiheadAttentionXPUKernel( + const Context& dev_ctx, + const DenseTensor& qkv, + const DenseTensor& key_cache, + const DenseTensor& value_cache, + const DenseTensor& seq_lens_encoder, + const DenseTensor& seq_lens_decoder, + const DenseTensor& seq_lens_this_time, + const DenseTensor& padding_offsets, + const DenseTensor& cum_offsets, + const DenseTensor& cu_seqlens_q, + const DenseTensor& cu_seqlens_k, + const DenseTensor& block_tables, + const DenseTensor& cache_k_per_batch_maxs, + const DenseTensor& cache_v_per_batch_maxs, + const paddle::optional& pre_key_cache, + const paddle::optional& pre_value_cache, + const paddle::optional& rope_emb, + const paddle::optional& mask, + const paddle::optional& tgt_mask, + const paddle::optional& cache_k_quant_scales, + const paddle::optional& cache_v_quant_scales, + const paddle::optional& cache_k_dequant_scales, + const paddle::optional& cache_v_dequant_scales, + const paddle::optional& qkv_out_scale, + const paddle::optional& qkv_bias, + const paddle::optional& out_shift, + const paddle::optional& out_smooth, + const paddle::optional& max_enc_len_this_time, + const paddle::optional& max_dec_len_this_time, + int max_seq_len, + int block_size, + bool use_neox_style, + const bool dynamic_cachekv_quant, + const int quant_round_type, + const float quant_max_bound, + const float quant_min_bound, + const float out_scale, + const std::string& compute_dtype, + DenseTensor* fmha_out, + DenseTensor* qkv_out, + DenseTensor* key_cache_out, + DenseTensor* value_cache_out) { + xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); + auto xpu_context = dev_ctx.x_context(); + + using XPUType = typename XPUTypeTrait::Type; + + phi::DenseTensor qkv_buf; + phi::DenseTensor fmha_buf; + VLOG(3) << "fmha_out " << fmha_out->dims(); + if (out_scale <= 0) { + dev_ctx.template Alloc(fmha_out); + fmha_buf = 
*fmha_out; + } else { + PADDLE_THROW(phi::errors::Unimplemented("Not supports out_scale > 0.")); + } + int r = xpu::constant(xpu_context, + reinterpret_cast(fmha_buf.data()), + fmha_buf.numel(), + 0); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); + const auto& input_dims = qkv.dims(); + const auto& key_cache_dims = key_cache.dims(); + const int token_num = input_dims[0]; + const int num_head = key_cache_dims[1]; + const int dim_head = key_cache_dims[3]; + const int bsz = cum_offsets.dims()[0]; + const int max_block_per_seq = block_tables.dims()[1]; + VLOG(3) << "bsz: " << bsz << " token_num: " << token_num + << " num_head: " << num_head << " dim_head: " << dim_head + << " max_block_per_seq: " << max_block_per_seq; + VLOG(3) << "fmha_out_dims: " << fmha_out->dims(); + bool causual = true; + if (mask) { + causual = false; + } + bool use_pre_cache = false; + int pre_cache_length = 0; + if (pre_key_cache) { + PADDLE_THROW(phi::errors::Unimplemented("Not supports pre_key_cache now.")); + } + VLOG(3) << "token_num: " << token_num + << " pre_cache_length: " << pre_cache_length; + + int max_dec_len_this_time_data(0); + if (!max_dec_len_this_time) { + phi::DenseTensor max_dec_len_tensor; + max_dec_len_tensor.Resize({{1}}); + dev_ctx.template Alloc(&max_dec_len_tensor, + max_dec_len_tensor.numel() * sizeof(int)); + max_dec_len_this_time_data = + GetMaxLen(dev_ctx, seq_lens_decoder, &max_dec_len_tensor, bsz); + } else { + PADDLE_ENFORCE_EQ( + max_dec_len_this_time.get().place().GetType(), + phi::AllocationType::CPU, + errors::InvalidArgument( + "The place of input max_dec_len_this_time must be CPU, but got %s.", + max_dec_len_this_time.get().place())); + max_dec_len_this_time_data = *max_dec_len_this_time.get().data(); + } + int max_enc_len_this_time_data(0); + if (!max_enc_len_this_time) { + phi::DenseTensor max_enc_len_tensor; + max_enc_len_tensor.Resize({{1}}); + dev_ctx.template Alloc(&max_enc_len_tensor, + max_enc_len_tensor.numel() * sizeof(int)); + 
max_enc_len_this_time_data = + GetMaxLen(dev_ctx, seq_lens_encoder, &max_enc_len_tensor, bsz); + } else { + PADDLE_ENFORCE_EQ( + max_enc_len_this_time.get().place().GetType(), + phi::AllocationType::CPU, + errors::InvalidArgument( + "The place of input max_enc_len_this_time must be CPU, but got %s.", + max_enc_len_this_time.get().place())); + max_enc_len_this_time_data = *max_enc_len_this_time.get().data(); + } + + const int MAXPTR_N = xpu_context->max_ptr_size(); + VLOG(3) << "max_len end"; + phi::DenseTensor unpadding_q, unpadding_k, unpadding_v; + phi::DenseTensor softmax_out, softmax_lse, seed_offset; + phi::DenseTensor q_trans, k_trans, v_trans, qktv_out; + if (!use_pre_cache) { + unpadding_q.Resize({{token_num, num_head, dim_head}}); + unpadding_k.Resize({{token_num, num_head, dim_head}}); + unpadding_v.Resize({{token_num, num_head, dim_head}}); + + dev_ctx.template Alloc(&unpadding_q, unpadding_q.numel() * sizeof(T)); + dev_ctx.template Alloc(&unpadding_k, unpadding_k.numel() * sizeof(T)); + dev_ctx.template Alloc(&unpadding_v, unpadding_v.numel() * sizeof(T)); + } else { + PADDLE_THROW(phi::errors::Unimplemented("Not supports pre_key_cache now.")); + } + VLOG(3) << "encoder"; + VLOG(3) << "max_enc_len_this_time_data: " << max_enc_len_this_time_data; + if (qkv_out_scale) { + PADDLE_THROW(phi::errors::Unimplemented("Not supports qkv_out_scale now.")); + } else { + VLOG(1) << "qkv_out_scale is none"; + qkv_buf = qkv; + } + if (qkv_bias) { + PADDLE_THROW(phi::errors::Unimplemented("Not supports qkv_bias now.")); + } + std::vector lods_cpu(bsz + 1, 0); + xpu_wait(xpu_context->xpu_stream); + xpu_memcpy(lods_cpu.data() + 1, + seq_lens_this_time.data(), + sizeof(int32_t) * bsz, + XPUMemcpyKind::XPU_DEVICE_TO_HOST); + for (int i = 1; i < bsz + 1; i++) { + lods_cpu[i] += lods_cpu[i - 1]; + } + using XPUType = typename XPUTypeTrait::Type; + baidu::xpu::api::VectorParam lods = + baidu::xpu::api::VectorParam{lods_cpu.data(), bsz + 1, nullptr} + .to_xpu(RAII_GUARD); + 
float* p_batch_max_ptrs = RAII_GUARD.alloc_l3_or_gm(bsz); + + if (!rope_emb || !use_neox_style) { + PADDLE_THROW(phi::errors::Unimplemented( + "only supports use_neox_style rope_emb now.")); + } + if (max_enc_len_this_time_data > 0) { + // const int* sequence_lengths_data = seq_lens_encoder.data(); + xpu::VectorParam pos_emb_offset = + xpu::VectorParam{nullptr, 0, nullptr}; + qkv_split_rope_kernel(dev_ctx, + qkv, + rope_emb.get(), + seq_lens_encoder, + lods, + pos_emb_offset, + bsz, + rope_emb.get().dims()[2], + token_num, + num_head, + dim_head, + &unpadding_q, + &unpadding_k, + &unpadding_v); + + VLOG(3) << "rope end"; + VLOG(3) << "causual: " << causual; + if (!use_pre_cache) { + phi::FlashAttnUnpaddedKernel(dev_ctx, + unpadding_q, + unpadding_k, + unpadding_v, + cu_seqlens_q, + cu_seqlens_k, + paddle::none /*fixed_seed_offset*/, + causual ? paddle::none : mask, + max_enc_len_this_time_data, + max_enc_len_this_time_data, + 1.0f / sqrt(static_cast(dim_head)), + 0.0, + causual, + false, + true /* is_test*/, + "" /*rng_name*/, + &fmha_buf, + &softmax_out, + &softmax_lse, + &seed_offset); + } else { + PADDLE_THROW( + phi::errors::Unimplemented("Not supports use_pre_cache now.")); + } + VLOG(3) << "flash end"; + if (cache_k_quant_scales && dynamic_cachekv_quant) { + PADDLE_THROW(phi::errors::Unimplemented("Not supports quant now.")); + } else { + std::vector start_token_ctx(bsz, 0); + xpu::VectorParam start_token_ctx_VP = + xpu::VectorParam{ + start_token_ctx.data(), + static_cast(start_token_ctx.size()), + nullptr} + .to_xpu(RAII_GUARD); + + std::vector ordered_index_ctx(bsz, 0); + std::iota(ordered_index_ctx.begin(), ordered_index_ctx.end(), 0); + xpu::VectorParam ordered_index_ctx_VP = + xpu::VectorParam{ + ordered_index_ctx.data(), static_cast(bsz), nullptr} + .to_xpu(RAII_GUARD); + int ret = xpu::reshape_cached_kv( + xpu_context, + reinterpret_cast(unpadding_k.data()), + reinterpret_cast(const_cast(key_cache.data())), + block_tables.data(), + lods, + 
start_token_ctx_VP, + ordered_index_ctx_VP, + bsz, + num_head, + dim_head, + bsz, + block_size, + max_block_per_seq, + "BLHD", + "HLD"); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reshape_cached_kv"); + ret = xpu::batch_findmax( + xpu_context, + reinterpret_cast(const_cast(key_cache.data())), + token_num, + num_head * dim_head, + bsz, + lods.xpu, + p_batch_max_ptrs); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "batch_findmax"); + ret = xpu::copy2d( + xpu_context, + p_batch_max_ptrs, + const_cast(cache_k_per_batch_maxs.data()), + bsz, + 1, + MAXPTR_N, + 1); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy2d"); + ret = xpu::reshape_cached_kv( + xpu_context, + reinterpret_cast(unpadding_v.data()), + reinterpret_cast(const_cast(value_cache.data())), + block_tables.data(), + lods, + start_token_ctx_VP, + ordered_index_ctx_VP, + bsz, + num_head, + dim_head, + bsz, + block_size, + max_block_per_seq, + "BLHD", + "HLD"); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reshape_cached_kv"); + ret = xpu::batch_findmax( + xpu_context, + reinterpret_cast(const_cast(value_cache.data())), + token_num, + num_head * dim_head, + bsz, + lods.xpu, + p_batch_max_ptrs); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "batch_findmax"); + ret = xpu::copy2d( + xpu_context, + p_batch_max_ptrs, + const_cast(cache_v_per_batch_maxs.data()), + bsz, + 1, + MAXPTR_N, + 1); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy2d"); + } + VLOG(3) << "cache end"; + } + VLOG(3) << "encoder done"; + VLOG(3) << "max_dec_len_this_time_data: " << max_dec_len_this_time_data; + + if (max_dec_len_this_time_data > 0) { + int cachekv_quant_mode = 0; + if (cache_k_quant_scales || cachekv_quant_mode) { + PADDLE_THROW(phi::errors::Unimplemented( + "Not supports cache_k_quant_scales or cachekv_quant_mode now.")); + } + std::vector lods_decoder_cpu(bsz + 1, 0); + xpu_wait(xpu_context->xpu_stream); + xpu_memcpy(lods_decoder_cpu.data() + 1, + seq_lens_decoder.data(), + sizeof(int32_t) * bsz, + XPUMemcpyKind::XPU_DEVICE_TO_HOST); + for (int i = 1; i < bsz + 1; i++) { + 
lods_decoder_cpu[i] += lods_decoder_cpu[i - 1]; + } + std::vector kv_seq_lod_dec(bsz + 1, 0); + std::iota(kv_seq_lod_dec.begin(), kv_seq_lod_dec.end(), 0); + xpu::VectorParam kv_seq_lod_dec_VP = + xpu::VectorParam{kv_seq_lod_dec.data(), + static_cast(kv_seq_lod_dec.size()), + nullptr} + .to_xpu(RAII_GUARD); + std::vector start_token_ctx(bsz, 0); + for (int i = 0; i < bsz; i++) { + start_token_ctx[i] = lods_decoder_cpu[i + 1] - lods_decoder_cpu[i]; + } + xpu::VectorParam start_token_ctx_VP = + xpu::VectorParam{start_token_ctx.data(), + static_cast(start_token_ctx.size()), + nullptr} + .to_xpu(RAII_GUARD); + qkv_split_rope_kernel(dev_ctx, + qkv, + rope_emb.get(), + seq_lens_encoder, + lods, + start_token_ctx_VP, + bsz, + rope_emb.get().dims()[2], + token_num, + num_head, + dim_head, + &unpadding_q, + &unpadding_k, + &unpadding_v); + + std::vector ordered_index_ctx(bsz, 0); + std::iota(ordered_index_ctx.begin(), ordered_index_ctx.end(), 0); + xpu::VectorParam ordered_index_ctx_VP = + xpu::VectorParam{ + ordered_index_ctx.data(), static_cast(bsz), nullptr} + .to_xpu(RAII_GUARD); + + float* p_batch_max_ptrs_fill = + RAII_GUARD.alloc_l3_or_gm(bsz * MAXPTR_N); + int ret = xpu::constant( + xpu_context, p_batch_max_ptrs_fill, bsz * MAXPTR_N, 0.0); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "constant"); + float* p_cache_k_max_data = RAII_GUARD.alloc_l3_or_gm(MAXPTR_N); + float* p_cache_v_max_data = RAII_GUARD.alloc_l3_or_gm(MAXPTR_N); + ret = xpu::reshape_cached_kv( + xpu_context, + reinterpret_cast(unpadding_k.data()), + reinterpret_cast(const_cast(key_cache.data())), + block_tables.data(), + kv_seq_lod_dec_VP, + start_token_ctx_VP, + ordered_index_ctx_VP, + bsz, + num_head, + dim_head, + bsz, + block_size, + max_block_per_seq, + "BLHD", + "HLD"); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reshape_cached_kv"); + ret = xpu::batch_findmax( + xpu_context, + reinterpret_cast(unpadding_k.data()), + bsz, + num_head * dim_head, + p_batch_max_ptrs); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, 
"batch_findmax"); + unpadding_k.clear(); + ret = xpu::copy2d(xpu_context, + p_batch_max_ptrs, + p_batch_max_ptrs_fill, + bsz, + 1, + MAXPTR_N, + 1); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy2d"); + ret = xpu::max( + xpu_context, + cache_k_per_batch_maxs.data(), + p_batch_max_ptrs_fill, + const_cast(cache_k_per_batch_maxs.data()), + bsz * MAXPTR_N); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "max"); + ret = xpu::findmax( + xpu_context, + const_cast(cache_k_per_batch_maxs.data()), + p_cache_k_max_data, + bsz * MAXPTR_N); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "findmax"); + ret = xpu::reshape_cached_kv( + xpu_context, + reinterpret_cast(unpadding_v.data()), + reinterpret_cast(const_cast(value_cache.data())), + block_tables.data(), + kv_seq_lod_dec_VP, + start_token_ctx_VP, + ordered_index_ctx_VP, + bsz, + num_head, + dim_head, + bsz, + block_size, + max_block_per_seq, + "BLHD", + "HLD"); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "reshape_cached_kv"); + ret = xpu::batch_findmax( + xpu_context, + reinterpret_cast(unpadding_v.data()), + bsz, + num_head * dim_head, + p_batch_max_ptrs); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "batch_findmax"); + unpadding_v.clear(); + ret = xpu::copy2d(xpu_context, + p_batch_max_ptrs, + p_batch_max_ptrs_fill, + bsz, + 1, + MAXPTR_N, + 1); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy2d"); + ret = xpu::max( + xpu_context, + cache_v_per_batch_maxs.data(), + p_batch_max_ptrs_fill, + const_cast(cache_v_per_batch_maxs.data()), + bsz * MAXPTR_N); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "max"); + ret = xpu::findmax( + xpu_context, + const_cast(cache_v_per_batch_maxs.data()), + p_cache_v_max_data, + bsz * MAXPTR_N); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "findmax"); + + VLOG(1) << "cachekv_quant_mode " << cachekv_quant_mode; + std::vector qkvlod_dec(2 * (bsz + 1), 0); + for (int bs = 0; bs < bsz; bs++) { + qkvlod_dec[bs + 1] = bs + 1; + qkvlod_dec[bsz + 1 + bs + 1] = lods_decoder_cpu[bs + 1] + bs + 1; + } + auto qkvlod_dec_vp = + xpu::VectorParam{ + qkvlod_dec.data(), 
static_cast(qkvlod_dec.size()), nullptr} + .to_xpu(RAII_GUARD); + xpu::DecodeAttnParam decoder_attn_vsl_param( + qkvlod_dec_vp, max_seq_len, num_head, dim_head, -1, 0, bsz, {}); + xpu::PageAttnParam page_param( + block_size, bsz, max_block_per_seq, ordered_index_ctx_VP, 0, "HLD"); + float* max_q_ptr = RAII_GUARD.alloc_l3_or_gm(MAXPTR_N); + ret = xpu::findmax(xpu_context, + reinterpret_cast(unpadding_q.data()), + max_q_ptr, + token_num * num_head * dim_head); + + ret = xpu::qkv_paged_attention( + xpu_context, + reinterpret_cast(unpadding_q.data()), + reinterpret_cast(const_cast(key_cache.data())), + reinterpret_cast(const_cast(value_cache.data())), + block_tables.data(), // [pagep.max_batch_size, + // pagep.max_num_blocks_per_seq] + reinterpret_cast(fmha_buf.data()), + max_q_ptr, + p_cache_k_max_data, // shape=[6], nullptr if pagep.quant_type == 1 + p_cache_v_max_data, // shape=[6], nullptr if pagep.quant_type == 1 + nullptr, + decoder_attn_vsl_param, // attention 相关参数 + page_param); // page attention 相关参数 + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "qkv_paged_attention"); + } + VLOG(3) << "decoder done"; +} +} // namespace fusion +} // namespace phi + +PD_REGISTER_KERNEL(block_multihead_attention_xpu, + XPU, + ALL_LAYOUT, + phi::fusion::BlockMultiheadAttentionXPUKernel, + phi::dtype::float16) { + kernel->InputAt(26).SetBackend(phi::Backend::CPU); + kernel->InputAt(27).SetBackend(phi::Backend::CPU); +} diff --git a/paddle/phi/kernels/fusion/xpu/fused_layernorm_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_layernorm_kernel.cc index 833caa6688787..cac0182feaa2b 100644 --- a/paddle/phi/kernels/fusion/xpu/fused_layernorm_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/fused_layernorm_kernel.cc @@ -63,6 +63,11 @@ void FusedLayerNormKernel(const Context& dev_ctx, dev_ctx.template Alloc(&residual_alpha_tmp); dev_ctx.template Alloc(&residual_alpha_ptr); + r = baidu::xpu::api::constant(xpu_ctx->x_context(), + reinterpret_cast(out->data()), + out->numel(), + static_cast(0.f)); + 
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); r = baidu::xpu::api::constant(xpu_ctx->x_context(), residual_alpha_tmp.data(), diff --git a/paddle/phi/kernels/gpu/activation_grad_kernel.cu b/paddle/phi/kernels/gpu/activation_grad_kernel.cu index 594eefe5b8de1..ecfd46852c134 100644 --- a/paddle/phi/kernels/gpu/activation_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_grad_kernel.cu @@ -209,9 +209,6 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Swish, CudaSwishGradFunctor); DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, CudaLeakyReluGradFunctor, alpha); -DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu, - CudaThresholdedReluGradFunctor, - threshold); DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink, CudaSoftShrinkGradFunctor, lambda); @@ -247,7 +244,10 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid, CudaHardSigmoidGradFunctor, slope, offset); - +DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(ThresholdedRelu, + CudaThresholdedReluGradFunctor, + threshold, + value); template void SiluGradKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/gpu/activation_kernel.cu b/paddle/phi/kernels/gpu/activation_kernel.cu index 1bf3d92d80620..aa874c5e0dd81 100644 --- a/paddle/phi/kernels/gpu/activation_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_kernel.cu @@ -123,9 +123,6 @@ DEFINE_GPU_ACTIVATION_KERNEL_WITH_INT_IN_FLOAT_OUT(Expm1, CudaExpm1Functor) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LogitCUDA, CudaLogitFunctor, eps) -DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, - CudaThresholdedReluFunctor, - threshold) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, CudaHardShrinkFunctor, threshold) @@ -148,6 +145,10 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid, slope, offset) DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Selu, CudaSeluFunctor, scale, alpha) +DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(ThresholdedRelu, + 
CudaThresholdedReluFunctor, + threshold, + value) template void HardSwishKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu b/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu index 564090490f847..3a020e4359d9d 100644 --- a/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu @@ -60,7 +60,7 @@ void FillDiagonalGradKernel(const Context& ctx, auto strides = funcs::CalStride(out_dims); auto wrapsize = std::min(size, out_dims[1] * out_dims[1]); - // The wrap mode supported only the dims equels to 2; In wrap mode, the + // The wrap mode supported only the dims equals to 2; In wrap mode, the // value will be filled in cycles if (wrap) { wrapsize = size; diff --git a/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu b/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu index aed2380628ceb..8a06bd33fa4f3 100644 --- a/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu +++ b/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu @@ -61,7 +61,7 @@ void FillDiagonalKernel(const Context& ctx, auto out_dims = out->dims(); auto strides = funcs::CalStride(out_dims); - // The wrap mode supported only the dims equels to 2; In wrap mode, the + // The wrap mode supported only the dims equals to 2; In wrap mode, the // value will be filled in cycles if (!wrap) { size = std::min(size, out_dims[1] * out_dims[1]); diff --git a/paddle/phi/kernels/gpu/group_norm_kernel.cu b/paddle/phi/kernels/gpu/group_norm_kernel.cu index 4835b643efcc7..720447ea41a0e 100644 --- a/paddle/phi/kernels/gpu/group_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/group_norm_kernel.cu @@ -123,6 +123,17 @@ inline __device__ void UpdateSum(const T* srcX, float* sum, float* sumSq) { *sumSq += src_data * src_data; } +template +inline __device__ void UpdateSum(const T* srcX, + const T* srcR, + float* sum, + float* sumSq) { + float src_data = phi::__2float(*srcX); + float srcy_data = phi::__2float(*srcR); + *sum += src_data + srcy_data; + 
*sumSq += (src_data + srcy_data) * (src_data + srcy_data); +} + template <> inline __device__ void UpdateSum<__half, 2>(const __half* srcX, float* sum, @@ -133,6 +144,20 @@ inline __device__ void UpdateSum<__half, 2>(const __half* srcX, *sumSq += f2.x * f2.x + f2.y * f2.y; } +template <> +inline __device__ void UpdateSum<__half, 2>(const __half* srcX, + const __half* srcR, + float* sum, + float* sumSq) { + __half2 h2 = *reinterpret_cast<__half2 const*>(srcX); + __half2 h2_r = *reinterpret_cast<__half2 const*>(srcR); + float2 f2 = __half22float2(h2); + float2 f2_r = __half22float2(h2_r); + *sum += f2.x + f2_r.x + f2.y + f2_r.y; + *sumSq += + (f2.x + f2_r.x) * (f2.x + f2_r.x) + (f2.y + f2_r.y) * (f2.y + f2_r.y); +} + template <> inline __device__ void UpdateSum( const phi::dtype::float16* srcX, float* sum, float* sumSq) { @@ -142,6 +167,21 @@ inline __device__ void UpdateSum( *sumSq += f2.x * f2.x + f2.y * f2.y; } +template <> +inline __device__ void UpdateSum( + const phi::dtype::float16* srcX, + const phi::dtype::float16* srcR, + float* sum, + float* sumSq) { + __half2 h2 = *reinterpret_cast<__half2 const*>(srcX); + __half2 h2_r = *reinterpret_cast<__half2 const*>(srcR); + float2 f2 = __half22float2(h2); + float2 f2_r = __half22float2(h2_r); + *sum += f2.x + f2_r.x + f2.y + f2_r.y; + *sumSq += + (f2.x + f2_r.x) * (f2.x + f2_r.x) + (f2.y + f2_r.y) * (f2.y + f2_r.y); +} + #ifdef PADDLE_CUDA_BF16 template <> inline __device__ void UpdateSum( @@ -151,6 +191,21 @@ inline __device__ void UpdateSum( *sum += f2.x + f2.y; *sumSq += f2.x * f2.x + f2.y * f2.y; } + +template <> +inline __device__ void UpdateSum( + const phi::dtype::bfloat16* srcX, + const phi::dtype::bfloat16* srcR, + float* sum, + float* sumSq) { + __nv_bfloat162 h2 = *reinterpret_cast<__nv_bfloat162 const*>(srcX); + __nv_bfloat162 h2_r = *reinterpret_cast<__nv_bfloat162 const*>(srcR); + float2 f2 = phi::bfloat1622float2(h2); + float2 f2_r = phi::bfloat1622float2(h2_r); + *sum += f2.x + f2_r.x + f2.y + 
f2_r.y; + *sumSq += + (f2.x + f2_r.x) * (f2.x + f2_r.x) + (f2.y + f2_r.y) * (f2.y + f2_r.y); +} #endif template @@ -177,7 +232,13 @@ __global__ void groupNormNDHWCSumSingerChannelKernel( int64_t offset = static_cast(ni) * params.dhwc + static_cast(dhwi) * params.c + ci; float src_data = *reinterpret_cast(¶ms.srcX[offset]); - UpdateSum(¶ms.srcX[offset], &sum, &sumSq); + if (params.srcR != nullptr) { + int64_t g_offset = params.y_same_with_x ? offset : ci; + UpdateSum( + ¶ms.srcX[offset], ¶ms.srcR[g_offset], &sum, &sumSq); + } else { + UpdateSum(¶ms.srcX[offset], &sum, &sumSq); + } } smem[threadIdx.x] = make_float2(sum, sumSq); @@ -185,7 +246,6 @@ __global__ void groupNormNDHWCSumSingerChannelKernel( __syncthreads(); float2 sums = smem[threadIdx.x]; - atomicAdd(¶ms.redBuffer[(2 * ni + 0) * params.groups + ci], sums.x * params.invDHWC); atomicAdd(¶ms.redBuffer[(2 * ni + 1) * params.groups + ci], sums.y); @@ -209,7 +269,8 @@ __global__ void groupNormNDHWCSumKernel(const GroupNormNDHWCParams params) { if (ci >= params.c || threadIdx.x * THREADS_PER_CHANNEL >= params.cPerBlock) { return; } - // The first activation loaded by that block. + int32_t gj = ci / params.cPerGroup; + int32_t cj = ci % params.cPerGroup; int32_t dhwBegin = blockIdx.y * params.dhwPerBlock; // The last activation loaded by that block. int32_t dhwEnd = min(dhwBegin + params.dhwPerBlock, params.dhw); @@ -223,13 +284,19 @@ __global__ void groupNormNDHWCSumKernel(const GroupNormNDHWCParams params) { int64_t offset = static_cast(ni) * params.dhwc + static_cast(dhwi) * params.c + ci; float src_data = *reinterpret_cast(¶ms.srcX[offset]); - UpdateSum(¶ms.srcX[offset], &sum, &sumSq); + if (params.srcR != nullptr) { + int64_t g_offset = + params.y_same_with_x ? offset : gj * params.cPerGroup + cj; + UpdateSum( + ¶ms.srcX[offset], ¶ms.srcR[g_offset], &sum, &sumSq); + } else { + UpdateSum(¶ms.srcX[offset], &sum, &sumSq); + } } // The group that thread works on and the channel in the group (modulus). 
int32_t gi = ci / params.cPerGroup - blockIdx.x * params.cPerBlock / params.cPerGroup; - int32_t cj = ci % params.cPerGroup; int flag = (cj == 0 || threadIdx.x == 0) ? 1 : 0; GroupSums inp{flag, sum, sumSq}; GroupSums out; @@ -243,7 +310,6 @@ __global__ void groupNormNDHWCSumKernel(const GroupNormNDHWCParams params) { __syncthreads(); - int32_t gj = ci / params.cPerGroup; if (cj == params.cPerGroup - THREADS_PER_CHANNEL || threadIdx.x * THREADS_PER_CHANNEL == params.cPerBlock - THREADS_PER_CHANNEL) { @@ -351,7 +417,15 @@ inline __device__ void GroupNormCompute(int32_t dhwBegin, for (int32_t dhwi = dhwBegin; dhwi < dhwEnd; ++dhwi) { // The src/dst offset. int64_t offset = (int64_t)blockIdx.z * params.dhwc + dhwi * params.c + ci; - const float src_data = phi::__2float(params.srcX[offset]); + float src_data = phi::__2float(params.srcX[offset]); + if (params.srcR != nullptr) { + auto gi = ci / params.cPerGroup; + auto gj = ci % params.cPerGroup; + int64_t g_offset = + params.y_same_with_x ? offset : gi * params.cPerGroup + gj; + src_data += phi::__2float(params.srcR[g_offset]); + *reinterpret_cast(¶ms.eleOut[offset]) = phi::__2dst(src_data); + } // Normalize the channels. float dst_data = (src_data - mean) * invStdDev; // Scale by gamma and add beta. @@ -392,6 +466,18 @@ inline __device__ void GroupNormCompute( // Extract the two half values. float2 f2 = __half22float2(h2); + if (params.srcR != nullptr) { + auto gi = ci / params.cPerGroup; + auto gj = ci % params.cPerGroup; + int64_t g_offset = + params.y_same_with_x ? offset : gi * params.cPerGroup + gj; + __half2 r2 = *reinterpret_cast<__half2 const*>(¶ms.srcR[g_offset]); + float2 r_f2 = __half22float2(r2); + f2.x += r_f2.x; + f2.y += r_f2.y; + *reinterpret_cast<__half2*>(¶ms.eleOut[offset]) = + __float22half2_rn(f2); + } // Normalize the channels. 
f2.x = (f2.x - mean) * invStdDev; f2.y = (f2.y - mean) * invStdDev; @@ -434,7 +520,18 @@ inline __device__ void GroupNormCompute<__half, 2>( // Extract the two half values. float2 f2 = __half22float2(h2); - + if (params.srcR != nullptr) { + auto gi = ci / params.cPerGroup; + auto gj = ci % params.cPerGroup; + int64_t g_offset = + params.y_same_with_x ? offset : gi * params.cPerGroup + gj; + __half2 r2 = *reinterpret_cast<__half2 const*>(¶ms.srcR[g_offset]); + float2 r_f2 = __half22float2(r2); + f2.x += r_f2.x; + f2.y += r_f2.y; + *reinterpret_cast<__half2*>(¶ms.eleOut[offset]) = + __float22half2_rn(f2); + } // Normalize the channels. f2.x = (f2.x - mean) * invStdDev; f2.y = (f2.y - mean) * invStdDev; @@ -480,6 +577,19 @@ inline __device__ void GroupNormCompute( // Extract the two half values. float2 f2 = phi::bfloat1622float2(h2); + if (params.srcR != nullptr) { + auto gi = ci / params.cPerGroup; + auto gj = ci % params.cPerGroup; + int64_t g_offset = + params.y_same_with_x ? offset : gi * params.cPerGroup + gj; + __nv_bfloat162 r2 = + *reinterpret_cast<__nv_bfloat162 const*>(¶ms.srcR[g_offset]); + float2 r_f2 = phi::bfloat1622float2(r2); + f2.x += r_f2.x; + f2.y += r_f2.y; + *reinterpret_cast<__nv_bfloat162*>(¶ms.eleOut[offset]) = + phi::float22bfloat162_rn(f2); + } // Normalize the channels. f2.x = (f2.x - mean) * invStdDev; f2.y = (f2.y - mean) * invStdDev; @@ -511,6 +621,7 @@ __global__ void groupNormNDHWCScaleKernel( // The group that thread works on and the channel in the group (modulus). 
int32_t gi = ci / params.cPerGroup; + int32_t gj = ci % params.cPerGroup; if (ci >= params.c || gi >= params.groups) { return; @@ -597,17 +708,24 @@ template class groupNormNDHWCScale; template void GroupNormNDHWCKernel(const Context& dev_ctx, const DenseTensor& x, + const paddle::optional& residual, const paddle::optional& scale, const paddle::optional& bias, float epsilon, int groups, const std::string& data_layout_str, + const std::string& activation, DenseTensor* y, + DenseTensor* residual_out, DenseTensor* mean, DenseTensor* var) { + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); + if (data_layout != DataLayout::kNHWC) { + PD_THROW("data_layout only supports NHWC and NDHWC"); + } using AccT = typename phi::dtype::MPTypeTrait::Type; GroupNormNDHWCParams params_; - params_.withSilu = false; + params_.withSilu = activation == "silu" ? true : false; const auto x_dims = x.dims(); dev_ctx.template Alloc(y); @@ -639,6 +757,23 @@ void GroupNormNDHWCKernel(const Context& dev_ctx, params_.w = x_dims[3]; } + const T* residual_data = nullptr; + const auto residual_ptr = residual.get_ptr(); + T* residual_out_data = nullptr; + if (residual_ptr) { + dev_ctx.template Alloc(residual_out); + residual_data = residual_ptr->data(); + residual_out_data = residual_out->data(); + const auto r_dims = residual_ptr->dims(); + int32_t r_dim = 1; + for (size_t i = 0; i < r_dims.size(); i++) { + r_dim *= r_dims[i]; + } + params_.y_same_with_x = + r_dim == params_.n * params_.c * params_.d * params_.h * params_.w + ? 
true + : false; + } dev_ctx.template Alloc(mean); dev_ctx.template Alloc(var); auto* mean_data = mean->data(); @@ -673,7 +808,10 @@ void GroupNormNDHWCKernel(const Context& dev_ctx, } params_.srcX = reinterpret_cast(x_data); params_.dst = reinterpret_cast(y_data); - + if (residual_ptr) { + params_.srcR = reinterpret_cast(residual_data); + params_.eleOut = reinterpret_cast(residual_out_data); + } params_.gamma = scale_data; params_.beta = bias_data; params_.dhw = params_.d * params_.h * params_.w; @@ -1027,14 +1165,19 @@ void GroupNormKernel(const Context& dev_ctx, DenseTensor* var) { using std::is_same; if (is_same::value && data_layout_str == "NHWC") { + const paddle::optional& residual = + paddle::optional(paddle::none); GroupNormNDHWCKernel(dev_ctx, x, + residual, scale, bias, epsilon, groups, data_layout_str, + "", y, + new DenseTensor(), mean, var); return; @@ -1042,14 +1185,19 @@ void GroupNormKernel(const Context& dev_ctx, #ifdef PADDLE_CUDA_BF16 if (is_same::value && data_layout_str == "NHWC") { + const paddle::optional& residual = + paddle::optional(paddle::none); GroupNormNDHWCKernel(dev_ctx, x, + residual, scale, bias, epsilon, groups, data_layout_str, + "", y, + new DenseTensor(), mean, var); return; @@ -1076,3 +1224,13 @@ PD_REGISTER_KERNEL(group_norm, kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32); } } + +PD_REGISTER_KERNEL(add_group_norm_silu, + GPU, + ALL_LAYOUT, + phi::GroupNormNDHWCKernel, + phi::dtype::bfloat16, + phi::dtype::float16) { + kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32); + kernel->OutputAt(3).SetDataType(phi::DataType::FLOAT32); +} diff --git a/paddle/phi/kernels/gpu/inverse_grad_kernel.cu b/paddle/phi/kernels/gpu/inverse_grad_kernel.cu index 2fdc02934fedc..15c24719adfc3 100644 --- a/paddle/phi/kernels/gpu/inverse_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/inverse_grad_kernel.cu @@ -18,5 +18,11 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/inverse_grad_kernel_impl.h" 
-PD_REGISTER_KERNEL( - inverse_grad, GPU, ALL_LAYOUT, phi::InverseGradKernel, float, double) {} +PD_REGISTER_KERNEL(inverse_grad, + GPU, + ALL_LAYOUT, + phi::InverseGradKernel, + float, + double, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/inverse_kernel.cu b/paddle/phi/kernels/gpu/inverse_kernel.cu index 4c011337c6f8f..a9b4fcc763b0b 100644 --- a/paddle/phi/kernels/gpu/inverse_kernel.cu +++ b/paddle/phi/kernels/gpu/inverse_kernel.cu @@ -18,5 +18,11 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/inverse_kernel_impl.h" -PD_REGISTER_KERNEL( - inverse, GPU, ALL_LAYOUT, phi::InverseKernel, float, double) {} +PD_REGISTER_KERNEL(inverse, + GPU, + ALL_LAYOUT, + phi::InverseKernel, + float, + double, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/fluid/operators/ops_signature/channel_shuffle_sig.cc b/paddle/phi/kernels/gpu/lstm_grad_kernel.cu similarity index 50% rename from paddle/fluid/operators/ops_signature/channel_shuffle_sig.cc rename to paddle/phi/kernels/gpu/lstm_grad_kernel.cu index d3bf58bdec3c8..5590541dcb385 100644 --- a/paddle/fluid/operators/ops_signature/channel_shuffle_sig.cc +++ b/paddle/phi/kernels/gpu/lstm_grad_kernel.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,19 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/phi/core/compat/op_utils.h" +#include "paddle/phi/kernels/impl/lstm_kernel_impl.h" +#include "paddle/phi/kernels/lstm_kernel.h" -namespace phi { - -KernelSignature ChannelShuffleGradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("channel_shuffle_grad", - {"Out@GRAD"}, - {"groups", "data_format"}, - {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(channel_shuffle_grad, - phi::ChannelShuffleGradOpArgumentMapping); +PD_REGISTER_KERNEL( + lstm_grad, GPU, ALL_LAYOUT, phi::LSTMGradKernel, float, double) {} diff --git a/paddle/phi/kernels/gpu/lstm_kernel.cu b/paddle/phi/kernels/gpu/lstm_kernel.cu new file mode 100644 index 0000000000000..7bcf1f78ab604 --- /dev/null +++ b/paddle/phi/kernels/gpu/lstm_kernel.cu @@ -0,0 +1,18 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/lstm_kernel.h" +#include "paddle/phi/kernels/impl/lstm_kernel_impl.h" + +PD_REGISTER_KERNEL(lstm, GPU, ALL_LAYOUT, phi::LSTMKernel, float, double) {} diff --git a/paddle/phi/kernels/gpu/meshgrid_grad_kernel.cu.cc b/paddle/phi/kernels/gpu/meshgrid_grad_kernel.cu.cc index 2dd9e7dc6ceec..3244f28c77700 100644 --- a/paddle/phi/kernels/gpu/meshgrid_grad_kernel.cu.cc +++ b/paddle/phi/kernels/gpu/meshgrid_grad_kernel.cu.cc @@ -27,4 +27,6 @@ PD_REGISTER_KERNEL(meshgrid_grad, double, int, int64_t, - phi::dtype::bfloat16) {} + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/meshgrid_kernel.cu.cc b/paddle/phi/kernels/gpu/meshgrid_kernel.cu.cc index 5a1c74f4193d3..9176305d94fec 100644 --- a/paddle/phi/kernels/gpu/meshgrid_kernel.cu.cc +++ b/paddle/phi/kernels/gpu/meshgrid_kernel.cu.cc @@ -27,4 +27,6 @@ PD_REGISTER_KERNEL(meshgrid, double, int, int64_t, - phi::dtype::bfloat16) {} + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/pool_grad_kernel.cu b/paddle/phi/kernels/gpu/pool_grad_kernel.cu index 450bfc07a7b46..59afcdfe9884f 100644 --- a/paddle/phi/kernels/gpu/pool_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/pool_grad_kernel.cu @@ -27,6 +27,14 @@ PD_REGISTER_KERNEL(pool2d_grad, double, phi::dtype::float16, phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(lp_pool2d_grad, + GPU, + ALL_LAYOUT, + phi::LPPool2dGradKernel, + float, + double, + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(pool2d_double_grad, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/gpu/pool_kernel.cu b/paddle/phi/kernels/gpu/pool_kernel.cu index 33abba0a51a50..b9ab97da86fe1 100644 --- a/paddle/phi/kernels/gpu/pool_kernel.cu +++ b/paddle/phi/kernels/gpu/pool_kernel.cu @@ -27,6 +27,14 @@ PD_REGISTER_KERNEL(pool2d, double, phi::dtype::float16, phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(lp_pool2d, + GPU, + ALL_LAYOUT, + phi::LPPool2dKernel, + 
float, + double, + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(max_pool2d_with_index, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu b/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu index 24e79c77a50e1..4250ffb76dbe3 100644 --- a/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu @@ -73,6 +73,7 @@ void PoolGradRawGPUDNNKernel(const Context& ctx, global_pooling, adaptive, padding_algorithm, + 0, dx); return; } diff --git a/paddle/phi/kernels/group_norm_kernel.h b/paddle/phi/kernels/group_norm_kernel.h index 3dc10df6a1109..7f4b83f065bde 100644 --- a/paddle/phi/kernels/group_norm_kernel.h +++ b/paddle/phi/kernels/group_norm_kernel.h @@ -67,6 +67,8 @@ struct GroupNormNDHWCParams { T const* srcX; // The input buffer. Layout NDHWC. T const* srcY; + // The input buffer. Layout NDHWC. + T const* srcR = nullptr; // The gamma scaling factor. void const* gamma; // The beta term to add in GN. @@ -87,7 +89,8 @@ struct GroupNormNDHWCParams { int32_t groups; // Do we apply the Silu activation function? bool withSilu; - + // + bool y_same_with_x = false; // Precomputed values and parameters to control the execution of the kernels. 
// The number of activations per instance (d * h * w) and the number of diff --git a/paddle/phi/kernels/impl/fft_grad_kernel_impl.h b/paddle/phi/kernels/impl/fft_grad_kernel_impl.h index 72c8bc659a632..debc4ad1b6db6 100644 --- a/paddle/phi/kernels/impl/fft_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/fft_grad_kernel_impl.h @@ -92,17 +92,17 @@ void FFTC2RGradKernel(const Context& ctx, const int64_t double_length = out_grad.dims()[axes.back()] - x_grad->dims()[axes.back()]; - const phi::DDim strides = common::stride(x_grad->dims()); - -#if defined(__NVCC__) || defined(__HIPCC__) - const thrust::device_vector strides_g(common::vectorize(strides)); - const int64_t* pstrides = thrust::raw_pointer_cast(strides_g.data()); -#else - const int64_t* pstrides = strides.Get(); -#endif - - funcs::FFTFillConjGradFunctor func( - x_grad->data(), axes.back(), pstrides, double_length); + int64_t stride_to_last_axis = 1; + auto ddim = x_grad->dims(); + for (int i = ddim.size() - 2; i >= axes.back(); --i) { + stride_to_last_axis *= ddim[i + 1]; + } + int64_t stride_second_to_last_axis = stride_to_last_axis * ddim[axes.back()]; + funcs::FFTFillConjGradFunctor func(x_grad->data(), + axes.back(), + stride_second_to_last_axis, + stride_to_last_axis, + double_length); size_t limit = x_grad->numel(); funcs::ForRange for_range(ctx, limit); for_range(func); diff --git a/paddle/phi/kernels/impl/fused_elemwise_activation_kernel_impl.h b/paddle/phi/kernels/impl/fused_elemwise_activation_kernel_impl.h new file mode 100644 index 0000000000000..bcaf21cc22df8 --- /dev/null +++ b/paddle/phi/kernels/impl/fused_elemwise_activation_kernel_impl.h @@ -0,0 +1,260 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/phi/kernels/funcs/compound_functors.h" +#include "paddle/phi/kernels/funcs/elementwise/elementwise_op_function.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/funcs/functors.h" +#include "paddle/phi/kernels/funcs/fused_elemwise_activation_functor.h" + +namespace phi { + +template +void FusedElemwiseActivationKernel(const Context &dev_ctx, + const DenseTensor &x, + const DenseTensor &y, + const std::vector &functor_list, + int axis, + float scale, + bool save_intermediate_out, + DenseTensor *out, + DenseTensor *intermediate_out) { + auto &in_x = GET_DATA_SAFELY(&x, "Input", "X", "FusedElemwiseActivation"); + auto &in_y = GET_DATA_SAFELY(&y, "Input", "Y", "FusedElemwiseActivation"); + + PADDLE_ENFORCE_EQ( + out != nullptr, + true, + phi::errors::InvalidArgument("The output(Out) should not be empty")); + auto output = out; + + std::vector outputs; + outputs.emplace_back(output); + + if (save_intermediate_out) { + PADDLE_ENFORCE_EQ(intermediate_out != nullptr, + true, + phi::errors::InvalidArgument( + "The save_intermediate_out is enable, so the " + "IntermediateOut should not be empty.")); + + outputs.emplace_back(intermediate_out); + } else { + outputs.emplace_back(nullptr); + } + + phi::funcs::RunFunctors(dev_ctx, + in_x, + in_y, + &outputs, + functor_list, + scale, + axis, + save_intermediate_out); +} + +template +void FusedElemwiseActivationGradKernel( + const Context &dev_ctx, + const 
DenseTensor &x, + const DenseTensor &y, + const DenseTensor &out, + const DenseTensor &intermediate_out, + const DenseTensor &out_grad, + const std::vector &functor_list, + int axis, + float scale, + bool save_intermediate_out, + DenseTensor *x_grad, + DenseTensor *y_grad) { + auto *in_y = &y; + PADDLE_ENFORCE_NE( + in_y, + nullptr, + phi::errors::InvalidArgument("Input(Y) should not be nullptr.")); + phi::DenseTensor *in_out = const_cast(&out); + + auto in_out_grad = &out_grad; + PADDLE_ENFORCE_NE( + in_out_grad, + nullptr, + phi::errors::InvalidArgument("Input(Out@Grad) should not be nullptr.")); + + std::vector functor_list_new = functor_list; + size_t sz = functor_list_new[0].size(); + int start = sz < 5 ? 0 : (sz - 5); + if (functor_list_new[0].substr(start, 5) != "_grad") { + functor_list_new[0] += "_grad"; + } + sz = functor_list_new[1].size(); + start = sz < 5 ? 0 : (sz - 5); + if (functor_list_new[1].substr(start, 5) != "_grad") { + functor_list_new[1] += "_grad"; + } + + phi::DenseTensor *in_x = const_cast(&x); + phi::DenseTensor *d_intermediate_out = + nullptr; // intermediate_out_grad is not supported in ops.yaml, so use + // nullptr + + // Get intermediate_out + phi::DenseTensor *in_intermediate_out = nullptr; + if (save_intermediate_out) { + // if save_intermediate_out is true, for Unary(Binary(x, y)) and + // Binary(x, Unary(y)), the Binary(x, y) and Unary(y) not need to + // recompute. 
+ in_intermediate_out = const_cast(&intermediate_out); + PADDLE_ENFORCE_NE(in_intermediate_out, + nullptr, + phi::errors::InvalidArgument( + "The option of 'save_intermediate_out' is opened," + " so the number of 'Out' should be two.")); + } else { + if (!phi::funcs::InputXCanBeAbsent(functor_list_new)) { + PADDLE_ENFORCE_NE( + in_x, + nullptr, + phi::errors::InvalidArgument("Input(X) should not be null.")); + } + } + + // Get in_x + if (x.initialized()) { + PADDLE_ENFORCE_NE( + in_x, + nullptr, + phi::errors::InvalidArgument("Input(X) should not be null.")); + } else { + // If functor_list contains elementwise_add, the backward doesn't use + // in_x, in_y and in_out. + PADDLE_ENFORCE_EQ(phi::funcs::InputXCanBeAbsent(functor_list_new), + true, + phi::errors::InvalidArgument( + "Only when the compoundfunctor contains " + "elementwise_add_grad, the 'X' could be absent.")); + in_x = const_cast(in_out_grad); + } + + // Get in_Out + if (out.initialized()) { + PADDLE_ENFORCE_NE( + in_out, + nullptr, + phi::errors::InvalidArgument("Input(X) should not be null.")); + } else { + // If functor_list contains elementwise_add, the backward doesn't use + // in_x, in_y and in_out. 
+ PADDLE_ENFORCE_EQ(phi::funcs::InputXCanBeAbsent(functor_list_new), + true, + phi::errors::InvalidArgument( + "Only when the compoundfunctor contains " + "elementwise_add_grad, the 'X' could be absent.")); + in_out = const_cast(in_out_grad); + } + + bool has_in_place = phi::funcs::HasInPlaceUnary(functor_list_new); + if (has_in_place) { + phi::funcs::RunGradFunctors( + dev_ctx, + in_x, + in_y, + in_out, + in_intermediate_out, + in_out_grad, + x_grad, + y_grad, + d_intermediate_out, + functor_list_new, + scale, + axis); + } else { + phi::funcs::RunGradFunctors( + dev_ctx, + in_x, + in_y, + in_out, + in_intermediate_out, + in_out_grad, + x_grad, + y_grad, + d_intermediate_out, + functor_list_new, + scale, + axis); + } +} + +template +void FusedElemwiseAddActivationKernel( + const Context &dev_ctx, + const DenseTensor &x, + const DenseTensor &y, + const std::vector &functor_list, + int axis, + float scale, + bool save_intermediate_out, + DenseTensor *out, + DenseTensor *intermediate_out) { + FusedElemwiseActivationKernel(dev_ctx, + x, + y, + functor_list, + axis, + scale, + save_intermediate_out, + out, + intermediate_out); +} + +template +void FusedElemwiseAddActivationGradKernel( + const Context &dev_ctx, + const paddle::optional &x, + const DenseTensor &y, + const DenseTensor &out, + const paddle::optional &intermediate_out, + const DenseTensor &out_grad, + const std::vector &functor_list, + int axis, + float scale, + bool save_intermediate_out, + DenseTensor *x_grad, + DenseTensor *y_grad) { + phi::DenseTensor tmp_x; + phi::DenseTensor tmp_i; + if (x) { + tmp_x = x.get(); + } + if (intermediate_out) { + tmp_i = intermediate_out.get(); + } + FusedElemwiseActivationGradKernel(dev_ctx, + tmp_x, + y, + out, + tmp_i, + out_grad, + functor_list, + axis, + scale, + save_intermediate_out, + x_grad, + y_grad); +} + +} // namespace phi diff --git a/paddle/phi/kernels/impl/inverse_grad_kernel_impl.h b/paddle/phi/kernels/impl/inverse_grad_kernel_impl.h index 
26e2898bf73ff..aa23bddb5b979 100644 --- a/paddle/phi/kernels/impl/inverse_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/inverse_grad_kernel_impl.h @@ -18,6 +18,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/matrix_inverse.h" @@ -37,15 +38,35 @@ void InverseGradKernel(const Context& dev_ctx, tmp_out.Resize(out.dims()); dev_ctx.template Alloc(&tmp_out); - auto mat_dim_a0 = - phi::funcs::CreateMatrixDescriptor(out_grad.dims(), 0, false); - auto mat_dim_b0 = phi::funcs::CreateMatrixDescriptor(out.dims(), 0, true); - blas.MatMul(out_grad, mat_dim_a0, out, mat_dim_b0, T(1), &tmp_out, T(0)); + if (IsComplexType(out.dtype())) { + DenseTensor out_conj; + out_conj.Resize(out.dims()); + dev_ctx.template Alloc(&out_conj); - auto mat_dim_a1 = phi::funcs::CreateMatrixDescriptor(out.dims(), 0, true); - auto mat_dim_b1 = - phi::funcs::CreateMatrixDescriptor(tmp_out.dims(), 0, false); - blas.MatMul(out, mat_dim_a1, tmp_out, mat_dim_b1, T(-1), in_grad, T(0)); + phi::ConjKernel(dev_ctx, out, &out_conj); + + auto mat_dim_a0 = + phi::funcs::CreateMatrixDescriptor(out_grad.dims(), 0, false); + auto mat_dim_b0 = phi::funcs::CreateMatrixDescriptor(out.dims(), 0, true); + blas.MatMul( + out_grad, mat_dim_a0, out_conj, mat_dim_b0, T(1), &tmp_out, T(0)); + + auto mat_dim_a1 = phi::funcs::CreateMatrixDescriptor(out.dims(), 0, true); + auto mat_dim_b1 = + phi::funcs::CreateMatrixDescriptor(tmp_out.dims(), 0, false); + blas.MatMul( + out_conj, mat_dim_a1, tmp_out, mat_dim_b1, T(-1), in_grad, T(0)); + } else { + auto mat_dim_a0 = + phi::funcs::CreateMatrixDescriptor(out_grad.dims(), 0, false); + auto mat_dim_b0 = phi::funcs::CreateMatrixDescriptor(out.dims(), 0, true); + blas.MatMul(out_grad, mat_dim_a0, out, mat_dim_b0, T(1), &tmp_out, T(0)); + + auto mat_dim_a1 = 
phi::funcs::CreateMatrixDescriptor(out.dims(), 0, true); + auto mat_dim_b1 = + phi::funcs::CreateMatrixDescriptor(tmp_out.dims(), 0, false); + blas.MatMul(out, mat_dim_a1, tmp_out, mat_dim_b1, T(-1), in_grad, T(0)); + } } } diff --git a/paddle/phi/kernels/impl/lstm_kernel_impl.h b/paddle/phi/kernels/impl/lstm_kernel_impl.h new file mode 100644 index 0000000000000..1f4b4dcac0f14 --- /dev/null +++ b/paddle/phi/kernels/impl/lstm_kernel_impl.h @@ -0,0 +1,443 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include + +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/blas/blas.h" +#include "paddle/phi/kernels/funcs/detail/activation_functions.h" +#include "paddle/phi/kernels/funcs/lstm_compute.h" +#include "paddle/phi/kernels/funcs/lstm_utils.h" + +namespace phi { + +template +void LSTMKernel(const Context& dev_ctx, + const DenseTensor& input, + const paddle::optional& h0, + const paddle::optional& c0, + const DenseTensor& weight, + const DenseTensor& bias, + bool use_peepholes, + bool is_reverse, + bool is_test, + const std::string& gate_activation, + const std::string& cell_activation, + const std::string& candidate_activation, + DenseTensor* hidden, + DenseTensor* cell, + DenseTensor* batch_gate, + DenseTensor* batch_cell_pre_act) { + auto* hidden_t0 = h0.get_ptr(); + auto* cell_t0 = c0.get_ptr(); + + phi::DenseTensor* batch_gate_new = nullptr; + phi::DenseTensor batch_gate_temp; + if (is_test) { + batch_gate_new = &batch_gate_temp; + batch_gate_new->Resize(input.dims()); + } else { + batch_gate_new = batch_gate; + } + + dev_ctx.template Alloc(batch_gate_new); + dev_ctx.template Alloc(hidden); + dev_ctx.template Alloc(cell); + + phi::funcs::LoDTensor2BatchFunctor to_batch; + to_batch(dev_ctx, input, batch_gate_new, true, is_reverse); + + auto in_dims = input.dims(); + int frame_size = static_cast(in_dims[1] / 4); + phi::DDim dims({in_dims[0], frame_size}); + + if (bias.initialized()) { + phi::DenseTensor b = bias; + b.Resize({bias.numel(), 1}); + phi::DenseTensor gate_bias = b.Slice(0, 4 * frame_size); + phi::funcs::RowwiseAdd add_bias; + add_bias(dev_ctx, *batch_gate_new, gate_bias, batch_gate_new); + } + + phi::funcs::LstmMetaValue lstm_value; + if (bias.initialized() && use_peepholes) { + T* bias_data = const_cast(bias.data()); + // the code style in LstmMetaValue will be updated later. 
+ + lstm_value.check_ig = bias_data + 4 * frame_size; + lstm_value.check_fg = lstm_value.check_ig + frame_size; + lstm_value.check_og = lstm_value.check_fg + frame_size; + } else { + lstm_value.check_ig = nullptr; + lstm_value.check_fg = nullptr; + lstm_value.check_og = nullptr; + } + lstm_value.prev_state_value = nullptr; + phi::DenseTensor ordered_c0; + + phi::Vector order(batch_gate_new->lod()[2]); + + if (cell_t0) { + // Since the batch computing for LSTM reorders the input sequence + // according to their length. The initialized cell state also needs + // to reorder. + ReorderInitState(dev_ctx, *cell_t0, order, &ordered_c0, true); + lstm_value.prev_state_value = ordered_c0.data(); + } + + // Use the local variable as here. + phi::DenseTensor batch_hidden, batch_cell, batch_cell_pre_act_temp; + phi::DenseTensor* batch_cell_pre_act_p; + if (is_test) { + batch_cell_pre_act_p = &batch_cell_pre_act_temp; + } else { + batch_cell_pre_act_p = batch_cell_pre_act; + } + batch_hidden.Resize(dims); + batch_cell.Resize(dims); + dev_ctx.template Alloc(&batch_hidden); + dev_ctx.template Alloc(&batch_cell); + batch_cell_pre_act_p->Resize(dims); + dev_ctx.template Alloc(batch_cell_pre_act_p); + + auto batch_starts = batch_gate_new->lod()[0]; + size_t num_batch = batch_starts.size() - 1; + auto gate_act = phi::funcs::detail::GetActivationType(gate_activation); + auto cell_act = phi::funcs::detail::GetActivationType(cell_activation); + auto cand_act = phi::funcs::detail::GetActivationType(candidate_activation); + + auto blas = phi::funcs::GetBlas(dev_ctx); + for (size_t n = 0; n < num_batch; n++) { + int bstart = static_cast(batch_starts[n]); + int bend = static_cast(batch_starts[n + 1]); + + phi::DenseTensor gate_t = batch_gate_new->Slice(bstart, bend); + phi::DenseTensor out_t = batch_hidden.Slice(bstart, bend); + phi::DenseTensor cell_t = batch_cell.Slice(bstart, bend); + phi::DenseTensor cell_pre_act_t = batch_cell_pre_act_p->Slice(bstart, bend); + + int cur_batch_size = 
bend - bstart; + + if (n > 0) { + int pre_h_start = static_cast(batch_starts[n - 1]); + int pre_h_end = pre_h_start + cur_batch_size; + auto pre_hidden_t = batch_hidden.Slice(pre_h_start, pre_h_end); + blas.MatMul(pre_hidden_t, + false, + weight, + false, + static_cast(1.0), + &gate_t, + static_cast(1.0)); + } else if (hidden_t0 != nullptr) { + // If n == 0 and there is no initialized hidden state, that is to say + // the H0 is zeros, the calculation W_h * H0 will be skiped. + // If n == 0 and there is initialized hidden state, calculate W_h * H0. + + // Since the batch computing for LSTM reorders the input sequence + // according to their length. The initialized hidden state also needs + // to reorder. + phi::DenseTensor ordered_h0; + ReorderInitState( + dev_ctx, *hidden_t0, order, &ordered_h0, true); + blas.MatMul(ordered_h0, + false, + weight, + false, + static_cast(1.0), + &gate_t, + static_cast(1.0)); + } + + lstm_value.gate_value = gate_t.data(); + lstm_value.output_value = out_t.data(); + lstm_value.state_value = cell_t.data(); + lstm_value.state_active_value = cell_pre_act_t.data(); + T cell_clip = 0.0; + phi::funcs::LstmUnitFunctor::compute(dev_ctx, + lstm_value, + frame_size, + cur_batch_size, + cell_clip, + gate_act, + cell_act, + cand_act); + lstm_value.prev_state_value = lstm_value.state_value; + } + + phi::funcs::Batch2LoDTensorFunctor to_seq; + batch_hidden.set_lod(batch_gate_new->lod()); + // restore the output hidden in phi::DenseTensor from the batch hidden + to_seq(dev_ctx, batch_hidden, hidden); + + batch_cell.set_lod(batch_gate_new->lod()); + // restore the output cell state in phi::DenseTensor from the batch cell + to_seq(dev_ctx, batch_cell, cell); +} + +template +void LSTMGradKernel(const Context& dev_ctx, + const DenseTensor& input_in, + const paddle::optional& h0_in, + const paddle::optional& c0_in, + const DenseTensor& weight_in, + const DenseTensor& bias_in, + const DenseTensor& hidden_in, + const DenseTensor& cell_in, + const 
DenseTensor& batch_gate_in, + const DenseTensor& batch_cell_pre_act_in, + const DenseTensor& hidden_grad, + bool use_peepholes, + bool is_reverse, + bool is_test, + const std::string& gate_activation, + const std::string& cell_activation, + const std::string& candidate_activation, + DenseTensor* input_grad, + DenseTensor* h0_grad, + DenseTensor* c0_grad, + DenseTensor* weight_grad, + DenseTensor* bias_grad) { + auto* input = &input_in; + auto* weight = &weight_in; + auto* bias = &bias_in; + + auto* hidden_out = &hidden_in; + auto* cell_out = &cell_in; + + auto* batch_gate = &batch_gate_in; + auto* batch_cell_pre_act = &batch_cell_pre_act_in; + + auto* hidden_g = &hidden_grad; + + auto* in_g = input_grad; + auto* weight_g = weight_grad; + auto* bias_g = bias_grad; + + auto* h0 = h0_in.get_ptr(); + auto* c0 = c0_in.get_ptr(); + + auto* h0_g = h0_grad; + auto* c0_g = c0_grad; + + phi::funcs::SetConstant zero; + if (weight_g) { + dev_ctx.template Alloc(weight_g); + zero(dev_ctx, weight_g, static_cast(0.0)); + } + + // ordered_h0/c0 is the reordered hidden/cell initialization. + // ordered_h0_g/c0_g is the reordered gradient of hidden/cell + // initialization. 
+ phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; + phi::Vector order(batch_gate->lod()[2]); + + if (c0) { + ReorderInitState(dev_ctx, *c0, order, &ordered_c0, true); + } + if (c0 && c0_g) { + ordered_c0_g.Resize(c0_g->dims()); + dev_ctx.template Alloc(&ordered_c0_g); + } + + auto in_dims = input->dims(); + auto out_dims = hidden_g->dims(); + int frame_size = static_cast(in_dims[1] / 4); + PADDLE_ENFORCE_EQ(frame_size, + out_dims[1], + phi::errors::InvalidArgument( + "The second dimension of Input(hidden_grad) should be " + "%d, but received %d in LSTM@Grad operator.", + frame_size, + out_dims[1])); + + phi::funcs::LstmMetaValue lstm_value; + if (bias && use_peepholes) { + T* bias_data = const_cast(bias->data()); + lstm_value.check_ig = bias_data + 4 * frame_size; + lstm_value.check_fg = lstm_value.check_ig + frame_size; + lstm_value.check_og = lstm_value.check_fg + frame_size; + } else { + lstm_value.check_ig = nullptr; + lstm_value.check_fg = nullptr; + lstm_value.check_og = nullptr; + } + + phi::funcs::LstmMetaGrad lstm_grad; + + if (bias && bias_g) { + dev_ctx.template Alloc(bias_g); + zero(dev_ctx, bias_g, static_cast(0.0)); + } + if (bias && bias_g && use_peepholes) { + T* bias_g_data = bias_g->data(); + lstm_grad.check_ig_grad = bias_g_data + 4 * frame_size; + lstm_grad.check_fg_grad = lstm_grad.check_ig_grad + frame_size; + lstm_grad.check_og_grad = lstm_grad.check_fg_grad + frame_size; + } else { + lstm_grad.check_ig_grad = nullptr; + lstm_grad.check_fg_grad = nullptr; + lstm_grad.check_og_grad = nullptr; + } + + phi::funcs::LoDTensor2BatchFunctor to_batch; + + auto ToBatch = [&batch_gate, &to_batch](const Context& ctx, + const phi::DenseTensor& src, + const phi::DDim& dims, + phi::DenseTensor& dst) { + dst.Resize(dims); + ctx.template Alloc(&dst); + dst.set_lod(batch_gate->lod()); + to_batch(ctx, src, &dst, false); + }; + + phi::DenseTensor batch_hidden, batch_hidden_g, batch_cell; + ToBatch(dev_ctx, *hidden_out, out_dims, 
batch_hidden); + ToBatch(dev_ctx, *hidden_g, out_dims, batch_hidden_g); + ToBatch(dev_ctx, *cell_out, out_dims, batch_cell); + + phi::DenseTensor batch_cell_g, batch_gate_g; + batch_cell_g.Resize(out_dims); + dev_ctx.template Alloc(&batch_cell_g); + // TODO(qingqing) support the case output cell has gradient. + // to_batch(dev_ctx, *cell_g, batch_cell_g, false); + zero(dev_ctx, &batch_cell_g, static_cast(0.0)); + batch_gate_g.Resize(batch_gate->dims()); + dev_ctx.template Alloc(&batch_gate_g); + batch_gate_g.set_lod(batch_gate->lod()); + + auto gate_act = phi::funcs::detail::GetActivationType(gate_activation); + auto cell_act = phi::funcs::detail::GetActivationType(cell_activation); + auto cand_act = phi::funcs::detail::GetActivationType(candidate_activation); + + auto batch_starts = batch_gate->lod()[0]; + size_t num_batch = batch_starts.size() - 1; + auto blas = phi::funcs::GetBlas(dev_ctx); + for (int n = static_cast(num_batch) - 1; n >= 0; n--) { + int bstart = static_cast(batch_starts[n]); + int bend = static_cast(batch_starts[n + 1]); + + phi::DenseTensor gate = batch_gate->Slice(bstart, bend); + phi::DenseTensor cell = batch_cell.Slice(bstart, bend); + phi::DenseTensor cell_pre_act = batch_cell_pre_act->Slice(bstart, bend); + lstm_value.gate_value = gate.data(); + lstm_value.state_value = cell.data(); + lstm_value.state_active_value = cell_pre_act.data(); + + phi::DenseTensor out_g = batch_hidden_g.Slice(bstart, bend); + phi::DenseTensor gate_g = batch_gate_g.Slice(bstart, bend); + phi::DenseTensor cell_g = batch_cell_g.Slice(bstart, bend); + lstm_grad.state_grad = cell_g.data(); + lstm_grad.gate_grad = gate_g.data(); + lstm_grad.output_grad = out_g.data(); + + if (n > 0) { + int bstart_pre = static_cast(batch_starts[n - 1]); + phi::DenseTensor cell_pre = batch_cell.Slice(bstart_pre, bstart); + phi::DenseTensor cell_pre_g = batch_cell_g.Slice(bstart_pre, bstart); + lstm_value.prev_state_value = cell_pre.data(); + lstm_grad.prev_state_grad = 
cell_pre_g.data(); + } else { + lstm_value.prev_state_value = c0 ? ordered_c0.data() : nullptr; + lstm_grad.prev_state_grad = c0_g ? ordered_c0_g.data() : nullptr; + } + + // lstm_value.output_value not used in bp, set to nullptr + // lstm_grad.state_active_grad not used in bp, set to nullptr + lstm_value.output_value = nullptr; + lstm_grad.state_active_grad = nullptr; + int cur_batch_size = bend - bstart; + T cell_clip = 0.0; + phi::funcs::LstmUnitGradFunctor::compute(dev_ctx, + lstm_value, + lstm_grad, + frame_size, + cur_batch_size, + cell_clip, + gate_act, + cell_act, + cand_act); + + if (n > 0) { + int pre_h_start = static_cast(batch_starts[n - 1]); + int pre_h_end = pre_h_start + cur_batch_size; + auto pre_hidden_g = batch_hidden_g.Slice(pre_h_start, pre_h_end); + blas.MatMul(gate_g, + false, + *weight, + true, + static_cast(1.0), + &pre_hidden_g, + static_cast(1.0)); + if (weight_g) { + /* backward weight */ + auto pre_hidden = batch_hidden.Slice(pre_h_start, pre_h_end); + blas.MatMul(pre_hidden, + true, + gate_g, + false, + static_cast(1.0), + weight_g, + static_cast(1.0)); + } + } else { + if (h0 && weight_g) { + ReorderInitState(dev_ctx, *h0, order, &ordered_h0, true); + blas.MatMul(ordered_h0, + true, + gate_g, + false, + static_cast(1.0), + weight_g, + static_cast(1.0)); + } + if (h0 && h0_g) { + ordered_h0_g.Resize(h0_g->dims()); + dev_ctx.template Alloc(&ordered_h0_g); + blas.MatMul(gate_g, + false, + *weight, + true, + static_cast(1.0), + &ordered_h0_g, + static_cast(0.0)); + } + } + } + + phi::funcs::Batch2LoDTensorFunctor to_seq; + if (in_g) { + /* backward data */ + dev_ctx.template Alloc(in_g); + to_seq(dev_ctx, batch_gate_g, in_g); + } + if (bias && bias_g) { + /* backward bias */ + phi::DenseTensor b_g = *bias_g; + b_g.Resize({bias_g->numel(), 1}); + phi::DenseTensor gate_bias_g = b_g.Slice(0, 4 * frame_size); + phi::funcs::ColwiseSum col_sum; + col_sum(dev_ctx, batch_gate_g, &gate_bias_g); + } + + if (h0 && h0_g) { + ReorderInitState(dev_ctx, 
ordered_h0_g, order, h0_g, false); + } + if (c0 && c0_g) { + ReorderInitState(dev_ctx, ordered_c0_g, order, c0_g, false); + } +} +} // namespace phi diff --git a/paddle/phi/kernels/impl/pool_grad_kernel_impl.h b/paddle/phi/kernels/impl/pool_grad_kernel_impl.h index c8a42e0265fb8..7ed3d65b6410e 100644 --- a/paddle/phi/kernels/impl/pool_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/pool_grad_kernel_impl.h @@ -36,6 +36,7 @@ void PoolGradRawKernel(const Context& ctx, bool global_pooling, bool adaptive, const std::string& padding_algorithm, + const float norm_type, DenseTensor* dx) { const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); std::vector paddings_ = paddings; @@ -71,9 +72,15 @@ void PoolGradRawKernel(const Context& ctx, funcs::SetConstant set_constant; set_constant(ctx, dx, static_cast(0.0)); + std::string true_type; + if (norm_type == INFINITY) + true_type = "max"; + else + true_type = pooling_type; + switch (kernel_size_.size()) { case 2: { - if (pooling_type == "max") { + if (true_type == "max") { funcs::MaxPool2dGradFunctor pool2d_backward; pool2d_backward(ctx, x, @@ -84,7 +91,7 @@ void PoolGradRawKernel(const Context& ctx, paddings_, data_format, dx); - } else if (pooling_type == "avg") { + } else if (true_type == "avg") { funcs::Pool2dGradFunctor, T> pool2d_backward; funcs::AvgPoolGrad pool_process; @@ -100,6 +107,23 @@ void PoolGradRawKernel(const Context& ctx, adaptive, dx, pool_process); + } else { // lp_pool2d + funcs::Pool2dGradFunctor, T> + pool2d_backward; + funcs::LPPoolGrad pool_process; + pool_process.setNormType(norm_type); + pool2d_backward(ctx, + x, + out, + dout, + kernel_size_, + strides, + paddings_, + data_format, + exclusive, + adaptive, + dx, + pool_process); } } break; case 3: { @@ -215,6 +239,43 @@ void Pool2dGradKernel(const Context& ctx, global_pooling, adaptive, padding_algorithm, + 0, + dx); +} + +template +void LPPool2dGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& out, + 
const DenseTensor& dout, + const IntArray& kernel_size, + const std::vector& strides, + const std::vector& paddings, + bool ceil_mode UNUSED, + bool exclusive, + const std::string& data_format, + const std::string& pooling_type, + bool global_pooling, + bool adaptive, + const std::string& padding_algorithm, + const float norm_type, + DenseTensor* dx) { + std::vector kernel_size_val(kernel_size.GetData().begin(), + kernel_size.GetData().end()); + PoolGradRawKernel(ctx, + x, + out, + dout, + kernel_size_val, + strides, + paddings, + exclusive, + data_format, + pooling_type, + global_pooling, + adaptive, + padding_algorithm, + norm_type, dx); } @@ -304,6 +365,7 @@ void Pool3dGradKernel(const Context& ctx, global_pooling, adaptive, padding_algorithm, + 0, dx); } diff --git a/paddle/phi/kernels/impl/pool_kernel_impl.h b/paddle/phi/kernels/impl/pool_kernel_impl.h index 50a5195e771e8..2a370c7c876e5 100644 --- a/paddle/phi/kernels/impl/pool_kernel_impl.h +++ b/paddle/phi/kernels/impl/pool_kernel_impl.h @@ -61,6 +61,7 @@ void PoolRawKernel(const Context& ctx, bool global_pooling, bool adaptive, const std::string& padding_algorithm, + const float norm_type, DenseTensor* out) { const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); std::vector paddings_ = paddings; @@ -75,6 +76,15 @@ void PoolRawKernel(const Context& ctx, data_dims = slice_ddim(x_dims, 2, x_dims.size()); } + std::string true_type; + if (norm_type == INFINITY) + true_type = "max"; + else + true_type = pooling_type; + if (true_type == "lp" && norm_type == 0) + PADDLE_THROW( + errors::InvalidArgument("norm_type of LPPool op cannot be 0.")); + funcs::UpdatePadding(&paddings_, global_pooling, adaptive, @@ -95,7 +105,7 @@ void PoolRawKernel(const Context& ctx, switch (kernel_size_.size()) { case 2: { - if (pooling_type == "max") { + if (true_type == "max") { funcs::Pool2dFunctor, T> pool2d_forward; funcs::MaxPool pool_process; pool2d_forward(ctx, @@ -109,7 +119,7 @@ void PoolRawKernel(const 
Context& ctx, out, pool_process); - } else if (pooling_type == "avg") { + } else if (true_type == "avg") { std::vector reduce_dim; int reduce_num = GetReduceNum(x, out, channel_last, &reduce_dim); if (reduce_num > 0 && @@ -146,10 +156,24 @@ void PoolRawKernel(const Context& ctx, out, pool_process); } + } else { // lp_pool2d + funcs::Pool2dFunctor, T> pool2d_forward; + funcs::LPPool pool_process; + pool_process.setNormType(norm_type); + pool2d_forward(ctx, + x, + kernel_size_, + strides, + paddings_, + data_format, + exclusive, + adaptive, + out, + pool_process); } } break; case 3: { - if (pooling_type == "max") { + if (true_type == "max") { funcs::Pool3dFunctor, T> pool3d_forward; funcs::MaxPool pool_process; pool3d_forward(ctx, @@ -162,7 +186,7 @@ void PoolRawKernel(const Context& ctx, false, out, pool_process); - } else if (pooling_type == "avg") { + } else if (true_type == "avg") { funcs::Pool3dFunctor, T> pool3d_forward; funcs::AvgPool pool_process; pool3d_forward(ctx, @@ -175,6 +199,9 @@ void PoolRawKernel(const Context& ctx, adaptive, out, pool_process); + } else { // lp_pool3d + PADDLE_THROW( + errors::InvalidArgument("LPPool op only supports 2D input.")); } } break; default: { @@ -249,6 +276,39 @@ void Pool2dKernel(const Context& ctx, global_pooling, adaptive, padding_algorithm, + 0, + out); +} + +template +void LPPool2dKernel(const Context& ctx, + const DenseTensor& x, + const IntArray& kernel_size, + const std::vector& strides, + const std::vector& paddings, + bool ceil_mode UNUSED, + bool exclusive, + const std::string& data_format, + const std::string& pooling_type, + bool global_pooling, + bool adaptive, + const std::string& padding_algorithm, + const float norm_type, + DenseTensor* out) { + std::vector kernel_size_val(kernel_size.GetData().begin(), + kernel_size.GetData().end()); + PoolRawKernel(ctx, + x, + kernel_size_val, + strides, + paddings, + exclusive, + data_format, + pooling_type, + global_pooling, + adaptive, + padding_algorithm, + 
norm_type, out); } @@ -298,6 +358,7 @@ void Pool3dKernel(const Context& ctx, global_pooling, adaptive, padding_algorithm, + 0, out); } diff --git a/paddle/phi/kernels/lstm_kernel.h b/paddle/phi/kernels/lstm_kernel.h new file mode 100644 index 0000000000000..42195e375c3a9 --- /dev/null +++ b/paddle/phi/kernels/lstm_kernel.h @@ -0,0 +1,66 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/device_context.h" +#include "paddle/utils/optional.h" + +namespace phi { + +template +void LSTMKernel(const Context& dev_ctx, + const DenseTensor& input, + const paddle::optional& h0, + const paddle::optional& c0, + const DenseTensor& weight, + const DenseTensor& bias, + bool use_peepholes, + bool is_reverse, + bool is_test, + const std::string& gate_activation, + const std::string& cell_activation, + const std::string& candidate_activation, + DenseTensor* hidden, + DenseTensor* cell, + DenseTensor* batch_gate, + DenseTensor* batch_cell_pre_act); + +template +void LSTMGradKernel(const Context& dev_ctx, + const DenseTensor& input, + const paddle::optional& h0, + const paddle::optional& c0, + const DenseTensor& weight, + const DenseTensor& bias, + const DenseTensor& hidden, + const DenseTensor& cell, + const DenseTensor& batch_gate, + const DenseTensor& batch_cell_pre_act, + const DenseTensor& hidden_grad, + bool 
use_peepholes, + bool is_reverse, + bool is_test, + const std::string& gate_activation, + const std::string& cell_activation, + const std::string& candidate_activation, + DenseTensor* input_grad, + DenseTensor* h0_grad, + DenseTensor* c0_grad, + DenseTensor* weight_grad, + DenseTensor* bias_grad); + +} // namespace phi diff --git a/paddle/phi/kernels/onednn/concat_kernel.cc b/paddle/phi/kernels/onednn/concat_kernel.cc index c7c258ea88001..725ed9f34cf98 100644 --- a/paddle/phi/kernels/onednn/concat_kernel.cc +++ b/paddle/phi/kernels/onednn/concat_kernel.cc @@ -106,7 +106,7 @@ static const std::vector ReduceMultiInput( template void ConcatKernel(const Context& dev_ctx, const std::vector& x, - const Scalar& axis, + const Scalar& axis_, DenseTensor* out) { const auto& onednn_engine = dev_ctx.GetEngine(); // If any of the multiple inputs of concat has an input size of 0, the @@ -114,6 +114,9 @@ void ConcatKernel(const Context& dev_ctx, auto multi_input = ReduceMultiInput(x); EnforceLayouts(multi_input); + int64_t axis = axis_.to(); + axis = phi::funcs::ComputeAxis(axis, x[0]->dims().size()); + auto out_dims_vec = common::vectorize(out->dims()); if (std::any_of(out_dims_vec.begin(), out_dims_vec.end(), [](int64_t i) { return i < 0; @@ -125,12 +128,12 @@ void ConcatKernel(const Context& dev_ctx, } DDim out_dims = - funcs::ComputeAndCheckShape(true, x_dims, axis.to()); + funcs::ComputeAndCheckShape(true, x_dims, static_cast(axis)); out->Resize(out_dims); } funcs::ConcatOneDNNHandler handler( - dev_ctx.GetPlace(), axis.to(), onednn_engine, multi_input, out); + dev_ctx.GetPlace(), axis, onednn_engine, multi_input, out); std::vector> srcs; srcs.reserve(multi_input.size()); diff --git a/paddle/phi/kernels/pool_grad_kernel.h b/paddle/phi/kernels/pool_grad_kernel.h index 2f813aa9dc050..d027a97b42f68 100644 --- a/paddle/phi/kernels/pool_grad_kernel.h +++ b/paddle/phi/kernels/pool_grad_kernel.h @@ -39,6 +39,24 @@ void Pool2dGradKernel(const Context& ctx, const std::string& 
padding_algorithm, DenseTensor* dx); +template +void LPPool2dGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& out, + const DenseTensor& dout, + const IntArray& kernel_size, + const std::vector& strides, + const std::vector& paddings, + bool ceil_mode, + bool exclusive, + const std::string& data_format, + const std::string& pooling_type, + bool global_pooling, + bool adaptive, + const std::string& padding_algorithm, + const float norm_type, + DenseTensor* dx); + template void Pool2dGradGPUDNNKernel(const Context& ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/pool_kernel.h b/paddle/phi/kernels/pool_kernel.h index e958d62d8c225..28e65d837818f 100644 --- a/paddle/phi/kernels/pool_kernel.h +++ b/paddle/phi/kernels/pool_kernel.h @@ -37,6 +37,22 @@ void Pool2dKernel(const Context& ctx, const std::string& padding_algorithm, DenseTensor* out); +template +void LPPool2dKernel(const Context& ctx, + const DenseTensor& x, + const IntArray& kernel_size, + const std::vector& strides, + const std::vector& paddings, + bool ceil_mode, + bool exclusive, + const std::string& data_format, + const std::string& pooling_type, + bool global_pooling, + bool adaptive, + const std::string& padding_algorithm, + const float norm_type, + DenseTensor* out); + template void Pool2dGPUDNNKernel(const Context& ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/reduce_kernel_impl.cc b/paddle/phi/kernels/reduce_kernel_impl.cc index 000cb99034c26..9319248099903 100644 --- a/paddle/phi/kernels/reduce_kernel_impl.cc +++ b/paddle/phi/kernels/reduce_kernel_impl.cc @@ -20,10 +20,16 @@ namespace phi { // oneDNN's reduction kernel is optimized only for reducing throughout the // most outer dims, so in case of another type of reduction, it would be // better to fallback to native implementation -inline bool HasOptimizedOneDNNKernel(const KernelContext* ctx) { +inline bool HasOptimizedOneDNNKernel(const KernelContext* ctx, + const bool mean_op) { const DenseTensor& 
x = ctx->InputAt(0); - const TensorRef& dims_tmp = ctx->AttrAt(0); - IntArray dims_array = IntArray(*dims_tmp.Get()); + IntArray dims_array; + if (mean_op) { + dims_array = ctx->AttrAt(0); + } else { + const TensorRef& dims_tmp = ctx->AttrAt(0); + dims_array = IntArray(*dims_tmp.Get()); + } int ndims = x.dims().size(); const bool reduce_all = recompute_reduce_all(x, dims_array); auto dims = dims_array.GetData(); @@ -53,7 +59,15 @@ inline bool HasOptimizedOneDNNKernel(const KernelContext* ctx) { bool ReduceCheckIfOneDNNSupport(const KernelContext* ctx) { if (ctx->InputAt(0).dims().size() > 5 || - !HasOptimizedOneDNNKernel(ctx)) { + !HasOptimizedOneDNNKernel(ctx, false)) { + return false; + } + return true; +} + +bool ReduceMeanCheckIfOneDNNSupport(const KernelContext* ctx) { + if (ctx->InputAt(0).dims().size() > 5 || + !HasOptimizedOneDNNKernel(ctx, true)) { return false; } return true; diff --git a/paddle/phi/kernels/reduce_kernel_impl.h b/paddle/phi/kernels/reduce_kernel_impl.h index aef4f57ddbdcf..e117f6ab335dd 100644 --- a/paddle/phi/kernels/reduce_kernel_impl.h +++ b/paddle/phi/kernels/reduce_kernel_impl.h @@ -21,4 +21,6 @@ bool ReduceCheckIfOneDNNSupport(const KernelContext* ctx); bool ReduceGradCheckIfOneDNNSupport(const KernelContext* ctx); +bool ReduceMeanCheckIfOneDNNSupport(const KernelContext* ctx); + } // namespace phi diff --git a/paddle/phi/kernels/reduce_mean_kernel.cc b/paddle/phi/kernels/reduce_mean_kernel.cc index 16b3abf0e2931..a657e7ba8c01d 100644 --- a/paddle/phi/kernels/reduce_mean_kernel.cc +++ b/paddle/phi/kernels/reduce_mean_kernel.cc @@ -67,7 +67,7 @@ PD_REGISTER_KERNEL(mean, KPS, ALL_LAYOUT, phi::MeanKernel, float) {} #if defined(PADDLE_WITH_DNNL) PD_REGISTER_KERNEL( mean, OneDNN, ONEDNN, phi::MeanKernel, float, phi::dtype::bfloat16) { - kernel->check_if_onednn_kernel_support_ = phi::ReduceCheckIfOneDNNSupport; + kernel->check_if_onednn_kernel_support_ = phi::ReduceMeanCheckIfOneDNNSupport; } #endif diff --git 
a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc index 77ae06206f19d..8664f3b4aaf20 100644 --- a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc +++ b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc @@ -21,8 +21,7 @@ #include "paddle/phi/core/mixed_vector.h" #include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h" -namespace phi { -namespace sr { +namespace phi::sr { static std::vector PathToRows(const DenseTensor& path) { std::set rows; @@ -80,8 +79,7 @@ void HSigmoidLossGradKernel(const Context& ctx, w_grad); } -} // namespace sr -} // namespace phi +} // namespace phi::sr PD_REGISTER_KERNEL(hsigmoid_loss_grad_sr, CPU, diff --git a/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc b/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc index a5d2e66787316..19b72361feda7 100644 --- a/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc +++ b/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc @@ -20,8 +20,7 @@ #include "paddle/phi/kernels/funcs/selected_rows_functor.h" -namespace phi { -namespace sr { +namespace phi::sr { template void MergeSelectedRowsKernel(const Context& dev_ctx, @@ -31,8 +30,7 @@ void MergeSelectedRowsKernel(const Context& dev_ctx, merge_func(dev_ctx, x, out); } -} // namespace sr -} // namespace phi +} // namespace phi::sr PD_REGISTER_KERNEL(merge_selected_rows, CPU, diff --git a/paddle/phi/kernels/selected_rows/shape_kernel.cc b/paddle/phi/kernels/selected_rows/shape_kernel.cc index e4b53658f42ed..ee7c0d64670d4 100644 --- a/paddle/phi/kernels/selected_rows/shape_kernel.cc +++ b/paddle/phi/kernels/selected_rows/shape_kernel.cc @@ -21,8 +21,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/shape_kernel.h" -namespace phi { -namespace sr { +namespace phi::sr { template void ShapeKernel(const Context& ctx, @@ -31,8 +30,7 @@ void ShapeKernel(const Context& ctx, phi::ShapeKernel(ctx, input.value(), out); } -} // namespace sr -} // namespace phi +} // namespace phi::sr PD_REGISTER_KERNEL(shape_sr, CPU, diff --git a/paddle/phi/kernels/sparse/batch_norm_grad_kernel.cc b/paddle/phi/kernels/sparse/batch_norm_grad_kernel.cc index 73af07da806e0..37c517246f89e 100644 --- a/paddle/phi/kernels/sparse/batch_norm_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/batch_norm_grad_kernel.cc @@ -18,8 +18,7 @@ limitations under the License. */ #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/sparse/empty_kernel.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void BatchNormCooGradKernel(const Context& dev_ctx, @@ -76,8 +75,7 @@ void BatchNormCooGradKernel(const Context& dev_ctx, bias_grad); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(batch_norm_coo_grad, CPU, diff --git a/paddle/phi/kernels/sparse/cpu/addmm_kernel.cc b/paddle/phi/kernels/sparse/cpu/addmm_kernel.cc index 991ee7bcaa778..430fd2462b1a7 100644 --- a/paddle/phi/kernels/sparse/cpu/addmm_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/addmm_kernel.cc @@ -16,8 +16,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace sparse { +namespace phi::sparse { /* DENSE + COO @ DENSE -> DENSE */ template @@ -45,8 +44,7 @@ void AddmmCsrDenseKernel(const Context& dev_ctx UNUSED, "Not support CPU kernel of 'sparse.addmm' now.")); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(addmm_coo_dense, CPU, diff --git a/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc index d18bdc4b12e96..4d62c8f70b579 100644 --- a/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc @@ -19,8 +19,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/sparse/cpu/conv.h" -namespace phi { -namespace sparse { +namespace phi::sparse { // rulebook: //[ @@ -215,8 +214,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx, kernel_grad); })); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(conv3d_coo_grad, CPU, diff --git a/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc index 88a01e1135b7b..c5cd5ac42c275 100644 --- a/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc @@ -30,8 +30,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/sparse/flatten_indices.h" #include "paddle/phi/kernels/sparse/empty_kernel.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void AllocCsrPtr(const Context& dev_ctx, @@ -432,8 +431,7 @@ DEFINE_ELEMENTWISE_GRAD_KERNEL(Add) DEFINE_ELEMENTWISE_GRAD_KERNEL(Subtract) DEFINE_ELEMENTWISE_GRAD_KERNEL(Multiply) -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(add_csr_csr_grad, CPU, diff --git a/paddle/phi/kernels/sparse/cpu/fused_attention_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/fused_attention_grad_kernel.cc index 416b715a9a6a2..74436cbc85b52 100644 --- a/paddle/phi/kernels/sparse/cpu/fused_attention_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/fused_attention_grad_kernel.cc @@ -17,8 +17,7 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void FusedAttentionCsrGradKernel(const Context& dev_ctx, @@ -34,5 +33,4 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx, "Not support CPU kernel of 'sparse.nn.functional.fused_attention' now"); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse diff --git a/paddle/phi/kernels/sparse/cpu/fused_attention_kernel.cc b/paddle/phi/kernels/sparse/cpu/fused_attention_kernel.cc index 11c9e2d5c2007..2847ebff7e092 100644 --- a/paddle/phi/kernels/sparse/cpu/fused_attention_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/fused_attention_kernel.cc @@ -17,8 +17,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void FusedAttentionCsrKernel( @@ -35,5 +34,4 @@ void FusedAttentionCsrKernel( "Not support CPU kernel of 'sparse.nn.functional.fused_attention' now"); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse diff --git a/paddle/phi/kernels/sparse/cpu/mask_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/mask_grad_kernel.cc new file mode 100644 index 0000000000000..3503c88b2ef8b --- /dev/null +++ b/paddle/phi/kernels/sparse/cpu/mask_grad_kernel.cc @@ -0,0 +1,56 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/sparse/mask_grad_kernel.h" +#include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +PD_REGISTER_KERNEL(mask_as_coo_grad, + CPU, + ALL_LAYOUT, + phi::sparse::MaskAsCooGradKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int8_t, + int16_t, + int, + int64_t, + bool, + phi::dtype::complex, + phi::dtype::complex) { + kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); +} + +PD_REGISTER_KERNEL(mask_as_csr_grad, + CPU, + ALL_LAYOUT, + phi::sparse::MaskAsCsrGradKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int8_t, + int16_t, + int, + int64_t, + bool, + phi::dtype::complex, + phi::dtype::complex) { + kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_CSR); +} diff --git a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc index 5213dd44a4c07..7b8d24a440e74 100644 --- a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h" #include "paddle/common/ddim.h" #include "paddle/phi/api/ext/dispatch.h" @@ -24,8 +25,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/sparse/flatten_indices.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void MaskCooCPUKernel(const CPUContext& dev_ctx, @@ -75,16 +75,116 @@ void MaskCooCPUKernel(const CPUContext& dev_ctx, * x and mask must have the same shape. 
**/ template -void MaskCooKernel(const Context& dev_ctx, - const DenseTensor& x, - const SparseCooTensor& mask, - SparseCooTensor* out) { +void MaskAsCooKernel(const Context& dev_ctx, + const DenseTensor& x, + const SparseCooTensor& mask, + SparseCooTensor* out) { PD_VISIT_BASE_INTEGRAL_TYPES( mask.indices().dtype(), "MaskCooCPUKernel", ([&] { MaskCooCPUKernel(dev_ctx, x, mask, out); })); } +template +void MaskCsr2DCPUKernel(const CPUContext& dev_ctx, + const DenseTensor& x, + const SparseCsrTensor& mask, + SparseCsrTensor* out) { + const DenseTensor& mask_cols = mask.cols(); + const DenseTensor& mask_crows = mask.crows(); + int64_t num_non_zeros = mask.nnz(); + + DenseTensor out_cols = phi::EmptyLike(dev_ctx, mask_cols); + DenseTensor out_crows = phi::EmptyLike(dev_ctx, mask_crows); + DenseTensor out_values = phi::Empty(dev_ctx, {num_non_zeros}); + + phi::Copy(dev_ctx, mask_cols, dev_ctx.GetPlace(), false, &out_cols); + phi::Copy(dev_ctx, mask_crows, dev_ctx.GetPlace(), false, &out_crows); + + int64_t numel = 0; + for (int64_t i = 0; i < mask_crows.numel() - 1; ++i) { + for (int64_t j = mask_crows.data()[i]; + j < mask_crows.data()[i + 1]; + ++j) { + IntT col_idx = mask_cols.data()[numel]; + + out_values.data()[numel] = + x.data()[(i / x.dims()[0]) * x.dims()[1] + + (i % x.dims()[0]) * x.dims()[1] + col_idx]; + + ++numel; + } + } + + out->SetMember(out_crows, out_cols, out_values, x.dims()); +} + +template +void MaskCsr3DCPUKernel(const CPUContext& dev_ctx, + const DenseTensor& x, + const SparseCsrTensor& mask, + SparseCsrTensor* out) { + const DenseTensor& mask_cols = mask.cols(); + const DenseTensor& mask_crows = mask.crows(); + int64_t num_non_zeros = mask.nnz(); + + DenseTensor out_cols = phi::EmptyLike(dev_ctx, mask_cols); + DenseTensor out_crows = phi::EmptyLike(dev_ctx, mask_crows); + DenseTensor out_values = phi::Empty(dev_ctx, {num_non_zeros}); + + phi::Copy(dev_ctx, mask_cols, dev_ctx.GetPlace(), false, &out_cols); + phi::Copy(dev_ctx, mask_crows, 
dev_ctx.GetPlace(), false, &out_crows); + + int64_t numel = 0; + for (int64_t i = 0; i < mask_crows.numel() - 1; ++i) { + for (int64_t j = mask_crows.data()[i]; + j < mask_crows.data()[i + 1]; + ++j) { + IntT col_idx = mask_cols.data()[numel]; + + out_values.data()[numel] = + x.data()[(i / (mask_crows.numel() / x.dims()[0])) * + (x.dims()[1] * x.dims()[2]) + + (i % (mask_crows.numel() / x.dims()[0])) * x.dims()[2] + + col_idx]; + + ++numel; + } + } + + out->SetMember(out_crows, out_cols, out_values, x.dims()); +} + +/** + * @brief Filter the DenseTensor x by the + * mask.crows(), mask.cols() and output a SparseCsrTensor + * x and mask must have the same shape. + **/ +template +void MaskAsCsrKernel(const Context& dev_ctx, + const DenseTensor& x, + const SparseCsrTensor& mask, + SparseCsrTensor* out) { + const phi::DDim& x_dims = x.dims(); + if (x_dims.size() == 2) { + PD_VISIT_BASE_INTEGRAL_TYPES( + mask.crows().dtype(), "MaskCsr2DCPUKernel", ([&] { + MaskCsr2DCPUKernel(dev_ctx, x, mask, out); + })); + } else if (x_dims.size() == 3) { + PD_VISIT_BASE_INTEGRAL_TYPES( + mask.crows().dtype(), "MaskCsr3DCPUKernel", ([&] { + MaskCsr3DCPUKernel(dev_ctx, x, mask, out); + })); + } else { + // throw exception + phi::errors::InvalidArgument( + "mask_as for Sparse CSR Tensor only support 2-D or 3-D, but got " + "%d-D.", + x_dims.size()); + } +} + template void MaskHelperCooCPUKernel(const CPUContext& dev_ctx, const SparseCooTensor& x, @@ -154,13 +254,28 @@ void MaskHelperCooKernel(const Context& dev_ctx, })); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse + +PD_REGISTER_KERNEL(mask_helper_coo, + CPU, + ALL_LAYOUT, + phi::sparse::MaskHelperCooKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); +} -PD_REGISTER_KERNEL(mask_coo, +PD_REGISTER_KERNEL(mask_as_coo, CPU, ALL_LAYOUT, - phi::sparse::MaskCooKernel, 
+ phi::sparse::MaskAsCooKernel, float, double, uint8_t, @@ -174,18 +289,19 @@ PD_REGISTER_KERNEL(mask_coo, kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); } -PD_REGISTER_KERNEL(mask_helper_coo, +PD_REGISTER_KERNEL(mask_as_csr, CPU, ALL_LAYOUT, - phi::sparse::MaskHelperCooKernel, + phi::sparse::MaskAsCsrKernel, float, double, - phi::dtype::float16, uint8_t, + int8_t, int16_t, int, int64_t, + bool, phi::dtype::complex, phi::dtype::complex) { - kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); + kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_CSR); } diff --git a/paddle/phi/kernels/sparse/cpu/matmul_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/matmul_grad_kernel.cc index 6d22d2a336e7e..cdd7efdb20924 100644 --- a/paddle/phi/kernels/sparse/cpu/matmul_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/matmul_grad_kernel.cc @@ -17,8 +17,7 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace sparse { +namespace phi::sparse { // TODO(zhouwei25): implement CPU backward kernel of " CSR @ DENSE -> DENSE" template @@ -44,8 +43,7 @@ void MaskedMatmulCsrGradKernel(const Context& dev_ctx UNUSED, "Not support CPU backward kernel of 'sparse.masked_matmul' now.")); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(matmul_csr_dense_grad, CPU, diff --git a/paddle/phi/kernels/sparse/cpu/matmul_kernel.cc b/paddle/phi/kernels/sparse/cpu/matmul_kernel.cc index fd70dc911cfde..5e6aa016d6c3e 100644 --- a/paddle/phi/kernels/sparse/cpu/matmul_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/matmul_kernel.cc @@ -17,8 +17,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace sparse { +namespace phi::sparse { // TODO(zhouwei25): implement CPU kernel of " CSR @ DENSE -> DENSE" template @@ -41,8 +40,7 @@ void MaskedMatmulCsrKernel(const Context& dev_ctx UNUSED, "Not support CPU kernel of 'sparse.masked_matmul' now.")); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(matmul_csr_dense, CPU, diff --git a/paddle/phi/kernels/sparse/cpu/mv_kernel.cc b/paddle/phi/kernels/sparse/cpu/mv_kernel.cc index 22abdb3ad12a3..68f7efd05d70d 100644 --- a/paddle/phi/kernels/sparse/cpu/mv_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/mv_kernel.cc @@ -17,8 +17,7 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void MvCsrKernel(const Context& dev_ctx UNUSED, @@ -38,8 +37,7 @@ void MvCooKernel(const Context& dev_ctx UNUSED, phi::errors::Unimplemented("Not support CPU kernel of 'sparse.mv' now.")); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL( mv_csr, CPU, ALL_LAYOUT, phi::sparse::MvCsrKernel, float, double) { diff --git a/paddle/phi/kernels/sparse/cpu/slice_kernel.cc b/paddle/phi/kernels/sparse/cpu/slice_kernel.cc index 81af8339f88a9..20614fa10b04b 100644 --- a/paddle/phi/kernels/sparse/cpu/slice_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/slice_kernel.cc @@ -20,8 +20,7 @@ #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/slice_utils.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void SliceCooCompute(const Context& dev_ctx, @@ -303,8 +302,7 @@ void SliceCsrKernel(const Context& dev_ctx, x_dims, &axes_vec, &starts_vec, &ends_vec); SliceCsrCompute(dev_ctx, x, axes_vec, starts_vec, ends_vec, out); } -} // namespace sparse -} // namespace phi +} 
// namespace phi::sparse PD_REGISTER_KERNEL(slice_coo, CPU, diff --git a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc index 0c5e6857de24c..4eea70631bd60 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc @@ -262,9 +262,9 @@ void CooToDenseCPUKernel(const CPUContext& dev_ctx, const SparseCooTensor& x, DenseTensor* out) { const auto non_zero_num = x.nnz(); - const auto dense_dims = x.dims(); - const auto indices = x.indices(); - const auto values = x.values(); + const auto& dense_dims = x.dims(); + const auto& indices = x.indices(); + const auto& values = x.values(); const auto indices_dims = common::vectorize(indices.dims()); int64_t sparse_dim = indices_dims[0]; if (indices_dims.size() == 1) { diff --git a/paddle/phi/kernels/sparse/cpu/transpose_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/transpose_grad_kernel.cc index 58a9720e1732b..70b737c2ec0a2 100644 --- a/paddle/phi/kernels/sparse/cpu/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/transpose_grad_kernel.cc @@ -20,8 +20,7 @@ #include "paddle/phi/kernels/sparse/empty_kernel.h" #include "paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h" -namespace phi { -namespace sparse { +namespace phi::sparse { std::vector get_cpu_grad_perm(std::vector perm) { std::vector grad_perm(perm.size()); @@ -48,8 +47,7 @@ void TransposeCsrGradKernel(const Context& dev_ctx, std::vector grad_perm = get_cpu_grad_perm(perm); TransposeCsrKernel(dev_ctx, dout, grad_perm, dx); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(transpose_coo_grad, CPU, diff --git a/paddle/phi/kernels/sparse/cpu/transpose_kernel.cc b/paddle/phi/kernels/sparse/cpu/transpose_kernel.cc index bee2fe61ded54..6ae389ad90f46 100644 --- a/paddle/phi/kernels/sparse/cpu/transpose_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/transpose_kernel.cc @@ -21,8 +21,7 @@ #include 
"paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/sparse/empty_kernel.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void TransposeCooKernel(const Context& dev_ctx, @@ -201,8 +200,7 @@ void TransposeCsrKernel(const Context& dev_ctx, } } } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(transpose_coo, CPU, diff --git a/paddle/phi/kernels/sparse/empty_kernel.cc b/paddle/phi/kernels/sparse/empty_kernel.cc index 2fb11e7a66f2e..07087445b1eb6 100644 --- a/paddle/phi/kernels/sparse/empty_kernel.cc +++ b/paddle/phi/kernels/sparse/empty_kernel.cc @@ -18,8 +18,7 @@ limitations under the License. */ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void EmptyLikeCooKernel(const Context& dev_ctx, @@ -47,8 +46,7 @@ void EmptyLikeCsrKernel(const Context& dev_ctx, out->set_meta(x.meta()); dev_ctx.template Alloc(out_values); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(empty_like_coo, CPU, diff --git a/paddle/phi/kernels/sparse/gpu/conv_kernel_impl.cuh b/paddle/phi/kernels/sparse/gpu/conv_kernel_impl.cuh index 775c23def14b0..3b6de498ef5b5 100644 --- a/paddle/phi/kernels/sparse/gpu/conv_kernel_impl.cuh +++ b/paddle/phi/kernels/sparse/gpu/conv_kernel_impl.cuh @@ -566,7 +566,7 @@ __global__ void __launch_bounds__(128) conv_forward_cuda_setting3_mode0_f16f16f3 // conv_forward_cuda_m128n16k16_f32f32f32 template -__global__ void __launch_bounds__(64) conv_forward_cuda_setting1_mode0_f32f32f32(int M, int K_original, int N, int kernel_volume, float* __restrict__ A, float* __restrict__ B, int* __restrict__ out_in_map, float* __restrict__ C) +__global__ void __launch_bounds__(64) conv_forward_cuda_setting1_mode0_f32f32f32(int M, int K_original, int N, int kernel_volume, float* __restrict__ A, float* __restrict__ B, int* __restrict__ 
out_in_map, float* __restrict__ C) { const int K_tile = 16; @@ -578,27 +578,27 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting1_mode0_f32f32f32 __shared__ float B_shared[256]; #pragma unroll - for (int i = 0; i < 32; ++i) + for (int i = 0; i < 32; ++i) { C_local[i] = 0.0; } - + int K_loops = K_implicit / 16; - int block_num_n = (N - 1) / 16 + 1; + int block_num_n = (N - 1) / 16 + 1; int blockIdx_m = (int)blockIdx.x / block_num_n; int blockIdx_n = (int)blockIdx.x % block_num_n; int threadIdx_x = (int)threadIdx.x; // hoisting shared pointer offsets - int * out_in_map_ptr = out_in_map - + (blockIdx_m * 128 + (threadIdx_x / (16/4)))* kernel_volume; + int * out_in_map_ptr = out_in_map + + (blockIdx_m * 128 + (threadIdx_x / (16/4)))* kernel_volume; - float * B_ptr = B - + (threadIdx_x / (16/4)) * N - + (blockIdx_n * 16) + ((threadIdx_x * 4) % 16); + float * B_ptr = B + + (threadIdx_x / (16/4)) * N + + (blockIdx_n * 16) + ((threadIdx_x * 4) % 16); float * A_shared_ptr = A_shared + (threadIdx_x * 4); - float * A_shared_reduce_ptr = A_shared + ((threadIdx_x / 4) * 16); + float * A_shared_reduce_ptr = A_shared + ((threadIdx_x / 4) * 16); float * B_shared_ptr = B_shared + (threadIdx_x * 4); float * B_shared_reduce_ptr = B_shared + (threadIdx_x % 4); @@ -648,7 +648,7 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting1_mode0_f32f32f32 } int* out_in_map_ptr_local = out_in_map_ptr + k_0 * 16 / K_tile_padded; - float* A_ptr_local = A + (k_0 * 16 % K_tile_padded) + channel_offset_A; + float* A_ptr_local = A + (k_0 * 16 % K_tile_padded) + channel_offset_A; float* B_ptr_local; if constexpr (K_ld_check) @@ -661,14 +661,14 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting1_mode0_f32f32f32 for (int ax0_ax1_fused_0 = 0; ax0_ax1_fused_0 < 8; ++ax0_ax1_fused_0) { - int input_idx = *(out_in_map_ptr_local + (ax0_ax1_fused_0 *16) * kernel_volume); + int input_idx = *(out_in_map_ptr_local + (ax0_ax1_fused_0 *16) * kernel_volume); if 
(input_idx != -1) { uint4 A_loaded = make_uint4(0, 0, 0, 0); global_load(A_loaded, A_ptr_local + (input_idx * K_original) , A_pred_guard); *(uint4 *)(A_shared_ptr + (ax0_ax1_fused_0 * 256)) = A_loaded; } - else + else { *(uint4*)(A_shared_ptr + (ax0_ax1_fused_0 * 256)) = make_uint4(0, 0, 0, 0); } @@ -678,23 +678,23 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting1_mode0_f32f32f32 for (int ax0_ax1_fused_0_1 = 0; ax0_ax1_fused_0_1 < 1; ++ax0_ax1_fused_0_1) { uint4 B_loaded = make_uint4(0, 0, 0, 0); - global_load(B_loaded, B_ptr_local + (ax0_ax1_fused_0_1 * 16) * N, B_pred_guard); + global_load(B_loaded, B_ptr_local + (ax0_ax1_fused_0_1 * 16) * N, B_pred_guard); *(uint4 *)(B_shared_ptr + (ax0_ax1_fused_0_1 * 256)) = B_loaded; } __syncthreads(); #pragma unroll - for (int k_1 = 0; k_1 < ( 16 / 4); ++k_1) + for (int k_1 = 0; k_1 < ( 16 / 4); ++k_1) { #pragma unroll - for (int k_2 = 0; k_2 < 4; ++k_2) + for (int k_2 = 0; k_2 < 4; ++k_2) { int vk_in_block = (k_1 << 2) + k_2; #pragma unroll - for (int i = 0; i < 32; ++i) + for (int i = 0; i < 32; ++i) { - C_local[i] = C_local[i] + - A_shared_reduce_ptr[((i / 4) * 16) * 16 + vk_in_block] + C_local[i] = C_local[i] + + A_shared_reduce_ptr[((i / 4) * 16) * 16 + vk_in_block] * B_shared_reduce_ptr[(vk_in_block * 16) + ((i % 4) * 4)]; } @@ -707,7 +707,7 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting1_mode0_f32f32f32 for (int i = 0; i < 32; ++i) { int location_cur = location_offset + ((i / 4) * 16); - int vn = C_n_offset + ((i % 4) * 4); + int vn = C_n_offset + ((i % 4) * 4); if constexpr (N_ld_check) { @@ -723,34 +723,34 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting1_mode0_f32f32f32 } // conv_forward_cuda_m128n16k32_f32f32f32 -__global__ void __launch_bounds__(64) conv_forward_cuda_setting2_mode0_f32f32f32(int M, int K_original, int N, int kernel_volume, float* __restrict__ A, float* __restrict__ B, int* __restrict__ out_in_map, float* __restrict__ C) +__global__ void 
__launch_bounds__(64) conv_forward_cuda_setting2_mode0_f32f32f32(int M, int K_original, int N, int kernel_volume, float* __restrict__ A, float* __restrict__ B, int* __restrict__ out_in_map, float* __restrict__ C) { float C_local[32]; __shared__ float A_shared[4096]; __shared__ float B_shared[512]; #pragma unroll - for (int i = 0; i < 32; ++i) + for (int i = 0; i < 32; ++i) { C_local[i] = 0.0; } - + int K_loops = (K_original * kernel_volume - 1) / 32 + 1; - int block_num_n = (N - 1) / 16 + 1; + int block_num_n = (N - 1) / 16 + 1; int blockIdx_m = (int)blockIdx.x / block_num_n; int blockIdx_n = (int)blockIdx.x % block_num_n; int threadIdx_x = (int)threadIdx.x; // hoisting shared pointer offsets - int * out_in_map_ptr = out_in_map - + (blockIdx_m * 128 + (threadIdx_x / (32/4)))* kernel_volume; + int * out_in_map_ptr = out_in_map + + (blockIdx_m * 128 + (threadIdx_x / (32/4)))* kernel_volume; - float * B_ptr = B - + (threadIdx_x / (16/4)) * N - + (blockIdx_n * 16) + ((threadIdx_x * 4) % 16); + float * B_ptr = B + + (threadIdx_x / (16/4)) * N + + (blockIdx_n * 16) + ((threadIdx_x * 4) % 16); float * A_shared_ptr = A_shared + (threadIdx_x * 4); - float * A_shared_reduce_ptr = A_shared + ((threadIdx_x / 4) * 32); + float * A_shared_reduce_ptr = A_shared + ((threadIdx_x / 4) * 32); float * B_shared_ptr = B_shared + (threadIdx_x * 4); float * B_shared_reduce_ptr = B_shared + (threadIdx_x % 4); @@ -762,7 +762,7 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting2_mode0_f32f32f32 #pragma unroll for (int k_0 = 0; k_0 < K_loops; ++k_0) { - int channel_offset = k_0 % (K_original / 32) * 32 + channel_offset_A; + int channel_offset = k_0 % (K_original / 32) * 32 + channel_offset_A; int kernel_offset = k_0 / (K_original / 32); int *out_in_map_ptr_k = out_in_map_ptr + kernel_offset; @@ -772,8 +772,8 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting2_mode0_f32f32f32 for (int ax0_ax1_fused_0 = 0; ax0_ax1_fused_0 < 16; ++ax0_ax1_fused_0) { - int 
input_idx = *(out_in_map_ptr_k + (ax0_ax1_fused_0 *8) * kernel_volume); - if (input_idx != -1) + int input_idx = *(out_in_map_ptr_k + (ax0_ax1_fused_0 *8) * kernel_volume); + if (input_idx != -1) { *(float4*)(A_shared_ptr + (ax0_ax1_fused_0 * 256)) = // ax0_ax1_fused_0 * elements loaded in each loop @@ -788,27 +788,27 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting2_mode0_f32f32f32 } #pragma unroll - for (int ax0_ax1_fused_0_1 = 0; ax0_ax1_fused_0_1 < 2; ++ax0_ax1_fused_0_1) + for (int ax0_ax1_fused_0_1 = 0; ax0_ax1_fused_0_1 < 2; ++ax0_ax1_fused_0_1) { *(float4*)(B_shared_ptr + (ax0_ax1_fused_0_1 * 256)) = // ax0_ax1_fused_0_1 * elements loaded in each loop - *(float4*)(B_ptr + ((k_0 * 32) + (ax0_ax1_fused_0_1 * 16)) * N); + *(float4*)(B_ptr + ((k_0 * 32) + (ax0_ax1_fused_0_1 * 16)) * N); } __syncthreads(); #pragma unroll - for (int k_1 = 0; k_1 < ( 32 / 4); ++k_1) + for (int k_1 = 0; k_1 < ( 32 / 4); ++k_1) { #pragma unroll - for (int k_2 = 0; k_2 < 4; ++k_2) + for (int k_2 = 0; k_2 < 4; ++k_2) { int vk_in_block = (k_1 << 2) + k_2; #pragma unroll - for (int i = 0; i < 32; ++i) + for (int i = 0; i < 32; ++i) { - C_local[i] = C_local[i] + - A_shared_reduce_ptr[((i / 4) * 16) * 32 + vk_in_block] + C_local[i] = C_local[i] + + A_shared_reduce_ptr[((i / 4) * 16) * 32 + vk_in_block] * B_shared_reduce_ptr[(vk_in_block * 16) + ((i % 4) * 4)]; } @@ -818,44 +818,44 @@ __global__ void __launch_bounds__(64) conv_forward_cuda_setting2_mode0_f32f32f32 } #pragma unroll - for (int i = 0; i < 32; ++i) + for (int i = 0; i < 32; ++i) { int location_cur = location_offset + ((i / 4) * 16); - int vn = C_n_offset + ((i % 4) * 4); + int vn = C_n_offset + ((i % 4) * 4); if (location_cur < M) C[location_cur * N + vn] = C_local[i]; } } // conv_forward_cuda_m128n64k32_f32f32f32 -__global__ void __launch_bounds__(128) conv_forward_cuda_setting3_mode0_f32f32f32(int M, int K_original, int N, int kernel_volume, float* __restrict__ A, float* __restrict__ B, int* __restrict__ 
out_in_map, float* __restrict__ C) +__global__ void __launch_bounds__(128) conv_forward_cuda_setting3_mode0_f32f32f32(int M, int K_original, int N, int kernel_volume, float* __restrict__ A, float* __restrict__ B, int* __restrict__ out_in_map, float* __restrict__ C) { float C_local[64]; __shared__ float A_shared[4096]; __shared__ float B_shared[2048]; #pragma unroll - for (int i = 0; i < 64; ++i) + for (int i = 0; i < 64; ++i) { C_local[i] = 0.0; } - + int K_loops = (K_original * kernel_volume - 1) / 32 + 1; - int block_num_n = (N - 1) / 64 + 1; + int block_num_n = (N - 1) / 64 + 1; int blockIdx_m = (int)blockIdx.x / block_num_n; int blockIdx_n = (int)blockIdx.x % block_num_n; int threadIdx_x = (int)threadIdx.x; // hoisting shared pointer offsets - int * out_in_map_ptr = out_in_map - + (blockIdx_m * 128 + (threadIdx_x / (32/4)))* kernel_volume; + int * out_in_map_ptr = out_in_map + + (blockIdx_m * 128 + (threadIdx_x / (32/4)))* kernel_volume; - float * B_ptr = B - + (threadIdx_x / (64/4)) * N - + (blockIdx_n * 64) + ((threadIdx_x * 4) % 64); + float * B_ptr = B + + (threadIdx_x / (64/4)) * N + + (blockIdx_n * 64) + ((threadIdx_x * 4) % 64); float * A_shared_ptr = A_shared + (threadIdx_x * 4); - float * A_shared_reduce_ptr = A_shared + ((threadIdx_x / 16) * 32); + float * A_shared_reduce_ptr = A_shared + ((threadIdx_x / 16) * 32); float * B_shared_ptr = B_shared + (threadIdx_x * 4); float * B_shared_reduce_ptr = B_shared + (threadIdx_x % 16); @@ -867,7 +867,7 @@ __global__ void __launch_bounds__(128) conv_forward_cuda_setting3_mode0_f32f32f3 #pragma unroll for (int k_0 = 0; k_0 < K_loops; ++k_0) { - int channel_offset = k_0 % (K_original / 32) * 32 + channel_offset_A; + int channel_offset = k_0 % (K_original / 32) * 32 + channel_offset_A; int kernel_offset = k_0 / (K_original / 32); int *out_in_map_ptr_k = out_in_map_ptr + kernel_offset; @@ -877,8 +877,8 @@ __global__ void __launch_bounds__(128) conv_forward_cuda_setting3_mode0_f32f32f3 for (int ax0_ax1_fused_0 = 0; 
ax0_ax1_fused_0 < 8; ++ax0_ax1_fused_0) { - int input_idx = *(out_in_map_ptr_k + (ax0_ax1_fused_0 *16) * kernel_volume); - if (input_idx != -1) + int input_idx = *(out_in_map_ptr_k + (ax0_ax1_fused_0 *16) * kernel_volume); + if (input_idx != -1) { *(float4*)(A_shared_ptr + (ax0_ax1_fused_0 * 512)) = // ax0_ax1_fused_0 * elements loaded in each loop @@ -893,27 +893,27 @@ __global__ void __launch_bounds__(128) conv_forward_cuda_setting3_mode0_f32f32f3 } #pragma unroll - for (int ax0_ax1_fused_0_1 = 0; ax0_ax1_fused_0_1 < 4; ++ax0_ax1_fused_0_1) + for (int ax0_ax1_fused_0_1 = 0; ax0_ax1_fused_0_1 < 4; ++ax0_ax1_fused_0_1) { *(float4*)(B_shared_ptr + (ax0_ax1_fused_0_1 * 512)) = // ax0_ax1_fused_0_1 * elements loaded in each loop - *(float4*)(B_ptr + ((k_0 * 32) + (ax0_ax1_fused_0_1 * 8)) * N); + *(float4*)(B_ptr + ((k_0 * 32) + (ax0_ax1_fused_0_1 * 8)) * N); } __syncthreads(); #pragma unroll - for (int k_1 = 0; k_1 < ( 32 / 4); ++k_1) + for (int k_1 = 0; k_1 < ( 32 / 4); ++k_1) { #pragma unroll - for (int k_2 = 0; k_2 < 4; ++k_2) + for (int k_2 = 0; k_2 < 4; ++k_2) { int vk_in_block = (k_1 << 2) + k_2; #pragma unroll - for (int i = 0; i < 64; ++i) + for (int i = 0; i < 64; ++i) { - C_local[i] = C_local[i] + - A_shared_reduce_ptr[((i / 4) * 8) * 32 + vk_in_block] + C_local[i] = C_local[i] + + A_shared_reduce_ptr[((i / 4) * 8) * 32 + vk_in_block] * B_shared_reduce_ptr[(vk_in_block * 64) + ((i % 4) * 16)]; } @@ -923,10 +923,10 @@ __global__ void __launch_bounds__(128) conv_forward_cuda_setting3_mode0_f32f32f3 } #pragma unroll - for (int i = 0; i < 64; ++i) + for (int i = 0; i < 64; ++i) { int location_cur = location_offset + ((i / 4) * 8); - int vn = C_n_offset + ((i % 4) * 16); + int vn = C_n_offset + ((i % 4) * 16); if (location_cur < M) C[location_cur * N + vn] = C_local[i]; } @@ -944,10 +944,10 @@ void conv_forward_implicit_gemm_cuda( auto compute_capability = dev_ctx.GetComputeCapability(); bool allow_fp16 = compute_capability >= 75; bool is_half = _in_feats.dtype() 
== phi::DataType::FLOAT16; - + int num_in_feats = _in_feats.dims()[0]; int num_in_channels = _in_feats.dims()[1]; - + int kernel_volume = _out_in_map.dims()[1]; auto out_in_map = const_cast(_out_in_map.data()); @@ -1141,7 +1141,7 @@ void conv_forward_implicit_gemm_cuda( { int block_num_M = (num_out_feats + 127) / 128; int block_num_N = num_out_channels / 64; //j_factors1 - dim3 num_blocks(block_num_M * block_num_N); + dim3 num_blocks(block_num_M * block_num_N); dim3 threads_per_block(128); conv_forward_cuda_setting3_mode0_f32f32f32<<>>( _out_feats.dims()[0], num_in_channels, num_out_channels, kernel_volume, in_feats, kernel, out_in_map, out_feats); @@ -1150,7 +1150,7 @@ void conv_forward_implicit_gemm_cuda( { int block_num_M = (num_out_feats + 127) / 128; int block_num_N = num_out_channels / 16; //j_factors1 - dim3 num_blocks(block_num_M * block_num_N); + dim3 num_blocks(block_num_M * block_num_N); dim3 threads_per_block(64); conv_forward_cuda_setting2_mode0_f32f32f32<<>>( _out_feats.dims()[0], num_in_channels, num_out_channels, kernel_volume, in_feats, kernel, out_in_map, out_feats); @@ -1159,7 +1159,7 @@ void conv_forward_implicit_gemm_cuda( { int block_num_M = (num_out_feats + 127) / 128; int block_num_N = (num_out_channels + 15) / 16; //j_factors1 - dim3 num_blocks(block_num_M * block_num_N); + dim3 num_blocks(block_num_M * block_num_N); dim3 threads_per_block(64); if (num_in_channels % 16 == 0) diff --git a/paddle/phi/kernels/sparse/gpu/mask_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/mask_grad_kernel.cu new file mode 100644 index 0000000000000..1e4e3276d82e1 --- /dev/null +++ b/paddle/phi/kernels/sparse/gpu/mask_grad_kernel.cu @@ -0,0 +1,56 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/kernels/sparse/mask_grad_kernel.h" +#include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +PD_REGISTER_KERNEL(mask_as_coo_grad, + GPU, + ALL_LAYOUT, + phi::sparse::MaskAsCooGradKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int8_t, + int16_t, + int, + int64_t, + bool, + phi::dtype::complex, + phi::dtype::complex) { + kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); +} + +PD_REGISTER_KERNEL(mask_as_csr_grad, + GPU, + ALL_LAYOUT, + phi::sparse::MaskAsCsrGradKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int8_t, + int16_t, + int, + int64_t, + bool, + phi::dtype::complex, + phi::dtype::complex) { + kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_CSR); +} diff --git a/paddle/phi/kernels/sparse/gpu/mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/mask_kernel.cu index 0941ad69b0dd2..3459f6802b881 100644 --- a/paddle/phi/kernels/sparse/gpu/mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/mask_kernel.cu @@ -12,7 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include + #include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h" #include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_info.h" @@ -106,22 +109,256 @@ void MaskCooGPUKernel(const GPUContext& dev_ctx, out->SetMember(out_indices, out_values, dims, true); } +template +__global__ void ConvertCsrCrowsToCooRows(const IntT* crows_ptr, + const IntT* crows_offsets, + IntT* rows_ptr, + IntT* batch_ptr, + const int rows) { + const int b = blockIdx.y; + const int64_t offset = crows_offsets ? crows_offsets[b] : 0; + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = tid; i < rows; i += gridDim.x * blockDim.x) { + for (int j = crows_ptr[b * (rows + 1) + i]; + j < crows_ptr[b * (rows + 1) + i + 1]; + j++) { + rows_ptr[offset + j] = i; + if (batch_ptr) { + batch_ptr[offset + j] = b; + } + } + } +} + +template +__global__ void GetBatchSizes(const IntT* crows, + const int rows, + const int batches, + IntT* batch_sizes) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < batches) { + batch_sizes[tid] = crows[tid * (rows + 1) + rows]; + } +} + +template +void MaskCsr2DGPUKernel(const GPUContext& dev_ctx, + const DenseTensor& x, + const SparseCsrTensor& mask, + SparseCsrTensor* out) { + const DenseTensor& mask_cols = mask.cols(); + const DenseTensor& mask_crows = mask.crows(); + int64_t num_non_zeros = mask.nnz(); + + DenseTensor out_cols = phi::EmptyLike(dev_ctx, mask_cols); + DenseTensor out_crows = phi::EmptyLike(dev_ctx, mask_crows); + DenseTensor out_values = phi::Empty(dev_ctx, {num_non_zeros}); + + phi::Copy(dev_ctx, mask_cols, dev_ctx.GetPlace(), false, &out_cols); + phi::Copy(dev_ctx, mask_crows, dev_ctx.GetPlace(), false, &out_crows); + + const DDim& dims = x.dims(); + const int64_t non_zero_num = mask.nnz(); + int64_t sparse_dim = 2; + DenseTensor sparse_offsets = phi::Empty(dev_ctx, {sparse_dim}); + std::vector h_sparse_offsets(sparse_dim); + 
phi::funcs::sparse::CalcOffsetsPerDim( + dims, sparse_dim, h_sparse_offsets.data()); + + phi::backends::gpu::GpuMemcpyAsync(sparse_offsets.data(), + &h_sparse_offsets[0], + sizeof(int64_t) * sparse_dim, + gpuMemcpyHostToDevice, + dev_ctx.stream()); + + const auto& csr_crows = mask.crows(); + const auto& csr_cols = mask.cols(); + const IntT* csr_crows_data = csr_crows.data(); + const IntT* csr_cols_data = csr_cols.data(); + + const int batches = 1; + const int rows = dims[0]; + auto dims_2d = flatten_to_2d(dims, sparse_dim); + const int cols = dims_2d[1]; + + DenseTensor indices = phi::Empty(dev_ctx, {sparse_dim, non_zero_num}); + IntT* coo_indices = indices.data(); + IntT* batch_ptr = nullptr; + IntT* coo_rows_data = coo_indices; + IntT* coo_cols_data = coo_rows_data + non_zero_num; + IntT* offsets_ptr = nullptr; + + auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, rows, 1); + config.block_per_grid.y = batches; + ConvertCsrCrowsToCooRows + <<>>( + csr_crows_data, offsets_ptr, coo_rows_data, batch_ptr, rows); + phi::backends::gpu::GpuMemcpyAsync(coo_cols_data, + csr_cols_data, + sizeof(IntT) * non_zero_num, + gpuMemcpyDeviceToDevice, + dev_ctx.stream()); + + const T* x_ptr = x.data(); + const IntT* indices_ptr = coo_indices; + T* out_values_ptr = out_values.data(); + + auto config_mask = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, non_zero_num * cols, 1); + MaskKernel<<>>(x_ptr, + indices_ptr, + sparse_offsets.data(), + non_zero_num, + cols, + sparse_dim, + out_values_ptr); + + out->SetMember(out_crows, out_cols, out_values, x.dims()); +} + +template +void MaskCsr3DGPUKernel(const GPUContext& dev_ctx, + const DenseTensor& x, + const SparseCsrTensor& mask, + SparseCsrTensor* out) { + const DenseTensor& mask_cols = mask.cols(); + const DenseTensor& mask_crows = mask.crows(); + int64_t num_non_zeros = mask.nnz(); + + DenseTensor out_cols = phi::EmptyLike(dev_ctx, mask_cols); + DenseTensor out_crows = phi::EmptyLike(dev_ctx, mask_crows); + 
DenseTensor out_values = phi::Empty(dev_ctx, {num_non_zeros}); + + phi::Copy(dev_ctx, mask_cols, dev_ctx.GetPlace(), false, &out_cols); + phi::Copy(dev_ctx, mask_crows, dev_ctx.GetPlace(), false, &out_crows); + + const DDim& dims = x.dims(); + const int64_t non_zero_num = mask.nnz(); + int64_t sparse_dim = 3; + DenseTensor sparse_offsets = phi::Empty(dev_ctx, {sparse_dim}); + std::vector h_sparse_offsets(sparse_dim); + phi::funcs::sparse::CalcOffsetsPerDim( + dims, sparse_dim, h_sparse_offsets.data()); + + phi::backends::gpu::GpuMemcpyAsync(sparse_offsets.data(), + &h_sparse_offsets[0], + sizeof(int64_t) * sparse_dim, + gpuMemcpyHostToDevice, + dev_ctx.stream()); + + const auto& csr_crows = mask.crows(); + const auto& csr_cols = mask.cols(); + const IntT* csr_crows_data = csr_crows.data(); + const IntT* csr_cols_data = csr_cols.data(); + + const int batches = dims[0]; + const int rows = dims[1]; + auto dims_2d = flatten_to_2d(dims, sparse_dim); + const int cols = dims_2d[1]; + + DenseTensor indices = phi::Empty(dev_ctx, {sparse_dim, non_zero_num}); + DenseTensor offsets = phi::Empty(dev_ctx, {batches}); + IntT* coo_indices = indices.data(); + IntT* batch_ptr = coo_indices; + IntT* coo_rows_data = batch_ptr + non_zero_num; + IntT* coo_cols_data = coo_rows_data + non_zero_num; + IntT* offsets_ptr = offsets.data(); + + auto config_batch = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, batches, 1); + GetBatchSizes + <<>>( + csr_crows_data, rows, batches, offsets_ptr); + +#ifdef PADDLE_WITH_HIP + thrust::exclusive_scan(thrust::hip::par.on(dev_ctx.stream()), +#else + thrust::exclusive_scan(thrust::cuda::par.on(dev_ctx.stream()), +#endif + offsets_ptr, + offsets_ptr + batches, + offsets_ptr); + + auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, rows, 1); + config.block_per_grid.y = batches; + ConvertCsrCrowsToCooRows + <<>>( + csr_crows_data, offsets_ptr, coo_rows_data, batch_ptr, rows); + phi::backends::gpu::GpuMemcpyAsync(coo_cols_data, + 
csr_cols_data, + sizeof(IntT) * non_zero_num, + gpuMemcpyDeviceToDevice, + dev_ctx.stream()); + + const T* x_ptr = x.data(); + const IntT* indices_ptr = coo_indices; + T* out_values_ptr = out_values.data(); + + auto config_mask = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, non_zero_num * cols, 1); + MaskKernel<<>>(x_ptr, + indices_ptr, + sparse_offsets.data(), + non_zero_num, + cols, + sparse_dim, + out_values_ptr); + + out->SetMember(out_crows, out_cols, out_values, x.dims()); +} + /** * @brief Filter the DenseTensor x by the * mask.indices() and output a SparseCooTensor * x and mask must have the same shape. **/ template -void MaskCooKernel(const Context& dev_ctx, - const DenseTensor& x, - const SparseCooTensor& mask, - SparseCooTensor* out) { +void MaskAsCooKernel(const Context& dev_ctx, + const DenseTensor& x, + const SparseCooTensor& mask, + SparseCooTensor* out) { PD_VISIT_BASE_INTEGRAL_TYPES( mask.indices().dtype(), "MaskCooGPUKernel", ([&] { MaskCooGPUKernel(dev_ctx, x, mask, out); })); } +/** + * @brief Filter the DenseTensor x by the + * mask.crows(), mask.cols() and output a SparseCsrTensor + * x and mask must have the same shape. 
+ **/ +template +void MaskAsCsrKernel(const Context& dev_ctx, + const DenseTensor& x, + const SparseCsrTensor& mask, + SparseCsrTensor* out) { + const phi::DDim& x_dims = x.dims(); + if (x_dims.size() == 2) { + PD_VISIT_BASE_INTEGRAL_TYPES( + mask.crows().dtype(), "MaskCsr2DGPUKernel", ([&] { + MaskCsr2DGPUKernel(dev_ctx, x, mask, out); + })); + } else if (x_dims.size() == 3) { + PD_VISIT_BASE_INTEGRAL_TYPES( + mask.crows().dtype(), "MaskCsr3DGPUKernel", ([&] { + MaskCsr3DGPUKernel(dev_ctx, x, mask, out); + })); + } else { + // throw exception + phi::errors::InvalidArgument( + "mask_as for Sparse CSR Tensor only support 2-D or 3-D, but got " + "%d-D.", + x_dims.size()); + } +} + template __global__ void MaskTable(const IntT* x_indexs, const int n, @@ -296,10 +533,26 @@ void MaskHelperCooKernel(const Context& dev_ctx, } // namespace sparse } // namespace phi -PD_REGISTER_KERNEL(mask_coo, +PD_REGISTER_KERNEL(mask_helper_coo, GPU, ALL_LAYOUT, - phi::sparse::MaskCooKernel, + phi::sparse::MaskHelperCooKernel, + float, + double, + phi::dtype::float16, + uint8_t, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); +} + +PD_REGISTER_KERNEL(mask_as_coo, + GPU, + ALL_LAYOUT, + phi::sparse::MaskAsCooKernel, float, double, phi::dtype::float16, @@ -314,18 +567,20 @@ PD_REGISTER_KERNEL(mask_coo, kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO); } -PD_REGISTER_KERNEL(mask_helper_coo, +PD_REGISTER_KERNEL(mask_as_csr, GPU, ALL_LAYOUT, - phi::sparse::MaskHelperCooKernel, + phi::sparse::MaskAsCsrKernel, float, double, phi::dtype::float16, uint8_t, + int8_t, int16_t, int, int64_t, + bool, phi::dtype::complex, phi::dtype::complex) { - kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO); + kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_CSR); } diff --git a/paddle/phi/kernels/sparse/gpu/sparse_conv_hashmap.cuh b/paddle/phi/kernels/sparse/gpu/sparse_conv_hashmap.cuh 
index 73ad53de502da..380abb419b40a 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_conv_hashmap.cuh +++ b/paddle/phi/kernels/sparse/gpu/sparse_conv_hashmap.cuh @@ -65,7 +65,7 @@ class GPUHashTable { key_type* table_keys; val_type* table_vals; void insert_many_coords(const phi::GPUContext& dev_ctx, const int *coords, const int n); - void lookup_many_coords(const phi::GPUContext& dev_ctx, const int *coords, val_type *results, + void lookup_many_coords(const phi::GPUContext& dev_ctx, const int *coords, val_type *results, const int* kernel_sizes, const int* tensor_strides, const int n, const int kernel_volume); public: @@ -112,8 +112,8 @@ __global__ void insert_coords_kernel(key_type* table_keys, val_type* table_vals, template __global__ void lookup_coords_kernel( - key_type* table_keys, val_type* table_vals, const int* coords, val_type* vals, - const int* kernel_sizes, const int* strides, + key_type* table_keys, val_type* table_vals, const int* coords, val_type* vals, + const int* kernel_sizes, const int* strides, int n, int _capacity, int kernel_volume, int _width) { int tidx = blockIdx.x * blockDim.x + threadIdx.x; @@ -125,8 +125,8 @@ __global__ void lookup_coords_kernel( //coords_out[2] = in_coords[2]; //coords_out[3] = in_coords[3]; coords_out[0] = in_coords[0]; - - if constexpr (odd) + + if constexpr (odd) { #pragma unroll for(int i = 0; i <= _width-2; i++){ @@ -146,7 +146,7 @@ __global__ void lookup_coords_kernel( _kernel_idx /= kernel_sizes[i]; } } - + if (idx < n) { key_type key = (key_type)(hash_func_64b(coords_out, _width)); @@ -156,7 +156,7 @@ __global__ void lookup_coords_kernel( { key_type cur_key = table_keys[slot]; if (key == cur_key) - { + { vals[idx * kernel_volume + kernel_idx] = table_vals[slot] - 1; // need to subtract 1 to avoid extra operations in python } if (table_keys[slot] == EMPTY_CELL) @@ -181,7 +181,7 @@ void GPUHashTable::insert_coords(const phi::GPUContext& dev_ template void GPUHashTable::lookup_many_coords( const phi::GPUContext& 
dev_ctx, - const int* coords, val_type* results, + const int* coords, val_type* results, const int* kernel_sizes, const int* strides, const int n, const int kernel_volume){ if (kernel_volume % 2) diff --git a/paddle/phi/kernels/sparse/gpu/sum_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/sum_grad_kernel.cu index b0da1e7ab42f0..dc82d427c53c8 100644 --- a/paddle/phi/kernels/sparse/gpu/sum_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sum_grad_kernel.cu @@ -54,7 +54,7 @@ __global__ void SumCsr3DGradCudaKernel(const int64_t* x_crows_data, const int64_t x_dim1, T* dx_values_data) { // dout_crows_data[index] should be equal to number; - CUDA_KERNEL_LOOP_TYPE(index, x_dim0 * (x_dim1 + 1), int64_t) { + CUDA_KERNEL_LOOP_TYPE(index, x_dim0 * (x_dim1 + 1) - 1, int64_t) { int64_t batch = index / (x_dim1 + 1); int64_t number = index % (x_dim1 + 1); diff --git a/paddle/phi/kernels/sparse/gpu/sum_kernel.cu b/paddle/phi/kernels/sparse/gpu/sum_kernel.cu index c9efc79e29b6c..29fc3a1d9b327 100644 --- a/paddle/phi/kernels/sparse/gpu/sum_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sum_kernel.cu @@ -137,11 +137,16 @@ __global__ void SumCsr3DCudaKernel(const int64_t* x_crows_data, int64_t* out_crows_data, int64_t* out_cols_data, T* out_values_data) { + { + CUDA_KERNEL_LOOP_TYPE(index, x_dim0 * x_dim1, int64_t) { + out_cols_data[index] = 0; + } + } + CUDA_KERNEL_LOOP_TYPE(index, x_dim0 * (x_dim1 + 1), int64_t) { int64_t batch = index / (x_dim1 + 1); int64_t number = index % (x_dim1 + 1); out_crows_data[index] = number; - out_cols_data[index] = 0; if (number != x_dim1) { T sum_value = 0; @@ -154,6 +159,8 @@ __global__ void SumCsr3DCudaKernel(const int64_t* x_crows_data, for (int64_t j = x_crows_data[index]; j < x_crows_data[index + 1]; ++j) { sum_value += x_values_data[j + x_values_data_offset]; } + + // `index - batch` would never exceed x_dim0 * x_dim1. 
out_values_data[index - batch] = sum_value; } } diff --git a/paddle/phi/kernels/sparse/mask_grad_kernel.h b/paddle/phi/kernels/sparse/mask_grad_kernel.h new file mode 100644 index 0000000000000..687562aa300d1 --- /dev/null +++ b/paddle/phi/kernels/sparse/mask_grad_kernel.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/sparse_coo_tensor.h" +#include "paddle/phi/core/sparse_csr_tensor.h" +#include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h" + +namespace phi { +namespace sparse { + +template +void MaskAsCooGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const SparseCooTensor& mask, + const SparseCooTensor& out_grad, + DenseTensor* x_grad) { + CooToDenseKernel(dev_ctx, out_grad, x_grad); +} + +template +void MaskAsCsrGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const SparseCsrTensor& mask, + const SparseCsrTensor& out_grad, + DenseTensor* x_grad) { + CsrToDenseKernel(dev_ctx, out_grad, x_grad); +} + +} // namespace sparse +} // namespace phi diff --git a/paddle/phi/kernels/sparse/mask_kernel.h b/paddle/phi/kernels/sparse/mask_kernel.h index 5ffc7fb4aa44d..5be993e243b19 100644 --- a/paddle/phi/kernels/sparse/mask_kernel.h +++ b/paddle/phi/kernels/sparse/mask_kernel.h @@ -16,21 +16,28 @@ limitations under the License. 
*/ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/sparse_coo_tensor.h" +#include "paddle/phi/core/sparse_csr_tensor.h" namespace phi { namespace sparse { -template -void MaskCooKernel(const Context& dev_ctx, - const DenseTensor& x, - const SparseCooTensor& mask, - SparseCooTensor* out); - template void MaskHelperCooKernel(const Context& dev_ctx, const SparseCooTensor& x, const DenseTensor& mask_indices, DenseTensor* out); +template +void MaskAsCooKernel(const Context& dev_ctx, + const DenseTensor& x, + const SparseCooTensor& mask, + SparseCooTensor* out); + +template +void MaskAsCsrKernel(const Context& dev_ctx, + const DenseTensor& x, + const SparseCsrTensor& mask, + SparseCsrTensor* out); + } // namespace sparse } // namespace phi diff --git a/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc b/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc index f5915c7acb84c..4933aac3c23ec 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc @@ -16,8 +16,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace sparse { +namespace phi::sparse { template void ValuesCooGradKernel(const Context& dev_ctx UNUSED, @@ -32,11 +31,10 @@ void CooToDenseGradKernel(const Context& dev_ctx, const SparseCooTensor& x, const DenseTensor& out_grad, SparseCooTensor* x_grad) { - MaskCooKernel(dev_ctx, out_grad, x, x_grad); + MaskAsCooKernel(dev_ctx, out_grad, x, x_grad); } -} // namespace sparse -} // namespace phi +} // namespace phi::sparse PD_REGISTER_KERNEL(values_coo_grad, CPU, diff --git a/paddle/phi/kernels/stride/slice_kernel.cc b/paddle/phi/kernels/stride/slice_kernel.cc index b5efcd49166fd..f4ff64b5cd2af 100644 --- a/paddle/phi/kernels/stride/slice_kernel.cc +++ b/paddle/phi/kernels/stride/slice_kernel.cc @@ -33,7 +33,7 @@ void SliceStridedKernel(const Context& ctx, DenseTensor* out) { std::vector starts = starts_arr.GetData(); std::vector ends = ends_arr.GetData(); - auto in_dims = input.dims(); + const auto& in_dims = input.dims(); auto new_axes = axes; for (auto& item : new_axes) { diff --git a/paddle/phi/kernels/strings/cpu/strings_copy_kernel.cc b/paddle/phi/kernels/strings/cpu/strings_copy_kernel.cc index da8fba85accf9..06bbe8c15903a 100644 --- a/paddle/phi/kernels/strings/cpu/strings_copy_kernel.cc +++ b/paddle/phi/kernels/strings/cpu/strings_copy_kernel.cc @@ -18,8 +18,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/pstring.h" #include "paddle/phi/core/kernel_registry.h" -namespace phi { -namespace strings { +namespace phi::strings { template void Copy(const Context& dev_ctx, @@ -50,8 +49,7 @@ void Copy(const Context& dev_ctx, } } -} // namespace strings -} // namespace phi +} // namespace phi::strings PD_REGISTER_KERNEL_FOR_ALL_DTYPE(strings_copy, CPU, diff --git a/paddle/phi/kernels/strings/cpu/strings_lower_upper_kernel.cc b/paddle/phi/kernels/strings/cpu/strings_lower_upper_kernel.cc index b470f3b211f6a..ec3b2b731d7e6 100644 --- a/paddle/phi/kernels/strings/cpu/strings_lower_upper_kernel.cc +++ b/paddle/phi/kernels/strings/cpu/strings_lower_upper_kernel.cc @@ -17,8 +17,7 @@ limitations under the License. */ using pstring = ::phi::dtype::pstring; -namespace phi { -namespace strings { +namespace phi::strings { template void StringLowerKernel(const ContextT& dev_ctx, @@ -40,8 +39,7 @@ void StringUpperKernel(const ContextT& dev_ctx, ContextT>()(dev_ctx, x, use_utf8_encoding, out); } -} // namespace strings -} // namespace phi +} // namespace phi::strings PD_REGISTER_KERNEL_FOR_ALL_DTYPE( strings_lower, diff --git a/paddle/phi/kernels/xpu/plugin/build.sh b/paddle/phi/kernels/xpu/plugin/build.sh index 65228c101d354..3b57efba50f38 100755 --- a/paddle/phi/kernels/xpu/plugin/build.sh +++ b/paddle/phi/kernels/xpu/plugin/build.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/paddle/phi/kernels/xpu/plugin/example/build.sh b/paddle/phi/kernels/xpu/plugin/example/build.sh index d96636707d15a..a54277c769540 100755 --- a/paddle/phi/kernels/xpu/plugin/example/build.sh +++ b/paddle/phi/kernels/xpu/plugin/example/build.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/phi/kernels/xpu/plugin/example/run.sh b/paddle/phi/kernels/xpu/plugin/example/run.sh index 25b4a9dbd244e..ae41223f79bcb 100755 --- a/paddle/phi/kernels/xpu/plugin/example/run.sh +++ b/paddle/phi/kernels/xpu/plugin/example/run.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/phi/kernels/xpu/rms_norm_kernel.cc b/paddle/phi/kernels/xpu/rms_norm_kernel.cc index 698b2b195da82..85a4ea7291a14 100644 --- a/paddle/phi/kernels/xpu/rms_norm_kernel.cc +++ b/paddle/phi/kernels/xpu/rms_norm_kernel.cc @@ -63,10 +63,10 @@ void RmsNormKernel(const Context& dev_ctx, const T* norm_weight_data = norm_weight.data(); const T* norm_bias_data = norm_bias ? 
norm_bias.get().data() : nullptr; // float* inv_var_data = nullptr; - if (inv_var != nullptr) { - // inv_var_data = dev_ctx.template Alloc(inv_var); - PD_THROW("rms_norm in XPU kernel does not support inv_var output"); - } + // if (inv_var != nullptr) { + // inv_var_data = dev_ctx.template Alloc(inv_var); + // PD_THROW("rms_norm in XPU kernel does not support inv_var output"); + // } int32_t rows = 1; int32_t cols = 1; diff --git a/paddle/phi/kernels/xpu/swiglu_kernel.cc b/paddle/phi/kernels/xpu/swiglu_kernel.cc index a7815931fa6a8..9ba9c10ea1a43 100644 --- a/paddle/phi/kernels/xpu/swiglu_kernel.cc +++ b/paddle/phi/kernels/xpu/swiglu_kernel.cc @@ -50,7 +50,7 @@ void SwiGluKernel(const Context& ctx, reinterpret_cast(z_data), dims_vec, axis, - false, + true, const_nullptr, nullptr, y_ptr); diff --git a/paddle/phi/kernels/xpu/swiglu_kernel_grad.cc b/paddle/phi/kernels/xpu/swiglu_kernel_grad.cc index 994699a9fa63a..290081a48f36d 100644 --- a/paddle/phi/kernels/xpu/swiglu_kernel_grad.cc +++ b/paddle/phi/kernels/xpu/swiglu_kernel_grad.cc @@ -64,7 +64,7 @@ void SwiGluGradKernel(const Context& ctx, reinterpret_cast(dx_data), dims_vec, axis, - false, + true, y_ptr, dy_ptr); PADDLE_ENFORCE_XDNN_SUCCESS(ret, "swiglu_grad"); diff --git a/paddle/phi/kernels/xpu/tile_kernel.cc b/paddle/phi/kernels/xpu/tile_kernel.cc index 5e665711efc8d..6b8dbf641f803 100644 --- a/paddle/phi/kernels/xpu/tile_kernel.cc +++ b/paddle/phi/kernels/xpu/tile_kernel.cc @@ -143,4 +143,5 @@ PD_REGISTER_KERNEL(tile, double, int, int64_t, - phi::dtype::bfloat16) {} + phi::dtype::bfloat16, + phi::dtype::float16) {} diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml index 934e55ad90a92..69a737eebaaa8 100644 --- a/paddle/phi/ops/yaml/backward.yaml +++ b/paddle/phi/ops/yaml/backward.yaml @@ -1067,6 +1067,7 @@ infer_meta : func : KernelWithXShapeInferMeta param : [xshape, out_grad] + spmd_rule : FlattenGradInferSpmd kernel : func : flatten_grad data_type : out_grad @@ -1825,6 
+1826,33 @@ kernel : func : logsumexp_grad +- backward_op : lp_pool2d_grad + forward : lp_pool2d(Tensor x, IntArray kernel_size, int[] strides = {1,1}, int[] paddings = {0,0}, bool ceil_mode = false, bool exclusive = true, str data_format = "NCHW", str pooling_type = "", bool global_pooling = false, bool adaptive = false, str padding_algorithm = "EXPLICIT", float norm_type = 0.0f) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, float norm_type) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : lp_pool2d_grad + param : [x, out, out_grad, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm, norm_type] + +- backward_op : lstm_grad + forward: lstm (Tensor input, Tensor h0, Tensor c0, Tensor weight, Tensor bias, bool use_peepholes + = true, bool is_reverse = false, bool is_test = false, str gate_activation = "sigmoid", + str cell_activation = "tanh", str candidate_activation = "tanh") -> Tensor (hidden), Tensor (cell), Tensor (batch_gate), Tensor (batch_cell_pre_act) + args: (Tensor input, Tensor h0, Tensor c0, Tensor weight, Tensor bias, Tensor hidden, Tensor cell, + Tensor batch_gate, Tensor batch_cell_pre_act, Tensor hidden_grad, bool use_peepholes, bool is_reverse, bool is_test, str gate_activation, + str cell_activation, str candidate_activation) + output: Tensor(input_grad), Tensor(h0_grad), Tensor(c0_grad), Tensor(weight_grad), Tensor(bias_grad) + infer_meta: + func: LSTMGradInferMeta + param: [input, h0, c0, weight, bias] + kernel: + func: lstm_grad + data_type: input + optional: h0, c0 + - backward_op : lu_grad forward : lu (Tensor x, bool pivot = true) -> Tensor(out), Tensor(pivots), Tensor(infos) args : (Tensor x, Tensor out, Tensor pivots, 
Tensor out_grad, bool pivot) @@ -3193,8 +3221,8 @@ func : tensor_unfold_grad - backward_op : thresholded_relu_grad - forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out) - args : (Tensor x, Tensor out_grad, float threshold) + forward : thresholded_relu (Tensor x, float threshold, float value) -> Tensor(out) + args : (Tensor x, Tensor out_grad, float threshold, float value) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta @@ -3490,6 +3518,16 @@ func: pyramid_hash_grad data_type: w +- backward_op: shuffle_batch_grad + forward: shuffle_batch (Tensor x, Tensor seed, int startup_seed=0) -> Tensor(out), Tensor(shuffle_idx), Tensor(seed_out) + args: (Tensor shuffle_idx, Tensor out_grad,int startup_seed=0) + output : Tensor(x_grad) + infer_meta: + func: ShuffleBatchGradInferMeta + kernel: + func: shuffle_batch_grad + data_type : out_grad + - backward_op: silu_double_grad forward: silu_grad (Tensor x, Tensor out, Tensor grad_out) -> Tensor(grad_x) args: (Tensor x, Tensor out, Tensor grad_out, Tensor grad_x_grad) diff --git a/paddle/phi/ops/yaml/fused_backward.yaml b/paddle/phi/ops/yaml/fused_backward.yaml index 235864c4c9d8b..3bd2673fab016 100644 --- a/paddle/phi/ops/yaml/fused_backward.yaml +++ b/paddle/phi/ops/yaml/fused_backward.yaml @@ -40,6 +40,29 @@ data_type : out_grad support_dygraph_mode : true +- backward_op : fused_elemwise_activation_grad + forward: fused_elemwise_activation (Tensor x, Tensor y, str[] functor_list, int axis = -1, float scale = 0.0, bool save_intermediate_out + = false) -> Tensor (out), Tensor (intermediate_out) + args: (Tensor x, Tensor y, Tensor out, Tensor intermediate_out, Tensor out_grad, str[] functor_list, int axis = -1, float scale = 0.0, bool save_intermediate_out = false) + output: Tensor (x_grad), Tensor (y_grad) + infer_meta: + func: FusedElemwiseActivationGradInferMeta + kernel: + func: fused_elemwise_activation_grad + data_type: out_grad + +- backward_op : fused_elemwise_add_activation_grad + forward: 
fused_elemwise_add_activation (Tensor x, Tensor y, str[] functor_list, int axis = -1, float scale = 0.0, bool save_intermediate_out = false) -> Tensor (out), Tensor (intermediate_out) + args: (Tensor x, Tensor y, Tensor out, Tensor intermediate_out, Tensor out_grad, str[] functor_list, int axis = -1, float scale = 0.0, bool save_intermediate_out = false) + output: Tensor (x_grad), Tensor (y_grad) + infer_meta: + func: FusedElemwiseActivationGradInferMeta + kernel: + func: fused_elemwise_add_activation_grad + data_type: out_grad + optional: x, intermediate_out + no_need_buffer: x, y + - backward_op : fused_rotary_position_embedding_grad forward: fused_rotary_position_embedding (Tensor q, Tensor k, Tensor v, Tensor sin, Tensor cos, Tensor position_ids, bool use_neox_rotary_style, bool time_major, float rotary_emb_base) -> Tensor(out_q), Tensor(out_k), Tensor(out_v) args : (Tensor sin, Tensor cos, Tensor position_ids, Tensor out_q_grad, Tensor out_k_grad,Tensor out_v_grad, bool use_neox_rotary_style, bool time_major, float rotary_emb_base) diff --git a/paddle/phi/ops/yaml/fused_ops.yaml b/paddle/phi/ops/yaml/fused_ops.yaml index 5db39e9d207d7..3c244b6f4625d 100644 --- a/paddle/phi/ops/yaml/fused_ops.yaml +++ b/paddle/phi/ops/yaml/fused_ops.yaml @@ -56,6 +56,20 @@ data_transform : skip_transform : max_enc_len_this_time, max_dec_len_this_time +- op : block_multihead_attention_xpu + args : (Tensor qkv, Tensor key_cache, Tensor value_cache, Tensor seq_lens_encoder, Tensor seq_lens_decoder, Tensor seq_lens_this_time, Tensor padding_offsets, Tensor cum_offsets, Tensor cu_seqlens_q, Tensor cu_seqlens_k, Tensor block_tables, Tensor cache_k_per_batch_maxs, Tensor cache_v_per_batch_maxs, Tensor pre_key_cache, Tensor pre_value_cache, Tensor rope_emb, Tensor mask, Tensor tgt_mask, Tensor cache_k_quant_scales, Tensor cache_v_quant_scales, Tensor cache_k_dequant_scales, Tensor cache_v_dequant_scales, Tensor qkv_out_scale, Tensor qkv_bias, Tensor out_shift, Tensor out_smooth, Tensor 
max_enc_len_this_time, Tensor max_dec_len_this_time, int max_seq_len, int block_size, bool use_neox_style, bool dynamic_cachekv_quant=false, int quant_round_type=1, float quant_max_bound=127.0, float quant_min_bound=-127.0, float out_scale=-1, str compute_dtype = "default") + output : Tensor(fmha_out), Tensor(qkv_out), Tensor(key_cache_out), Tensor(value_cache_out) + infer_meta : + func : BlockMultiheadAttentionInferXPUMeta + kernel : + func : block_multihead_attention_xpu + data_type : qkv + optional : pre_key_cache, pre_value_cache, rope_emb, mask, tgt_mask, cache_k_quant_scales, cache_v_quant_scales, cache_k_dequant_scales, cache_v_dequant_scales, qkv_out_scale, qkv_bias, out_shift, out_smooth, max_enc_len_this_time, max_dec_len_this_time + inplace : (qkv -> qkv_out), (key_cache -> key_cache_out), (value_cache -> value_cache_out) + support_dygraph_mode : true + data_transform : + skip_transform : max_enc_len_this_time, max_dec_len_this_time + - op : bn_act_xpu args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, float momentum, float epsilon, str data_format, int act_type) output : Tensor(out) @@ -307,6 +321,28 @@ data_type : x support_dygraph_mode : true +- op : fused_elemwise_activation + args: (Tensor x, Tensor y, str[] functor_list, int axis = -1, float scale = 0.0, bool save_intermediate_out + = false) + output: Tensor (out), Tensor (intermediate_out) + infer_meta: + func: FusedElemwiseActivationInferMeta + kernel: + func: fused_elemwise_activation + data_type: x + intermediate: intermediate_out + backward: fused_elemwise_activation_grad + +- op : fused_elemwise_add_activation + args: (Tensor x, Tensor y, str[] functor_list, int axis = -1, float scale = 0.0, bool save_intermediate_out = false) + output: Tensor(out), Tensor(intermediate_out) + kernel: + func: fused_elemwise_add_activation + infer_meta: + func : FusedElemwiseActivationInferMeta + backward: fused_elemwise_add_activation_grad + intermediate: intermediate_out + - op : 
fused_embedding_eltwise_layernorm args : (Tensor[] ids, Tensor[] embs, Tensor bias, Tensor scale, float epsilon = 0.00001f) output : Tensor(out) @@ -400,6 +436,16 @@ func: fused_token_prune support_dygraph_mode : true +- op : fusion_group + args: (Tensor[] inputs, int[] outs_dtype = {}, int[] inputs_dtype = {}, str func_name = "", int type + = 0) + output: Tensor[] (outs){inputs.size()} + infer_meta: + func: FusionGroupInferMeta + kernel: + func: fusion_group + data_type : DataType::FLOAT32 + - op : fusion_gru args : (Tensor x, Tensor h0, Tensor weight_x, Tensor weight_h, Tensor bias, str activation = "tanh", str gate_activation = "sigmoid", bool is_reverse = false, bool use_seq = true, bool origin_mode = false, bool force_fp32_output = false) output : Tensor(reordered_h0), Tensor(xx), Tensor(batched_input), Tensor(batched_out), Tensor(hidden) @@ -411,6 +457,17 @@ optional : h0, bias intermediate : reordered_h0, xx, batched_input, batched_out +- op : fusion_lstm + args : (Tensor x, Tensor weight_x, Tensor weight_h, Tensor bias, Tensor h0, Tensor c0, bool use_peepholes=true, bool is_reverse=false, bool use_seq=true, str gate_activation="sigmoid", str cell_activation="tanh", str candidate_activation="tanh", float scale_data=1.0, float shift_data=0.0, float[] scale_weights={1.0}, bool force_fp32_output=false) + output : Tensor(hidden), Tensor(cell), Tensor(xx), Tensor(batched_input), Tensor(batched_hidden), Tensor(batched_cell), Tensor(reordered_h0), Tensor(reordered_c0), Tensor(checked_cell) + infer_meta : + func : FusionLstmInferMeta + kernel : + func : fusion_lstm + data_type : x + optional : h0, c0 + intermediate : xx, batched_input, batched_hidden, batched_cell, reordered_h0, reordered_c0, checked_cell + - op : fusion_repeated_fc_relu args : (Tensor x, Tensor[] w, Tensor[] bias) output : Tensor[](relu_out){w.size()-1}, Tensor(out) @@ -685,3 +742,15 @@ func : yolo_box_xpu data_type : x optional : x_max + +- op: add_group_norm_silu + args : (Tensor x,Tensor 
residual, Tensor scale, Tensor bias, float epsilon = 1e-5, int groups = -1, str data_format = "NCHW", str activation = "") + output : Tensor(y), Tensor(residual_out), Tensor(mean), Tensor(variance) + infer_meta : + func : AddGroupNormSiluInferMeta + kernel : + func : add_group_norm_silu + data_type : x + optional : scale, bias, residual, residual_out + support_dygraph_mode : true + interfaces : paddle::dialect::LayoutTransformationInterface diff --git a/paddle/phi/ops/yaml/inconsistent/onednn_static.yaml b/paddle/phi/ops/yaml/inconsistent/onednn_static.yaml index 282dd35cb3453..386eadf0c1dc6 100644 --- a/paddle/phi/ops/yaml/inconsistent/onednn_static.yaml +++ b/paddle/phi/ops/yaml/inconsistent/onednn_static.yaml @@ -91,17 +91,6 @@ kernel : func : fused_transpose -- op : fusion_lstm - args : (Tensor x, Tensor weight_x, Tensor weight_h, Tensor bias, Tensor h0, Tensor c0, bool use_peepholes=true, bool is_reverse=false, bool use_seq=true, str gate_activation="sigmoid", str cell_activation="tanh", str candidate_activation="tanh", float scale_data=1.0, float shift_data=0.0, float[] scale_weights={1.0}, bool force_fp32_output=false) - output : Tensor(hidden), Tensor(cell), Tensor(xx), Tensor(batched_input), Tensor(batched_hidden), Tensor(batched_cell), Tensor(reordered_h0), Tensor(reordered_c0), Tensor(checked_cell) - infer_meta : - func : FusionLstmInferMeta - kernel : - func : fusion_lstm - data_type : x - optional : h0, c0 - intermediate : xx, batched_input, batched_hidden, batched_cell, reordered_h0, reordered_c0, checked_cell - - op: multi_gru args: (Tensor x, Tensor[] weight_x, Tensor[] weight_h, Tensor[] bias, Tensor[] scale_weights, str activation="tanh", str gate_activation="sigmoid", int layers=1, bool origin_mode=false, str mkldnn_data_type="float32", float scale_data=1.0, float shift_data=1.0, bool force_fp32_output=false) output: Tensor(hidden) diff --git a/paddle/phi/ops/yaml/inconsistent/static_backward.yaml 
b/paddle/phi/ops/yaml/inconsistent/static_backward.yaml index f408cece8e006..5a9c9a66a2e75 100644 --- a/paddle/phi/ops/yaml/inconsistent/static_backward.yaml +++ b/paddle/phi/ops/yaml/inconsistent/static_backward.yaml @@ -485,16 +485,6 @@ composite : tile_grad(x, out_grad, repeat_times, x_grad) backward : tile_double_grad -- backward_op: fused_elemwise_add_activation_grad - forward: fused_elemwise_add_activation(Tensor x, Tensor y, str[] functor_list, float scale=0.0, int axis=-1, bool save_intermediate_out=false) -> Tensor(out), Tensor(intermediate_out) - args: (Tensor x, Tensor y, Tensor out, Tensor intermediate_out, Tensor out_grad, str[] functor_list, float scale=0.0, int axis=-1, bool save_intermediate_out=false) - output: Tensor(x_grad), Tensor(y_grad) - infer_meta: - func: FusedElemwiseAddActivationGradInferMeta - kernel: - func: fused_elemwise_add_activation_grad - optional : x, intermediate_out - - backward_op: match_matrix_tensor_grad forward: match_matrix_tensor (Tensor x, Tensor y, Tensor w, int dim_t=1) -> Tensor(out), Tensor(tmp) args: (Tensor x, Tensor y, Tensor w, Tensor tmp, Tensor out_grad, int dim_t=1) @@ -503,13 +493,3 @@ func: MatchMatrixTensorGradInferMeta kernel: func: match_matrix_tensor_grad - -- backward_op: shuffle_batch_grad - forward: shuffle_batch (Tensor x, Tensor seed, int startup_seed=0) -> Tensor(out), Tensor(shuffle_idx), Tensor(seed_out) - args: (Tensor shuffle_idx, Tensor out_grad,int startup_seed=0) - output : Tensor(x_grad) - infer_meta: - func: ShuffleBatchGradInferMeta - kernel: - func: shuffle_batch_grad - data_type : out_grad diff --git a/paddle/phi/ops/yaml/inconsistent/static_ops.yaml b/paddle/phi/ops/yaml/inconsistent/static_ops.yaml index ddfe98cefcc80..a5921bb3a039a 100644 --- a/paddle/phi/ops/yaml/inconsistent/static_ops.yaml +++ b/paddle/phi/ops/yaml/inconsistent/static_ops.yaml @@ -49,14 +49,6 @@ inplace : (x -> out) interfaces : paddle::dialect::InferSymbolicShapeInterface, 
paddle::dialect::LayoutTransformationInterface -- op : assign_pos - args : (Tensor x, Tensor cum_count, Tensor eff_num_len) - output : Tensor(out) - infer_meta : - func : AssignPosInferMeta - kernel : - func : assign_pos - - op : assign_value args : (int[] shape, DataType dtype, Scalar[] values, Place place = {}) output : Tensor(out) @@ -196,15 +188,6 @@ data_type : dtype inplace: (input -> output) -- op : decayed_adagrad - args : (Tensor param, Tensor grad, Tensor moment, Tensor learning_rate, float decay = 0.95f, float epsilon = 1.0e-6f) - output : Tensor(param_out), Tensor(moment_out) - infer_meta : - func : DecayedAdagradInferMeta - kernel : - func : decayed_adagrad - data_type : param - - op : dequantize_linear args : (Tensor x, Tensor scale, Tensor zero_point, Tensor in_accum, Tensor in_state, int quant_axis = 0, int bit_length = 8, int round_type = 0, bool is_test = true, bool only_observer = false) output : Tensor(y), Tensor(out_state), Tensor(out_accum), Tensor(out_scale) @@ -608,15 +591,6 @@ interfaces : paddle::dialect::InferSymbolicShapeInterface traits : pir::SideEffectTrait -- op : prune_gate_by_capacity - args : (Tensor gate_idx, Tensor expert_count, int64_t n_expert, int64_t n_worker) - output : Tensor(new_gate_idx) - infer_meta : - func : PruneGateByCapacityInferMeta - kernel : - func : prune_gate_by_capacity - data_type : gate_idx - - op : pull_box_sparse args : (Tensor w, Tensor[] ids, bool is_sparse = false, bool is_distributed = false, int size = 1) output : Tensor[](out){ids.size()} @@ -812,17 +786,6 @@ param: [x] inplace : (x -> out) -- op : shuffle_batch - args : (Tensor x, Tensor seed, int startup_seed=0) - output : Tensor(out), Tensor(shuffle_idx), Tensor(seed_out) - infer_meta: - func: ShuffleBatchInferMeta - kernel: - func: shuffle_batch - data_type: x - backward : shuffle_batch_grad - traits : pir::SideEffectTrait - - op : soft_relu args : (Tensor x, float threshold = 20.0f) output : Tensor(out) @@ -859,16 +822,6 @@ backward : 
subtract_grad interfaces : paddle::dialect::InferSymbolicShapeInterface -- op : tdm_sampler - args: (Tensor x, Tensor travel, Tensor layer, bool output_positive=true, int[] neg_samples_num_list={}, int[] layer_offset_lod={}, int seed = 0, int dtype=2) - output: Tensor(out), Tensor(labels), Tensor(mask) - infer_meta: - func : TdmSamplerInferMeta - kernel: - func : tdm_sampler - data_type : x - optional : labels - - op : tile args : (Tensor x, IntArray repeat_times = {}) output : Tensor(out) @@ -935,16 +888,6 @@ optional: cache_kv, ln_scale, ln_bias, qkv_bias, src_mask, out_linear_bias, ln_scale_2, ln_bias_2, ln_mean_2, ln_var_2, bias_dropout_residual_out, cache_kv_out backward: fused_attention_grad -- op: fused_elemwise_add_activation - args: (Tensor x, Tensor y, str[] functor_list, float scale=0.0, int axis=-1, bool save_intermediate_out=false) - output: Tensor(out), Tensor(intermediate_out) - kernel: - func: fused_elemwise_add_activation - infer_meta: - func : FusedElemwiseAddActivationInferMeta - backward: fused_elemwise_add_activation_grad - intermediate: intermediate_out - - op: fused_feedforward args: (Tensor x, Tensor dropout1_seed, Tensor dropout2_seed, Tensor linear1_weight, Tensor linear1_bias, Tensor linear2_weight, Tensor linear2_bias, Tensor ln1_scale, Tensor ln1_bias, Tensor ln2_scale, Tensor ln2_bias, bool pre_layer_norm, float ln1_epsilon, float ln2_epsilon, str act_method, float dropout1_prob, float dropout2_prob, str dropout1_implementation, str dropout2_implementation, bool is_test, bool dropout1_fix_seed, bool dropout2_fix_seed, int dropout1_seed_val, int dropout2_seed_val, bool add_residual, int ring_id) output: Tensor(out), Tensor(dropout1_mask), Tensor(dropout2_mask), Tensor(ln1_mean), Tensor(ln1_variance), Tensor(ln2_mean), Tensor(ln2_variance), Tensor(linear1_out), Tensor(ln1_out), Tensor(dropout1_out), Tensor(dropout2_out) @@ -1001,15 +944,6 @@ optional: bias, sample_weight, custom_dist_probs, custom_dist_alias, custom_dist_alias_probs 
backward: nce_grad -- op: number_count - args: (Tensor numbers, int upper_range) - output: Tensor(out) - infer_meta: - func: NumberCountInferMeta - kernel: - func: number_count - data_type: numbers - - op: onednn_to_paddle_layout args: (Tensor x, int dst_layout) output: Tensor(out) diff --git a/paddle/phi/ops/yaml/legacy/backward_exclude.yaml b/paddle/phi/ops/yaml/legacy/backward_exclude.yaml index 335952bc3475c..9a327ef5dd4b3 100644 --- a/paddle/phi/ops/yaml/legacy/backward_exclude.yaml +++ b/paddle/phi/ops/yaml/legacy/backward_exclude.yaml @@ -5,7 +5,6 @@ - amax_grad - amin_grad - cast_grad -- channel_shuffle_grad - conv2d_transpose_double_grad - conv2d_transpose_grad - deformable_conv_grad @@ -34,7 +33,6 @@ - repeat_interleave_grad - repeat_interleave_with_tensor_index_grad - rnn_grad -- rrelu_grad - set_value_with_tensor_grad - slice_double_grad - slice_grad diff --git a/paddle/phi/ops/yaml/legacy/ops_exclude.yaml b/paddle/phi/ops/yaml/legacy/ops_exclude.yaml index 160e33c5b36c8..703c948240df0 100644 --- a/paddle/phi/ops/yaml/legacy/ops_exclude.yaml +++ b/paddle/phi/ops/yaml/legacy/ops_exclude.yaml @@ -22,7 +22,6 @@ - c_sync_calc_stream - c_sync_comm_stream - cast -- channel_shuffle - conv2d_transpose - conv2d_transpose_bias - copy_to @@ -75,7 +74,6 @@ - repeat_interleave - repeat_interleave_with_tensor_index - rnn -- rrelu - sequence_mask - set_value_with_tensor - slice diff --git a/paddle/phi/ops/yaml/legacy/static_ops.yaml b/paddle/phi/ops/yaml/legacy/static_ops.yaml index 1280fd3716f0a..d9d0c222b770f 100755 --- a/paddle/phi/ops/yaml/legacy/static_ops.yaml +++ b/paddle/phi/ops/yaml/legacy/static_ops.yaml @@ -699,6 +699,14 @@ func : swish backward : swish_grad +- op : transfer_layout + args: (Tensor x, int src_layout = -1, int dst_layout=-1) + output: Tensor (out) + infer_meta: + func: TransferLayoutInferMeta + kernel: + func: transfer_layout + - op : tril_indices args : (int rows = 0, int cols = 0, int offset = 0, DataType dtype = DataType::INT64) output : 
Tensor(out) diff --git a/paddle/phi/ops/yaml/op_compat.yaml b/paddle/phi/ops/yaml/op_compat.yaml index 2f7af0b64c802..4c6d111f0f4a6 100755 --- a/paddle/phi/ops/yaml/op_compat.yaml +++ b/paddle/phi/ops/yaml/op_compat.yaml @@ -4028,6 +4028,13 @@ outputs: {out: Out} +- op: fused_elemwise_activation + backward: fused_elemwise_activation_grad + inputs: + {x : X, y : Y} + outputs: + {out : Out, intermediate_out : IntermediateOut} + - op: fused_elemwise_add_activation backward: fused_elemwise_add_activation_grad inputs : @@ -4035,6 +4042,13 @@ outputs : {out : Out, intermediate_out : IntermediateOut} +- op: fused_elemwise_add_activation + backward: fused_elemwise_add_activation_grad + inputs: + {x : X, y : Y} + outputs: + {out : Out, intermediate_out : IntermediateOut} + - op: fused_matmul inputs : {x: X, y: Y, residual_data: ResidualData} @@ -4062,6 +4076,12 @@ outputs : {slimmed_x : SlimmedX, cls_inds : CLSInds} +- op: fusion_group + inputs: + inputs : Inputs + outputs: + outs : Outs + - op: fusion_seqpool_cvm_concat inputs: {x : X, cvm : CVM} @@ -4129,6 +4149,15 @@ outputs: {out: Out} +- op: lstm + backward: lstm_grad + inputs: + {input : Input, h0 : H0, c0 : C0, weight : Weight, bias : Bias} + outputs: + {hidden : Hidden, cell : Cell, batch_gate : BatchGate, batch_cell_pre_act : BatchCellPreAct} + extra: + outputs: [batch_gate, batch_cell_pre_act] + - op: lu backward: lu_grad inputs: @@ -4195,7 +4224,7 @@ inputs: {gate_idx: GateIdx, expert_count: ExpertCount} outputs: - new_gate_idx: NewGateIdx + out_gate_idx: NewGateIdx - op: pyramid_hash backward: pyramid_hash_grad @@ -4250,6 +4279,8 @@ {x: X} outputs: {out: Out, noise: Noise} + extra: + outputs: [noise] - op: send_v2 inputs : @@ -4355,6 +4386,12 @@ outputs : out : Out +- op: transfer_layout + inputs: + x : X + outputs: + out : Out + - op: uniform_random_batch_size_like inputs: input : Input diff --git a/paddle/phi/ops/yaml/op_version.yaml b/paddle/phi/ops/yaml/op_version.yaml index 7ef9a6f83e84d..a41a67e9ded17 
100644 --- a/paddle/phi/ops/yaml/op_version.yaml +++ b/paddle/phi/ops/yaml/op_version.yaml @@ -486,6 +486,14 @@ comment : A flag to indicate whether to do softmax default : "true" +- op : thresholded_relu + version : + - checkpoint : Upgrade thresholded_relu, add a new attribute [value] + action : + - add_attr : value + comment : The threshold value of thresholded_relu. + default : 0.0 + - op : trace version : - checkpoint : Upgrade trace add a new attribute [axis2] diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index e758d5e0438f0..3538cbd137762 100755 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -321,6 +321,14 @@ backward : assign_out__grad traits : pir::SideEffectTrait +- op : assign_pos + args : (Tensor x, Tensor cum_count, Tensor eff_num_len) + output : Tensor(out) + infer_meta : + func : AssignPosInferMeta + kernel : + func : assign_pos + - op : assign_value_ args : (Tensor output, int[] shape, DataType dtype, Scalar[] values, Place place = {}) output : Tensor(out) @@ -760,6 +768,7 @@ kernel : func : class_center_sample data_type : label + traits : pir::SideEffectTrait - op : clip args : (Tensor x, Scalar(float) min, Scalar(float) max) @@ -1052,6 +1061,15 @@ backend : place interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : decayed_adagrad + args : (Tensor param, Tensor grad, Tensor moment, Tensor learning_rate, float decay = 0.95f, float epsilon = 1.0e-6f) + output : Tensor(param_out), Tensor(moment_out) + infer_meta : + func : DecayedAdagradInferMeta + kernel : + func : decayed_adagrad + data_type : param + - op : decode_jpeg args : (Tensor x, str mode, Place place) output : Tensor(out) @@ -1262,6 +1280,7 @@ optional : seed_tensor intermediate : mask backward : dropout_grad + traits : pir::SideEffectTrait - op : edit_distance args : (Tensor hyps, Tensor refs, Tensor hypslength, Tensor refslength, bool normalized = false) @@ -1672,6 +1691,7 @@ output : Tensor(out), Tensor(xshape) infer_meta 
: func : FlattenWithXShapeInferMeta + spmd_rule : FlattenInferSpmd kernel : func : flatten data_type : x @@ -2658,6 +2678,31 @@ backward : logsumexp_grad interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : lp_pool2d + args : (Tensor x, IntArray kernel_size, int[] strides = {1,1}, int[] paddings = {0,0}, bool ceil_mode = false, bool exclusive = true, str data_format = "NCHW", str pooling_type = "", bool global_pooling = false, bool adaptive = false, str padding_algorithm = "EXPLICIT", float norm_type = 0.0f) + output : Tensor(out) + infer_meta : + func : Pool2DInferMeta + param : [x, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm] + kernel : + func : lp_pool2d + param : [x, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm, norm_type] + backward : lp_pool2d_grad + +- op : lstm + args: (Tensor input, Tensor h0, Tensor c0, Tensor weight, Tensor bias, bool use_peepholes + = true, bool is_reverse = false, bool is_test = false, str gate_activation = "sigmoid", + str cell_activation = "tanh", str candidate_activation = "tanh") + output: Tensor (hidden), Tensor (cell), Tensor (batch_gate), Tensor (batch_cell_pre_act) + infer_meta: + func: LSTMInferMeta + kernel: + func: lstm + data_type: input + optional: h0, c0 + intermediate: batch_gate, batch_cell_pre_act + backward: lstm_grad + - op : lstsq args : (Tensor x, Tensor y, Scalar rcond=0.0f, str driver="gels") output : Tensor(solution), Tensor(residuals), Tensor(rank), Tensor(singular_values) @@ -3255,6 +3300,15 @@ backward : prod_grad interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : prune_gate_by_capacity + args : (Tensor gate_idx, Tensor expert_count, int64_t n_expert=0, int64_t n_worker=0) + output : Tensor(out_gate_idx) + infer_meta : + func : PruneGateByCapacityInferMeta + kernel : + func : prune_gate_by_capacity + data_type : gate_idx + 
- op : psroi_pool args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height=1, int pooled_width=1, int output_channels=1, float spatial_scale=1.0) output : Tensor @@ -3584,7 +3638,7 @@ traits : pir::SideEffectTrait - op : rrelu - args : (Tensor x, float lower, float upper, bool is_test) + args : (Tensor x, float lower=1.0f/8, float upper=1.0f/3, bool is_test=false) output : Tensor(out), Tensor(noise) infer_meta : func : RReluInferMeta @@ -3776,6 +3830,19 @@ kernel : func : shard_index +- op : shuffle_batch + args : (Tensor x, Tensor seed, int startup_seed=0) + output : Tensor(out), Tensor(shuffle_idx), Tensor(seed_out) + infer_meta: + func: ShuffleBatchInferMeta + kernel: + func: shuffle_batch + data_type: x + backward : shuffle_batch_grad + traits : pir::SideEffectTrait + data_transform : + skip_transform : seed + - op : shuffle_channel args : (Tensor x, int group = 1) output : Tensor(out) @@ -4148,6 +4215,25 @@ func : tanh_shrink backward : tanh_shrink_grad +- op : tdm_child + args: (Tensor x, Tensor tree_info, int child_nums, DataType dtype = DataType::INT32) + output: Tensor (child), Tensor (leaf_mask) + infer_meta: + func: TdmChildInferMeta + kernel: + func: tdm_child + data_type: x + +- op : tdm_sampler + args: (Tensor x, Tensor travel, Tensor layer, bool output_positive=true, int[] neg_samples_num_list={}, int[] layer_offset_lod={}, int seed = 0, int dtype=2) + output: Tensor(out), Tensor(labels), Tensor(mask) + infer_meta: + func : TdmSamplerInferMeta + kernel: + func : tdm_sampler + data_type : x + optional : labels + - op : temporal_shift args : (Tensor x, int seg_num, float shift_ratio = 0.25f, str data_format = "NCHW") output : Tensor(out) @@ -4170,7 +4256,7 @@ no_need_buffer : input - op : thresholded_relu - args : (Tensor x, float threshold = 1.0) + args : (Tensor x, float threshold = 1.0, float value = 0.0) output : Tensor(out) infer_meta : func : UnchangedInferMeta @@ -4374,6 +4460,7 @@ data_type: x inplace: (x -> out) backward: 
uniform_inplace_grad + traits : pir::SideEffectTrait - op : uniform_random_batch_size_like args: (Tensor input, int[] shape, int input_dim_idx = 0, int output_dim_idx = 0, @@ -4386,6 +4473,7 @@ uniform_random_batch_size_like_sr {selected_rows -> selected_rows} data_type: dtype no_need_buffer: input + traits : pir::SideEffectTrait - op : unique_consecutive args : (Tensor x, bool return_inverse = false, bool return_counts = false, int[] axis = {}, DataType dtype = DataType::FLOAT32) @@ -4631,3 +4719,12 @@ func: MoeInferMeta kernel: func: moe + +- op: number_count + args: (Tensor numbers, int upper_range) + output: Tensor(out) + infer_meta: + func: NumberCountInferMeta + kernel: + func: number_count + data_type: numbers diff --git a/paddle/phi/ops/yaml/sparse_backward.yaml b/paddle/phi/ops/yaml/sparse_backward.yaml index 3e614b942d301..f7734af1bf6ec 100644 --- a/paddle/phi/ops/yaml/sparse_backward.yaml +++ b/paddle/phi/ops/yaml/sparse_backward.yaml @@ -184,6 +184,17 @@ func : log1p_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, log1p_csr_grad {sparse_csr, sparse_csr -> sparse_csr} +- backward_op : mask_as_grad + forward : mask_as(Tensor x, Tensor mask) -> Tensor(out) + args : (Tensor x, Tensor mask, Tensor out_grad) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : mask_as_coo_grad {dense, sparse_coo, sparse_coo -> dense}, + mask_as_csr_grad {dense, sparse_csr, sparse_csr -> dense} + - backward_op : masked_matmul_grad forward : masked_matmul(Tensor x, Tensor y, Tensor mask) -> Tensor(out) args : (Tensor x, Tensor y, Tensor out_grad) diff --git a/paddle/phi/ops/yaml/sparse_ops.yaml b/paddle/phi/ops/yaml/sparse_ops.yaml index ac230be485c09..80cef73a6c1f5 100644 --- a/paddle/phi/ops/yaml/sparse_ops.yaml +++ b/paddle/phi/ops/yaml/sparse_ops.yaml @@ -497,6 +497,18 @@ func : indices_coo{sparse_coo -> dense} layout : x +- op: mask_as + args : (Tensor x, Tensor mask) + output : Tensor(out) + infer_meta : + func : 
UnchangedInferMeta + param : [x] + kernel : + func : mask_as_coo{dense, sparse_coo -> sparse_coo}, + mask_as_csr{dense, sparse_csr -> sparse_csr} + layout : x + backward: mask_as_grad + - op: masked_matmul args : (Tensor x, Tensor y, Tensor mask) output : Tensor(out) diff --git a/paddle/pir/include/core/block.h b/paddle/pir/include/core/block.h index 25b4afe9bfc47..3756e738b22bb 100644 --- a/paddle/pir/include/core/block.h +++ b/paddle/pir/include/core/block.h @@ -91,7 +91,7 @@ class IR_API Block { bool HasOneUse() const; BlockOperand *first_use_addr() { return &first_use_; } - // This is a unsafe funcion, please use it carefully. + // This is a unsafe function, please use it carefully. void ResetOpListOrder(const OpListType &new_op_list); /// diff --git a/paddle/pir/include/core/block_argument.h b/paddle/pir/include/core/block_argument.h index b3b8c78660c34..c11fd88c9c11f 100644 --- a/paddle/pir/include/core/block_argument.h +++ b/paddle/pir/include/core/block_argument.h @@ -54,10 +54,10 @@ class IR_API BlockArgument : public Value { void Destroy(); /// set the position in the block argument list. void set_index(uint32_t index); - // Access create annd destroy. + // Access create and destroy. friend Block; - // Access classof annd dyn_cast_from. + // Access classof and dyn_cast_from. friend Value; static bool classof(Value value); static BlockArgument dyn_cast_from(Value value); diff --git a/paddle/pir/include/core/builtin_op.h b/paddle/pir/include/core/builtin_op.h index e12db2e3be124..875f1c73b7565 100644 --- a/paddle/pir/include/core/builtin_op.h +++ b/paddle/pir/include/core/builtin_op.h @@ -39,7 +39,7 @@ class IR_API ModuleOp : public pir::Op { Block &block(); // - // As the top operation, ModuleOp only support create&destroye through + // As the top operation, ModuleOp only support create&destroy through // below interface: "create"&"destroy". 
static ModuleOp Create(IrContext *context, Program *pointer); void Destroy(); @@ -84,7 +84,7 @@ class IR_API SetParameterOp : public pir::Op { }; /// -/// \brief ShdowOutputOp: ShdowOutputOp(OpOperand, {StrAttribute, +/// \brief ShadowOutputOp: ShadowOutputOp(OpOperand, {StrAttribute, /// StrAttribute}) /// class IR_API ShadowOutputOp diff --git a/paddle/pir/include/core/builtin_type_storage.h b/paddle/pir/include/core/builtin_type_storage.h index f706e0c66277e..0b74d8e127bf8 100644 --- a/paddle/pir/include/core/builtin_type_storage.h +++ b/paddle/pir/include/core/builtin_type_storage.h @@ -127,7 +127,7 @@ struct VectorTypeStorage : public TypeStorage { ~VectorTypeStorage() { free(data_); } /// - /// \brief Each derived TypeStorage must define a Construc method, which + /// \brief Each derived TypeStorage must define a Construct method, which /// StorageManager uses to construct a derived TypeStorage. /// static VectorTypeStorage* Construct(const ParamKey& key) { diff --git a/paddle/pir/include/core/program.h b/paddle/pir/include/core/program.h index d838916eefea5..4d0da62a98c84 100644 --- a/paddle/pir/include/core/program.h +++ b/paddle/pir/include/core/program.h @@ -57,6 +57,7 @@ class IR_API Program { std::shared_ptr Clone(IrMapping& ir_mapping) const; // NOLINT + void CopyToBlock(IrMapping& ir_mapping, Block* insert_block) const; // NOLINT Block* block() { return &module_.block(); } const Block* block() const { return &module_op().block(); } @@ -70,9 +71,13 @@ class IR_API Program { parameters_ = parameters; } + uint64_t id() const { return id_; } + private: // computation graph ModuleOp module_; + // unique in current process, "almost" unique between processes. 
+ uint64_t id_; // weight ParameterMap parameters_; }; diff --git a/paddle/pir/include/dialect/shape/utils/shape_analysis.h b/paddle/pir/include/dialect/shape/utils/shape_analysis.h index bbdda621511eb..0256d97dbc2b1 100644 --- a/paddle/pir/include/dialect/shape/utils/shape_analysis.h +++ b/paddle/pir/include/dialect/shape/utils/shape_analysis.h @@ -42,7 +42,7 @@ class IR_API InferSymbolicShapeContext { const symbol::ShapeOrDataDimExprs& GetShapeOrDataForValue(Value val) const; - void SetStaticShapeForValue(Value val); + void SetSymbolForValueByStaticShape(Value val); void SetShapeOrDataForValue(Value val, const symbol::ShapeOrDataDimExprs& shape_or_data); @@ -150,7 +150,7 @@ class IR_API ShapeConstraintIRAnalysis final friend void InferSymExprForAllValues(ModuleOp module_op); - void SetStaticShapeForValue(Value val); + void SetSymbolForValueByStaticShape(Value val); void InferShapeOrDataForValue(Value val); diff --git a/paddle/pir/src/core/op_result_impl.cc b/paddle/pir/src/core/op_result_impl.cc index 29d411c1a6c88..75261f77cf0e7 100644 --- a/paddle/pir/src/core/op_result_impl.cc +++ b/paddle/pir/src/core/op_result_impl.cc @@ -19,8 +19,7 @@ #include "paddle/pir/include/core/operation.h" #include "paddle/pir/src/core/op_result_impl.h" -namespace pir { -namespace detail { +namespace pir::detail { uint32_t OpResultImpl::index() const { if (const auto *outline_result = dyn_cast(this)) { @@ -111,5 +110,4 @@ OpInlineResultImpl::OpInlineResultImpl(Type type, uint32_t result_index) result_index)); } -} // namespace detail -} // namespace pir +} // namespace pir::detail diff --git a/paddle/pir/src/core/program.cc b/paddle/pir/src/core/program.cc index 19d08f094fd4c..453cf3eb170df 100644 --- a/paddle/pir/src/core/program.cc +++ b/paddle/pir/src/core/program.cc @@ -13,13 +13,48 @@ // limitations under the License. 
#include "paddle/pir/include/core/program.h" +#include +#include +#include +#include #include "glog/logging.h" #include "paddle/pir/include/core/ir_context.h" namespace pir { +namespace { + +int64_t GetRandomId() { + std::random_device rd{}; + std::mt19937_64 gen(rd()); + std::uniform_int_distribution dis( + 0, std::numeric_limits::max()); + return dis(gen); +} + +bool InsertGlobalStorageSuccess(int64_t random_id) { + static std::unordered_set storage; + static std::mutex mutex; + std::unique_lock lock(mutex); + return storage.emplace(random_id).second; +} + +int64_t GetUniqueRandomId() { + int kLimit = 100; + for (int i = 0; i < kLimit; ++i) { + int64_t random_id = GetRandomId(); + if (InsertGlobalStorageSuccess(random_id)) { + return random_id; + } + } + LOG(FATAL) << "Fatal bug occured in GetUniqueRandomId()."; +} + +} // namespace + Program::Program(IrContext* context) { module_ = ModuleOp::Create(context, this); + id_ = GetUniqueRandomId(); } Program::~Program() { @@ -39,6 +74,26 @@ std::shared_ptr Program::Clone(IrMapping& ir_mapping) const { return new_program; } +void Program::CopyToBlock(IrMapping& ir_mapping, Block* insert_block) const { + auto clone_options = CloneOptions::All(); + for (const auto& op : *block()) { + bool skip_op = false; + for (uint32_t i = 0; i < op.num_results(); i++) { + if (ir_mapping.GetMutableMap().count(op.result(i))) { + skip_op = true; + break; + } + } + if (skip_op) { + continue; + } + + auto* new_op = op.Clone(ir_mapping, clone_options); + insert_block->push_back(new_op); + } + return; +} + Parameter* Program::GetParameter(const std::string& name) const { if (parameters_.count(name) != 0) { return parameters_.at(name).get(); diff --git a/paddle/pir/src/dialect/shape/interface/infer_symbolic_shape/infer_symbolic_shape.cc b/paddle/pir/src/dialect/shape/interface/infer_symbolic_shape/infer_symbolic_shape.cc index f1c44e945f60c..d635a0ac5cc52 100644 --- 
a/paddle/pir/src/dialect/shape/interface/infer_symbolic_shape/infer_symbolic_shape.cc +++ b/paddle/pir/src/dialect/shape/interface/infer_symbolic_shape/infer_symbolic_shape.cc @@ -18,7 +18,7 @@ // cinn operators. // Add `interfaces : pir::InferSymbolicShapeInterface` in relative -// yaml file to conresponding op. +// yaml file to corresponding op. // Since necessary checks have been done in the Op's `InferMeta` and `VeriySig`, // no more repetitive work here. diff --git a/paddle/pir/src/dialect/shape/transforms/shape_optimization_pass.cc b/paddle/pir/src/dialect/shape/transforms/shape_optimization_pass.cc index 343b1bf329c2c..e51cf34aa4bc9 100644 --- a/paddle/pir/src/dialect/shape/transforms/shape_optimization_pass.cc +++ b/paddle/pir/src/dialect/shape/transforms/shape_optimization_pass.cc @@ -126,6 +126,10 @@ void DebugPrintOpInfo(pir::Operation* op, std::ostringstream print_stream; for (uint32_t i = 0; i < op->num_results(); ++i) { const auto& res = op->result(i); + if (!res || !res.type()) { + continue; + } + print_stream << "\tresult(" << res.dyn_cast().index() << ") " << "ShapeOrData: {"; @@ -170,6 +174,10 @@ void CheckInferSymWithInferMeta( pir::InferSymbolicShapeContext* infer_context = nullptr) { for (uint32_t i = 0; i < op->num_results(); ++i) { const auto& res = op->result(i); + if (!res || !res.type()) { + continue; + } + std::ostringstream print_stream; // InferMeta funcs of some Ops are not corrrect now, we don't check them. 
@@ -299,7 +307,7 @@ void InferSymExprForBlock(const Block& block, << " DOES NOT have InferSymbolicShapeInterface!"; } for (uint32_t i = 0; i < op.num_results(); ++i) { - infer_context->SetStaticShapeForValue(op.result(i)); + infer_context->SetSymbolForValueByStaticShape(op.result(i)); } } DebugPrintOpInfo(&op, infer_context); @@ -314,6 +322,9 @@ void InferSymExprForAllValues(ModuleOp module_op) { auto infer_context = shape_analysis.MutInferSymbolicShapeContext(); for (uint32_t i = 0; i < module_op->num_regions(); i++) { for (auto& block : module_op->region(i)) { + for (auto& [_, value] : block.kwargs()) { + infer_context->SetSymbolForValueByStaticShape(value); + } InferSymExprForBlock(block, infer_context); } } diff --git a/paddle/pir/src/dialect/shape/utils/constraints_manager.cc b/paddle/pir/src/dialect/shape/utils/constraints_manager.cc index bdb9e52a49507..7b2a887cfaa8c 100644 --- a/paddle/pir/src/dialect/shape/utils/constraints_manager.cc +++ b/paddle/pir/src/dialect/shape/utils/constraints_manager.cc @@ -100,22 +100,22 @@ void ConstraintsManager::AddEqCstr(const DimExpr& lhs, const DimExpr& rhs) { equals_.Union(lhs, rhs); VLOG(4) << "add equal constraint: " << lhs << " == " << rhs; } - DimExpr origin, subsutituted; + DimExpr origin, substituted; auto comp_result = CompareDimExprPriority(lhs, rhs); if (comp_result == PriorityComparisonStatus::LOWER) { origin = lhs; - subsutituted = rhs; + substituted = rhs; } else if (comp_result == PriorityComparisonStatus::HIGHER) { origin = rhs; - subsutituted = lhs; + substituted = lhs; } else { return; } - if (CanSubstituteInConstraint(origin, subsutituted)) { - SubstituteInConstraint(origin, subsutituted); + if (CanSubstituteInConstraint(origin, substituted)) { + SubstituteInConstraint(origin, substituted); } if (equal_callback_func_) { - equal_callback_func_(origin, subsutituted); + equal_callback_func_(origin, substituted); } } diff --git a/paddle/pir/src/dialect/shape/utils/dim_expr_util.cc 
b/paddle/pir/src/dialect/shape/utils/dim_expr_util.cc index c7b5e21a2e01b..c622194e602eb 100644 --- a/paddle/pir/src/dialect/shape/utils/dim_expr_util.cc +++ b/paddle/pir/src/dialect/shape/utils/dim_expr_util.cc @@ -751,7 +751,7 @@ struct FoldOperandTrait { PADDLE_ENFORCE_EQ( *value, expr_value, - phi::errors::InvalidArgument("The value (%d) should be equel to expr " + phi::errors::InvalidArgument("The value (%d) should be equal to expr " "(%d) when they are both not 1.", *value, expr_value)); @@ -887,7 +887,7 @@ struct FoldRedundantSymbolicBroadcast { ret.value().value, int64_value, phi::errors::InvalidArgument( - "The value of return (%d) should be equel to expr (%d) of " + "The value of return (%d) should be equal to expr (%d) of " "operands at index (%d) when they are both > 1.", ret.value().value, int64_value, diff --git a/paddle/pir/src/dialect/shape/utils/shape_analysis.cc b/paddle/pir/src/dialect/shape/utils/shape_analysis.cc index b62ad0f2a3d95..3c51cf57226c4 100644 --- a/paddle/pir/src/dialect/shape/utils/shape_analysis.cc +++ b/paddle/pir/src/dialect/shape/utils/shape_analysis.cc @@ -67,14 +67,15 @@ InferSymbolicShapeContext::GetShapeOrDataForValue(Value val) const { return value_id_to_shape_or_data_.at(val.impl()->id()); } -void InferSymbolicShapeContext::SetStaticShapeForValue(Value val) { +void InferSymbolicShapeContext::SetSymbolForValueByStaticShape(Value val) { const auto& value_type = val.type(); if (!val || !value_type) { - PADDLE_THROW( - phi::errors::Fatal("Set static shape for null value is FOBBIDEN!")); + LOG(WARNING) << "Risk on SetSymbolForValueByStaticShape for null value"; + return; } if (!IsStaticShape(val)) { - LOG(WARNING) << "Risk on SetStaticShapeForValue for contain_unknown_dim"; + LOG(WARNING) + << "Risk on SetSymbolForValueByStaticShape for contain_unknown_dim"; } const auto& GetStaticShapeForDenseTensorType = [&](DenseTensorType type_info) -> symbol::TensorShapeOrDataDimExprs { @@ -289,8 +290,8 @@ const std::string 
ShapeConstraintIRAnalysis::GetNextSymName() { return context_.GetNextSymName(); } -void ShapeConstraintIRAnalysis::SetStaticShapeForValue(Value val) { - context_.SetStaticShapeForValue(val); +void ShapeConstraintIRAnalysis::SetSymbolForValueByStaticShape(Value val) { + context_.SetSymbolForValueByStaticShape(val); } void ShapeConstraintIRAnalysis::InferShapeOrDataForValue(Value val) { @@ -319,7 +320,7 @@ void ShapeConstraintIRAnalysis::InferShapeOrDataForValue(Value val) { for (auto& operand : GetRealOperandSource(op)) { if (operand.impl() && !context_.HasShapeOrDataForValue(operand)) { if (!operand.defining_op()) { - SetStaticShapeForValue(operand); + SetSymbolForValueByStaticShape(operand); } else { Visit(operand.defining_op()); } @@ -334,7 +335,7 @@ void ShapeConstraintIRAnalysis::InferShapeOrDataForValue(Value val) { for (auto& operand : GetRealOperandSource(op)) { if (operand.impl() && !context_.HasShapeOrDataForValue(operand)) { if (!operand.defining_op()) { - SetStaticShapeForValue(operand); + SetSymbolForValueByStaticShape(operand); } else { has_prev_op = true; } @@ -379,22 +380,23 @@ void ShapeConstraintIRAnalysis::InferShapeOrDataForValue(Value val) { if (infer_symbolic_shape_interface) { infer_symbolic_shape_interface.InferSymbolicShape(&context_); for (auto& result_value : op->results()) { - if (result_value && (!context_.HasShapeOrDataForValue(result_value))) { + if (!result_value || !result_value.type()) { + continue; + } + if (!context_.HasShapeOrDataForValue(result_value)) { PADDLE_THROW(phi::errors::Fatal(op->name() + " HAS ERROR on InferSymbolicShape!")); } } } else { - // TODO(Hongqing-work): throw it after the shape analysis reconstruct - // is done. 
- // PADDLE_THROW(phi::errors::Unimplemented( - // val.defining_op()->name() + - // " DOES NOT have InferSymbolicShapeInterface!")); LOG(WARNING) << op->name() << " DOES NOT have InferSymbolicShapeInterface!"; for (auto& result_value : op->results()) { - if (result_value && (!context_.HasShapeOrDataForValue(result_value))) { - SetStaticShapeForValue(result_value); + if (!result_value || !result_value.type()) { + continue; + } + if (!context_.HasShapeOrDataForValue(result_value)) { + SetSymbolForValueByStaticShape(result_value); } } } @@ -412,7 +414,7 @@ ShapeConstraintIRAnalysis::GetShapeOrDataForValue(Value val) { if (!context_.HasShapeOrDataForValue(val)) { // backtrack to infer shape from defining op if (!val.defining_op()) { - SetStaticShapeForValue(val); + SetSymbolForValueByStaticShape(val); } else { VLOG(3) << "InferShapeOrDataForValue, defining_op: " << val.defining_op()->name(); diff --git a/paddle/scripts/build_docker_images.sh b/paddle/scripts/build_docker_images.sh index 2b584cdca6b4c..e078e473f573f 100644 --- a/paddle/scripts/build_docker_images.sh +++ b/paddle/scripts/build_docker_images.sh @@ -1,13 +1,13 @@ #!/bin/sh # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/scripts/fast_install.sh b/paddle/scripts/fast_install.sh index 5793a38d6ef3a..90d7af6a0c0df 100644 --- a/paddle/scripts/fast_install.sh +++ b/paddle/scripts/fast_install.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -468,7 +468,7 @@ function PipLinuxInstall(){ fi else echo paddlepaddle whl包下载失败 - echo "wget err: $wheel_gpu_develop" + echo "wget err: $wheel_gpu_develop" exit 1 fi else diff --git a/paddle/scripts/musl_build/Dockerfile b/paddle/scripts/musl_build/Dockerfile index 1c53284cef6b3..babf3f6050039 100644 --- a/paddle/scripts/musl_build/Dockerfile +++ b/paddle/scripts/musl_build/Dockerfile @@ -1,11 +1,11 @@ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/scripts/musl_build/build_docker.sh b/paddle/scripts/musl_build/build_docker.sh index 0739cbdf731c8..c822a3a225136 100755 --- a/paddle/scripts/musl_build/build_docker.sh +++ b/paddle/scripts/musl_build/build_docker.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -46,7 +46,7 @@ function build_image(){ declare -a BUILD_ARGS BUILD_ARGS+=("--build-arg" "PYTHON_VERSION=$PYTHON_VERSION") echo ">>> python version: $PYTHON_VERSION" - + if [ "$HTTP_PROXY" ]; then BUILD_ARGS+=("--build-arg" "http_proxy=$HTTP_PROXY") echo ">>> using http proxy: $HTTP_PROXY" @@ -81,7 +81,7 @@ function build_image(){ echo ">>> with pip index: $WITH_PIP_INDEX" BUILD_ARGS+=("--build-arg" pip_index="$WITH_PIP_INDEX") fi - + echo ">>> build docker image: $BUILD_IMAGE" # shellcheck disable=2086 docker build \ diff --git a/paddle/scripts/musl_build/build_inside.sh b/paddle/scripts/musl_build/build_inside.sh index 4c7fa804de578..297f1f058e0e4 100755 --- a/paddle/scripts/musl_build/build_inside.sh +++ b/paddle/scripts/musl_build/build_inside.sh @@ -1,13 +1,13 @@ #!/bin/sh # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -33,13 +33,13 @@ cd "$BUILD_DIR" # setup root dir chown -R root:root /root -if [ "$HTTP_PROXY" ]; then - echo ">>> http_proxy: $HTTP_PROXY" +if [ "$HTTP_PROXY" ]; then + echo ">>> http_proxy: $HTTP_PROXY" git config --global http.proxy "$HTTP_PROXY" fi -if [ "$HTTP_PROXY" ]; then - echo ">>> https_proxy: $HTTPS_PROXY" +if [ "$HTTP_PROXY" ]; then + echo ">>> https_proxy: $HTTPS_PROXY" git config --global https.proxy "$HTTPS_PROXY" fi diff --git a/paddle/scripts/musl_build/build_paddle.sh b/paddle/scripts/musl_build/build_paddle.sh index 879bb823c2714..cfeba3cf92632 100755 --- a/paddle/scripts/musl_build/build_paddle.sh +++ b/paddle/scripts/musl_build/build_paddle.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/paddle/scripts/musl_build/config.sh b/paddle/scripts/musl_build/config.sh index ded239a2a4da7..4972876c3bd03 100755 --- a/paddle/scripts/musl_build/config.sh +++ b/paddle/scripts/musl_build/config.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 45b796671852e..82f06bc1b4030 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -94,6 +94,7 @@ if not defined retry_times set retry_times=1 if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python38 if not defined BUILD_DIR set BUILD_DIR=build if not defined TEST_INFERENCE set TEST_INFERENCE=ON +if not defined WITH_PIP_CUDA_LIBRARIES set WITH_PIP_CUDA_LIBRARIES=OFF set task_name=%1 set UPLOAD_TP_FILE=OFF @@ -301,6 +302,7 @@ rem ------Build windows avx whl package------ :CASE_build_avx_whl set WITH_AVX=ON set ON_INFER=ON +set WITH_PIP_CUDA_LIBRARIES=ON if not defined CUDA_ARCH_NAME set CUDA_ARCH_NAME=All call :cmake || goto cmake_error @@ -501,12 +503,15 @@ echo %task_name%|findstr build >nul && ( ) :cmake_impl +if "%WITH_TESTING%"=="ON" ( + cd /d %work_dir%\%BUILD_DIR% + rem whether to run cpp test + python -m pip install PyGithub + python %work_dir%\tools\check_only_change_python_files.py + if exist %work_dir%\%BUILD_DIR%\only_change_python_file.txt set WITH_CPP_TEST=OFF + echo WITH_CPP_TEST: %WITH_CPP_TEST% +) cd /d %work_dir%\%BUILD_DIR% -rem whether to run cpp test -python -m pip install PyGithub -python %work_dir%\tools\check_only_change_python_files.py -if exist %work_dir%\%BUILD_DIR%\only_change_python_file.txt set WITH_CPP_TEST=OFF -echo WITH_CPP_TEST: %WITH_CPP_TEST% echo cmake .. -G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DPYTHON_EXECUTABLE=%PYTHON_EXECUTABLE% -DON_INFER=%ON_INFER% ^ -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ @@ -515,7 +520,7 @@ echo cmake .. 
-G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -D -DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME% -DCUDA_ARCH_BIN=%CUDA_ARCH_BIN% -DCUB_PATH=%THIRD_PARTY_HOME%/cub ^ -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_TOOLKIT_ROOT_DIR%" -DNEW_RELEASE_ALL=%NEW_RELEASE_ALL% -DNEW_RELEASE_PYPI=%NEW_RELEASE_PYPI% ^ -DNEW_RELEASE_JIT=%NEW_RELEASE_JIT% -DWITH_ONNXRUNTIME=%WITH_ONNXRUNTIME% -DWITH_CPP_TEST=%WITH_CPP_TEST% ^ --DWIN_UNITTEST_LEVEL=%WIN_UNITTEST_LEVEL% -DWITH_NIGHTLY_BUILD=%WITH_NIGHTLY_BUILD% +-DWIN_UNITTEST_LEVEL=%WIN_UNITTEST_LEVEL% -DWITH_NIGHTLY_BUILD=%WITH_NIGHTLY_BUILD% -DWITH_PIP_CUDA_LIBRARIES=%WITH_PIP_CUDA_LIBRARIES% >> %work_dir%\win_cmake.sh echo cmake .. -G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DPYTHON_EXECUTABLE=%PYTHON_EXECUTABLE% -DON_INFER=%ON_INFER% ^ @@ -525,7 +530,7 @@ echo cmake .. -G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -D -DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME% -DCUDA_ARCH_BIN=%CUDA_ARCH_BIN% -DCUB_PATH=%THIRD_PARTY_HOME%/cub ^ -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_TOOLKIT_ROOT_DIR%" -DNEW_RELEASE_ALL=%NEW_RELEASE_ALL% -DNEW_RELEASE_PYPI=%NEW_RELEASE_PYPI% ^ -DNEW_RELEASE_JIT=%NEW_RELEASE_JIT% -DWITH_ONNXRUNTIME=%WITH_ONNXRUNTIME% -DWITH_CPP_TEST=%WITH_CPP_TEST% ^ --DWIN_UNITTEST_LEVEL=%WIN_UNITTEST_LEVEL% -DWITH_NIGHTLY_BUILD=%WITH_NIGHTLY_BUILD% >> %work_dir%\win_cmake.sh +-DWIN_UNITTEST_LEVEL=%WIN_UNITTEST_LEVEL% -DWITH_NIGHTLY_BUILD=%WITH_NIGHTLY_BUILD% -DWITH_PIP_CUDA_LIBRARIES=%WITH_PIP_CUDA_LIBRARIES% >> %work_dir%\win_cmake.sh cmake .. -G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DPYTHON_EXECUTABLE=%PYTHON_EXECUTABLE% -DON_INFER=%ON_INFER% ^ @@ -535,7 +540,7 @@ cmake .. 
-G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -DWITH_ -DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME% -DCUDA_ARCH_BIN=%CUDA_ARCH_BIN% -DCUB_PATH=%THIRD_PARTY_HOME%/cub ^ -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_TOOLKIT_ROOT_DIR%" -DNEW_RELEASE_ALL=%NEW_RELEASE_ALL% -DNEW_RELEASE_PYPI=%NEW_RELEASE_PYPI% ^ -DNEW_RELEASE_JIT=%NEW_RELEASE_JIT% -DWITH_ONNXRUNTIME=%WITH_ONNXRUNTIME% -DWITH_CPP_TEST=%WITH_CPP_TEST% ^ --DWIN_UNITTEST_LEVEL=%WIN_UNITTEST_LEVEL% -DWITH_NIGHTLY_BUILD=%WITH_NIGHTLY_BUILD% +-DWIN_UNITTEST_LEVEL=%WIN_UNITTEST_LEVEL% -DWITH_NIGHTLY_BUILD=%WITH_NIGHTLY_BUILD% -DWITH_PIP_CUDA_LIBRARIES=%WITH_PIP_CUDA_LIBRARIES% goto:eof :cmake_error diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 8c0266c36e8c1..e793c210628be 100644 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -1130,7 +1130,10 @@ function check_whl_size() { function generate_upstream_develop_api_spec() { set -x + # Temporarily save some scripts from PR branch cp ${PADDLE_ROOT}/python/requirements.txt /tmp + cp ${PADDLE_ROOT}/tools/print_signatures.py /tmp + mkdir -p ${PADDLE_ROOT}/build/pr_whl && mv ${PADDLE_ROOT}/build/python/dist/*.whl ${PADDLE_ROOT}/build/pr_whl/ pr_whl_size=`du -m ${PADDLE_ROOT}/build/python/dist/*.whl|awk '{print $1}'` echo "pr_whl_size: ${pr_whl_size}" @@ -1178,17 +1181,20 @@ function generate_api_spec() { echo "Not supported $2" exit 1 fi + if [ "$spec_kind" == "DEV" ]; then + REQUIREMENTS_PATH=/tmp/requirements.txt + PRINT_SIGNATURES_SCRIPT_PATH=/tmp/print_signatures.py + else + REQUIREMENTS_PATH=${PADDLE_ROOT}/python/requirements.txt + PRINT_SIGNATURES_SCRIPT_PATH=${PADDLE_ROOT}/tools/print_signatures.py + fi mkdir -p ${PADDLE_ROOT}/build/.check_api_workspace cd ${PADDLE_ROOT}/build/.check_api_workspace virtualenv -p `which python` .${spec_kind}_env source .${spec_kind}_env/bin/activate + pip install -r $REQUIREMENTS_PATH - if [ "$spec_kind" == "DEV" ]; then - pip install -r 
/tmp/requirements.txt - else - pip install -r ${PADDLE_ROOT}/python/requirements.txt - fi if [ -d "${PADDLE_ROOT}/build/python/dist/" ]; then pip install ${PADDLE_ROOT}/build/python/dist/*whl elif [ -d "${PADDLE_ROOT}/dist/" ];then @@ -1196,7 +1202,10 @@ function generate_api_spec() { mkdir ${PADDLE_ROOT}/build/python/dist/ && mv ${PADDLE_ROOT}/dist/*whl ${PADDLE_ROOT}/build/python/dist/ fi spec_path=${PADDLE_ROOT}/paddle/fluid/API_${spec_kind}.spec - python ${PADDLE_ROOT}/tools/print_signatures.py paddle > $spec_path + python ${PRINT_SIGNATURES_SCRIPT_PATH} paddle > $spec_path + python ${PRINT_SIGNATURES_SCRIPT_PATH} --show-fields="args,varargs,varkw,defaults,kwonlyargs,kwonlydefaults" paddle > ${spec_path}.api + python ${PRINT_SIGNATURES_SCRIPT_PATH} --show-fields="annotations" paddle > ${spec_path}.annotations + python ${PRINT_SIGNATURES_SCRIPT_PATH} --show-fields="document" paddle > ${spec_path}.doc # used to log op_register data_type op_type_path=${PADDLE_ROOT}/paddle/fluid/OP_TYPE_${spec_kind}.spec @@ -1214,9 +1223,6 @@ function generate_api_spec() { api_source_md5_path=${PADDLE_ROOT}/paddle/fluid/API_${spec_kind}.source.md5 python ${PADDLE_ROOT}/tools/count_api_without_core_ops.py -p paddle > $api_source_md5_path - awk -F '(' '{print $NF}' $spec_path >${spec_path}.doc - awk -F '(' '{$NF="";print $0}' $spec_path >${spec_path}.api - python ${PADDLE_ROOT}/tools/diff_use_default_grad_op_maker.py \ ${PADDLE_ROOT}/paddle/fluid/op_use_default_grad_maker_${spec_kind}.spec @@ -1474,7 +1480,7 @@ function card_test() { if [ "${WITH_XPU}" == "ON" ];then CUDA_DEVICE_COUNT=1 elif [ "${WITH_ROCM}" == "ON" ];then - CUDA_DEVICE_COUNT=$(rocm-smi -i | grep GPU | wc -l) + CUDA_DEVICE_COUNT=$(rocm-smi -i | grep DCU | wc -l) elif [ "${WITH_IPU}" == "ON" ];then CUDA_DEVICE_COUNT=1 else @@ -1517,13 +1523,22 @@ function card_test() { if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} -V --timeout 
120 -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & else - (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + if [ "$WITH_ROCM" == "ON" ];then + (env HIP_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + else + (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + fi fi else if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & else - (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + if [ "$WITH_ROCM" == "ON" ];then + (env HIP_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + else + (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + fi + fi fi done @@ -2652,7 +2667,11 @@ set -x fi if [ -a "$PADDLE_ROOT/added_ut" ];then added_uts=^$(awk BEGIN{RS=EOF}'{gsub(/\n/,"$|^");print}' $PADDLE_ROOT/added_ut)$ - env CUDA_VISIBLE_DEVICES=0 ctest -R "(${added_uts})" -LE "RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE|RUN_TYPE=HYBRID" --output-on-failure 
--repeat-until-fail 3 --timeout 15;added_ut_error=$? + if [ "$WITH_ROCM" == "ON" ];then + env HIP_VISIBLE_DEVICES=0 ctest -R "(${added_uts})" -LE "RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE|RUN_TYPE=HYBRID" --output-on-failure --repeat-until-fail 3 --timeout 15;added_ut_error=$? + else + env CUDA_VISIBLE_DEVICES=0 ctest -R "(${added_uts})" -LE "RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE|RUN_TYPE=HYBRID" --output-on-failure --repeat-until-fail 3 --timeout 15;added_ut_error=$? + fi ctest -R "(${added_uts})" -L "RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE" --output-on-failure --repeat-until-fail 3 --timeout 15;added_ut_error_1=$? if [ "$added_ut_error" != 0 ] && [ "$added_ut_error_1" != 0 ];then echo "========================================" @@ -2826,7 +2845,9 @@ set +x rerun_ut_endTime_s=`date +%s` echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt - cp $PADDLE_ROOT/build/Testing/Temporary/CTestCostData.txt ${cfs_dir}/coverage/${AGILE_PULL_ID}/${AGILE_REVISION}/ + if [ "$WITH_ROCM" != "ON" ];then + cp $PADDLE_ROOT/build/Testing/Temporary/CTestCostData.txt ${cfs_dir}/coverage/${AGILE_PULL_ID}/${AGILE_REVISION}/ + fi if [[ "$EXIT_CODE" != "0" ]]; then show_ut_retry_result fi @@ -3488,7 +3509,6 @@ function build_document_preview() { sh /paddle/tools/document_preview.sh ${PORT} } - # origin name: example function exec_samplecode_test() { if [ -d "${PADDLE_ROOT}/build/pr_whl" ];then @@ -3502,10 +3522,10 @@ function exec_samplecode_test() { cd ${PADDLE_ROOT}/tools if [ "$1" = "cpu" ] ; then - python sampcd_processor.py --debug --mode cpu; example_error=$? + python sampcd_processor.py --mode cpu; example_error=$? elif [ "$1" = "gpu" ] ; then SAMPLE_CODE_EXEC_THREADS=${SAMPLE_CODE_EXEC_THREADS:-2} - python sampcd_processor.py --threads=${SAMPLE_CODE_EXEC_THREADS} --debug --mode gpu; example_error=$? 
+ python sampcd_processor.py --threads=${SAMPLE_CODE_EXEC_THREADS} --mode gpu; example_error=$? fi if [ "$example_error" != "0" ];then echo "Code instance execution failed" >&2 @@ -3513,6 +3533,75 @@ function exec_samplecode_test() { fi } +function need_type_checking() { + set +x + + # check pr title + TITLE_CHECK=`curl -s https://github.com/PaddlePaddle/Paddle/pull/${GIT_PR_ID} | grep "" | grep -i "typing" || true` + + if [[ ${TITLE_CHECK} ]]; then + set -x + return 0 + else + set -x + return 1 + fi +} + +function exec_type_checking() { + if [ -d "${PADDLE_ROOT}/build/pr_whl" ];then + pip install ${PADDLE_ROOT}/build/pr_whl/*.whl + else + echo "WARNING: PR wheel is not found. Use develop wheel !!!" + pip install ${PADDLE_ROOT}/build/python/dist/*.whl + fi + + python -c "import paddle;print(paddle.__version__);paddle.version.show()" + + cd ${PADDLE_ROOT}/tools + + # check all sample code + TITLE_CHECK_ALL=`curl -s https://github.com/PaddlePaddle/Paddle/pull/${GIT_PR_ID} | grep "<title>" | grep -i "typing all" || true` + + if [[ ${TITLE_CHECK_ALL} ]]; then + python type_checking.py --full-test; type_checking_error=$? + else + python type_checking.py; type_checking_error=$? + fi + + if [ "$type_checking_error" != "0" ];then + echo "Example code type checking failed" >&2 + exit 5 + fi +} + + +function exec_samplecode_checking() { + example_info_gpu="" + example_code_gpu=0 + if [ "${WITH_GPU}" == "ON" ] ; then + { example_info_gpu=$(exec_samplecode_test gpu 2>&1 1>&3 3>/dev/null); } 3>&1 + example_code_gpu=$? + fi + { example_info=$(exec_samplecode_test cpu 2>&1 1>&3 3>/dev/null); } 3>&1 + example_code=$? + + # TODO(megemini): type_checkding should be default after type annotation been done. + need_type_checking + type_checking_status=$? + + if [[ ${type_checking_status} -eq 0 ]]; then + { type_checking_info=$(exec_type_checking 2>&1 1>&3 3>/dev/null); } 3>&1 + type_checking_code=$? 
+ fi + + summary_check_example_code_problems $[${example_code_gpu} + ${example_code}] "${example_info_gpu}\n${example_info}" + + if [[ ${type_checking_status} -eq 0 ]]; then + summary_type_checking_problems $type_checking_code "$type_checking_info" + fi +} + function collect_ccache_hits() { ccache -s @@ -3553,10 +3642,11 @@ function test_model_benchmark() { bash ${PADDLE_ROOT}/tools/test_model_benchmark.sh } -function summary_check_problems() { +function summary_check_example_code_problems() { set +x local example_code=$1 local example_info=$2 + if [ $example_code -ne 0 ];then echo "===============================================================================" echo "*****Example code error***** Please fix the error listed in the information:" @@ -3579,6 +3669,33 @@ function summary_check_problems() { } +function summary_type_checking_problems() { + set +x + local type_checking_code=$1 + local type_checking_info=$2 + + if [ $type_checking_code -ne 0 ];then + echo "===============================================================================" + echo "*****Example code type checking error***** Please fix the error listed in the information:" + echo "===============================================================================" + echo "$type_checking_info" + echo "===============================================================================" + echo "*****Example code type checking FAIL*****" + echo "===============================================================================" + exit $type_checking_code + else + echo "===============================================================================" + echo "*****Example code type checking info*****" + echo "===============================================================================" + echo "$type_checking_info" + echo "===============================================================================" + echo "*****Example code type checking PASS*****" + echo 
"===============================================================================" + fi + set -x +} + + function reuse_so_cache() { get_html="https://api.github.com/repos/PaddlePaddle/Paddle" curl -X GET ${get_html}/commits -H "authorization: token ${GITHUB_API_TOKEN}" >tmp.txt @@ -3631,7 +3748,10 @@ function build_pr_and_develop() { fi mv ${PADDLE_ROOT}/dist/*.whl ${PADDLE_ROOT}/build/python/dist/ cmake_change=`git diff --name-only upstream/$BRANCH | grep "cmake/external" || true` + # Temporarily save some scripts from PR branch cp ${PADDLE_ROOT}/python/requirements.txt /tmp + cp ${PADDLE_ROOT}/tools/print_signatures.py /tmp + generate_api_spec "$1" "PR" mkdir ${PADDLE_ROOT}/build/pr_whl && cp ${PADDLE_ROOT}/build/python/dist/*.whl ${PADDLE_ROOT}/build/pr_whl rm -f ${PADDLE_ROOT}/build/python/dist/*.whl && rm -f ${PADDLE_ROOT}/build/python/build/.timestamp @@ -4262,15 +4382,7 @@ function main() { check_sequence_op_unittest generate_api_spec ${PYTHON_ABI:-""} "PR" set +e - example_info_gpu="" - example_code_gpu=0 - if [ "${WITH_GPU}" == "ON" ] ; then - { example_info_gpu=$(exec_samplecode_test gpu 2>&1 1>&3 3>/dev/null); } 3>&1 - example_code_gpu=$? - fi - { example_info=$(exec_samplecode_test cpu 2>&1 1>&3 3>/dev/null); } 3>&1 - example_code=$? - summary_check_problems $[${example_code_gpu} + ${example_code}] "${example_info_gpu}\n${example_info}" + exec_samplecode_checking assert_api_spec_approvals ;; build_and_check_cpu) @@ -4282,15 +4394,7 @@ function main() { ;; build_and_check_gpu) set +e - example_info_gpu="" - example_code_gpu=0 - if [ "${WITH_GPU}" == "ON" ] ; then - { example_info_gpu=$(exec_samplecode_test gpu 2>&1 1>&3 3>/dev/null); } 3>&1 - example_code_gpu=$? - fi - { example_info=$(exec_samplecode_test cpu 2>&1 1>&3 3>/dev/null); } 3>&1 - example_code=$? 
- summary_check_problems $[${example_code_gpu} + ${example_code}] "${example_info_gpu}\n${example_info}" + exec_samplecode_checking assert_api_spec_approvals ;; check_whl_size) @@ -4395,6 +4499,9 @@ function main() { export FLAGS_PIR_OPTEST=True parallel_test true ;; + hyg_dcu_test) + parallel_test + ;; nv_cicheck_coverage) parallel_test nv_test @@ -4416,10 +4523,6 @@ function main() { build ${parallel_number} run_brpc_test ;; - assert_api) - generate_upstream_develop_api_spec ${PYTHON_ABI:-""} ${parallel_number} - assert_api_spec_approvals - ;; test_inference) PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )" if [ "${WITH_PYTHON}" == "OFF" ] ; then @@ -4449,9 +4552,6 @@ function main() { gen_fluid_lib ${parallel_number} test_fluid_lib_train ;; - assert_api_approvals) - assert_api_spec_approvals - ;; assert_file_approvals) assert_file_diff_approvals ;; @@ -4533,11 +4633,6 @@ function main() { build ${parallel_number} build_document_preview ;; - api_example) - { example_info=$(exec_samplecode_test cpu 2>&1 1>&3 3>/dev/null); } 3>&1 - example_code=$? 
- summary_check_problems $example_code "$example_info" - ;; test_op_benchmark) test_op_benchmark ;; diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in index f016890ca3269..5dc084deeae6c 100755 --- a/paddle/scripts/submit_local.sh.in +++ b/paddle/scripts/submit_local.sh.in @@ -98,7 +98,7 @@ function threads_config() { export OPENBLAS_MAIN_FREE=1 fi fi - + } PADDLE_CONF_HOME="$HOME/.config/paddle" @@ -138,7 +138,7 @@ fi if [ "@WITH_GPU@" == "ON" ]; then PADDLE_NAME="paddlepaddle-gpu" -else +else PADDLE_NAME="paddlepaddle" fi diff --git a/paddle/scripts/windows_build/build.bat b/paddle/scripts/windows_build/build.bat index 0aeacfef7f9bd..4ffec08e666e2 100644 --- a/paddle/scripts/windows_build/build.bat +++ b/paddle/scripts/windows_build/build.bat @@ -1,5 +1,5 @@ @ECHO OFF -SETLOCAL +SETLOCAL set source_path=%1 set PYTHON_DIR=%2 set WITH_GPU=%3 diff --git a/pyproject.toml b/pyproject.toml index 4a4a5a73c5fda..dc9455167005e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,9 @@ select = [ # Pyupgrade "UP", + # Flake8-pyi + "PYI", + # NumPy-specific rules "NPY001", "NPY003", @@ -111,6 +114,10 @@ ignore = [ "UP015", # It will cause the performance regression on python3.10 "UP038", + # collections.namedtuple can be quickly created a inlined class + "PYI024", + # `__all__.append` is a common pattern in Paddle + "PYI056", ] [tool.ruff.lint.isort] @@ -131,3 +138,34 @@ known-first-party = ["paddle"] "test/dygraph_to_static/test_loop.py" = ["C416", "F821"] # Ignore unnecessary lambda in dy2st unittest test_lambda "test/dygraph_to_static/test_lambda.py" = ["PLC3002"] +# Ignore docstring in tensor.pyi +"python/paddle/tensor/tensor.prototype.pyi" = ["PYI021", "PYI048"] + +[tool.mypy] +python_version = "3.8" +cache_dir = ".mypy_cache" +# Miscellaneous strictness flags +allow_redefinition = true +local_partial_types = true +strict = false +# Untyped definitions and calls +check_untyped_defs = true +# Import discovery +follow_imports = 
"normal" +# Miscellaneous +warn_unused_configs = true +# Configuring warnings +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +# Configuring error messages +show_column_numbers = true + +[[tool.mypy.overrides]] +module = [ + "astor", + "cv2", + "scipy", + "xlsxwriter" +] +ignore_missing_imports = true diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b3029a24309cf..16501a254f280 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -25,7 +25,7 @@ set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} ${FLUID_PY_FILES}) if(WITH_GPU) set(PACKAGE_NAME "paddlepaddle-gpu") elseif(WITH_ROCM) - set(PACKAGE_NAME "paddlepaddle-rocm") + set(PACKAGE_NAME "paddlepaddle-dcu") elseif(WITH_XPU) set(PACKAGE_NAME "paddlepaddle-xpu") elseif(WITH_IPU) @@ -173,17 +173,10 @@ endif() add_custom_target(paddle_python ALL DEPENDS ${PADDLE_PYTHON_BUILD_DIR}/.timestamp) + if(BUILD_WHL_PACKAGE AND NOT WITH_SETUP_INSTALL) - add_custom_target( - paddle_copy ALL - # generate tensor.pyi for type hints - COMMAND - ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python - ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/gen_tensor_stub.py - --input-file - ${PADDLE_SOURCE_DIR}/python/paddle/tensor/tensor.prototype.pyi - --output-file ${PADDLE_BINARY_DIR}/python/paddle/tensor/tensor.pyi - DEPENDS ${PADDLE_PYTHON_BUILD_DIR}/.timestamp_wheel) + add_custom_target(paddle_copy ALL + DEPENDS ${PADDLE_PYTHON_BUILD_DIR}/.timestamp_wheel) add_dependencies(paddle_copy paddle_python) endif() diff --git a/python/env_dict.py.in b/python/env_dict.py.in index 46c280e823df3..62822d4e16cf2 100644 --- a/python/env_dict.py.in +++ b/python/env_dict.py.in @@ -14,6 +14,8 @@ env_dict={ 'FLUID_CORE_NAME':'@FLUID_CORE_NAME@', 'PHI_LIB':'@PHI_LIB@', 'PHI_NAME':'@PHI_NAME@', + 'PHI_KERNEL_GPU_LIB':'@PHI_KERNEL_GPU_LIB@', + 'PHI_KERNEL_GPU_NAME':'@PHI_KERNEL_GPU_NAME@', 'WITH_SHARED_PHI':'@WITH_SHARED_PHI@', 'IR_LIB':'@IR_LIB@', 'IR_NAME':'@IR_NAME@', diff --git 
a/python/paddle/__init__.py b/python/paddle/__init__.py index 0cd36f299ecd6..37409b626009b 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -14,12 +14,16 @@ import typing +__is_metainfo_generated = False try: from paddle.cuda_env import * # noqa: F403 from paddle.version import ( # noqa: F401 commit as __git_commit__, full_version as __version__, ) + + __is_metainfo_generated = True + except ImportError: import sys @@ -272,6 +276,7 @@ atleast_1d, atleast_2d, atleast_3d, + block_diag, broadcast_tensors, broadcast_to, cast, @@ -433,6 +438,7 @@ inner, inverse, isfinite, + isin, isinf, isnan, isneginf, @@ -577,8 +583,7 @@ if os.path.exists(cuh_file): os.environ.setdefault('runtime_include_dir', runtime_include_dir) - -if is_compiled_with_cuda(): +if __is_metainfo_generated and is_compiled_with_cuda(): import os import platform @@ -679,7 +684,9 @@ ctypes.CDLL('msvcp140.dll') ctypes.CDLL('vcruntime140_1.dll') except OSError: - print( + import logging + + logging.error( '''Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure. 
It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe''' ) @@ -699,7 +706,6 @@ path_patched = False for dll in dlls: is_loaded = False - print("dll:", dll) if with_load_library_flags: res = kernel32.LoadLibraryExW(dll, None, 0x00001100) last_error = ctypes.get_last_error() @@ -733,6 +739,7 @@ ir_guard._switch_to_pir() __all__ = [ + 'block_diag', 'iinfo', 'finfo', 'dtype', @@ -846,6 +853,7 @@ 'squeeze_', 'to_tensor', 'gather_nd', + 'isin', 'isinf', 'isneginf', 'isposinf', diff --git a/python/paddle/_typing/basic.py b/python/paddle/_typing/basic.py index 4ecd20b74ebce..f6c7d92ac15ed 100644 --- a/python/paddle/_typing/basic.py +++ b/python/paddle/_typing/basic.py @@ -25,30 +25,10 @@ Numberic: TypeAlias = Union[int, float, complex, np.number, "Tensor"] TensorLike: TypeAlias = Union[npt.NDArray[Any], "Tensor", Numberic] -_T = TypeVar("_T", bound=Numberic) -_SeqLevel1: TypeAlias = Sequence[_T] -_SeqLevel2: TypeAlias = Sequence[Sequence[_T]] -_SeqLevel3: TypeAlias = Sequence[Sequence[Sequence[_T]]] -_SeqLevel4: TypeAlias = Sequence[Sequence[Sequence[Sequence[_T]]]] -_SeqLevel5: TypeAlias = Sequence[Sequence[Sequence[Sequence[Sequence[_T]]]]] -_SeqLevel6: TypeAlias = Sequence[ - Sequence[Sequence[Sequence[Sequence[Sequence[_T]]]]] -] - -IntSequence: TypeAlias = _SeqLevel1[int] - -NumbericSequence: TypeAlias = _SeqLevel1[Numberic] - -NestedSequence: TypeAlias = Union[ - _T, - _SeqLevel1[_T], - _SeqLevel2[_T], - _SeqLevel3[_T], - _SeqLevel4[_T], - _SeqLevel5[_T], - _SeqLevel6[_T], -] +_T = TypeVar("_T") +NestedSequence = Union[_T, Sequence["NestedSequence[_T]"]] +IntSequence = Sequence[int] +NumbericSequence = Sequence[Numberic] NestedNumbericSequence: TypeAlias = NestedSequence[Numberic] - TensorOrTensors: TypeAlias = Union["Tensor", Sequence["Tensor"]] diff --git a/python/paddle/_typing/shape.py b/python/paddle/_typing/shape.py index 0193840119a66..235bfd6157c9b 100644 --- a/python/paddle/_typing/shape.py +++ b/python/paddle/_typing/shape.py @@ -13,23 
+13,24 @@ # limitations under the License. from __future__ import annotations -from typing import List, Tuple, Union +from typing import TYPE_CHECKING, List, Tuple, Union from typing_extensions import TypeAlias -from .. import Tensor +if TYPE_CHECKING: + from .. import Tensor DynamicShapeLike: TypeAlias = Union[ - Tuple[Union[int, Tensor, None], ...], - List[Union[int, Tensor, None]], - Tensor, + Tuple[Union[int, "Tensor", None], ...], + List[Union[int, "Tensor", None]], + "Tensor", ] ShapeLike: TypeAlias = Union[ Tuple[int, ...], List[int], - Tensor, + "Tensor", ] diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py index 9ae60e5185ee0..34318f3cc9183 100644 --- a/python/paddle/amp/auto_cast.py +++ b/python/paddle/amp/auto_cast.py @@ -251,7 +251,7 @@ def _pir_transform(t, dtype): param = op.operand(0).source() cast_param = paddle.cast(param, dtype) cast_param.persistable = True - paddle._pir_ops.updata_parameter(cast_param, t.name) + paddle._pir_ops.update_parameter(cast_param, t.name) block.remove_op(op) break main.set_parameters_from(startup) diff --git a/python/paddle/autograd/backward_utils.py b/python/paddle/autograd/backward_utils.py index 0649c3e19bf05..8266b9edc2009 100644 --- a/python/paddle/autograd/backward_utils.py +++ b/python/paddle/autograd/backward_utils.py @@ -29,20 +29,21 @@ # TODO: Consider a better way to mark these ops has no grad op. # Such as use a new trait to mark these ops. +# Please keep them as alphabetical order. 
ALLOW_NO_GRAD_OPS = [ # Compare ops "pd_op.equal", "pd_op.equal_", - "pd_op.not_equal", - "pd_op.not_equal_", - "pd_op.less_than", - "pd_op.less_than_", - "pd_op.less_equal", - "pd_op.less_equal_", "pd_op.greater_than", "pd_op.greater_than_", "pd_op.greater_equal", "pd_op.greater_equal_", + "pd_op.less_than", + "pd_op.less_than_", + "pd_op.less_equal", + "pd_op.less_equal_", + "pd_op.not_equal", + "pd_op.not_equal_", # Logical ops "pd_op.logical_and", "pd_op.logical_and_", @@ -67,35 +68,39 @@ "pd_op.bitwise_xor_", # Array ops "pd_op.assign_array", - "pd_op.array_length", - "pd_op.slice_array", - "pd_op.slice_array_dense", - "pd_op.assign_array", "pd_op.assign_array_", - "pd_op.create_array", - "pd_op.create_array_like", + "pd_op.array_length", + "pd_op.array_pop", "pd_op.array_read", "pd_op.array_write_", - "pd_op.array_pop", + "pd_op.create_array", + "pd_op.create_array_like", + "pd_op.slice_array", + "pd_op.slice_array_dense", # Others - "pd_op.remainder", - "pd_op.argmax", - "pd_op.print", "pd_op.accuracy", - "pd_op.randint", - "pd_op.uniform", - "pd_op.gaussian", + "pd_op.all", + "pd_op.any", + "pd_op.argmax", + "pd_op.assign_value_", "pd_op.bernoulli", + "pd_op.distribute_fpn_proposals", + "pd_op.floor_divide", "pd_op.full_like", - "pd_op.assign_value_", - "pd_op.nextafter", + "pd_op.full_with_tensor", + "pd_op.gaussian", "pd_op.isnan", "pd_op.isinf", - "pd_op.all", - "pd_op.any", + "pd_op.nextafter", + "pd_op.nonzero", + "pd_op.one_hot", + "pd_op.print", "pd_op.prior_box", + "pd_op.randint", + "pd_op.remainder", + "pd_op.shape", "pd_op.share_data_", - "pd_op.floor_divide", + "pd_op.uniform", ] @@ -113,6 +118,8 @@ "pd_op.rsqrt", "pd_op.sigmoid", "pd_op.silu", + "pd_op.sum", + "pd_op.mean", ] diff --git a/python/paddle/base/core.py b/python/paddle/base/core.py index 4b00161bc3c82..f412a954c0bb0 100644 --- a/python/paddle/base/core.py +++ b/python/paddle/base/core.py @@ -506,7 +506,11 @@ def _test_use_sync(value): # ops in forward_blacklist will not be replaced 
by composite ops. -prim_config = {"forward_blacklist": set(), "composite_ops_record": set()} +prim_config = { + "forward_blacklist": set(), + "composite_ops_record": set(), + "backward_blacklist": set(), +} def _get_batch_norm_none_var(op): @@ -588,6 +592,7 @@ def _reset_prim_forward_blacklist(): def _set_prim_backward_blacklist(*args): ops = set(args) for item in ops: + prim_config["backward_blacklist"].add(item) if not isinstance(item, str): raise TypeError("all items in set must belong to string") _set_bwd_prim_blacklist(ops) @@ -671,3 +676,15 @@ def _check_and_set_prim_vjp_skip_default_ops(): _check_and_set_prim_vjp_skip_default_ops() + + +def _check_prim_vjp_ops(): + ops_org = os.getenv("FLAGS_prim_backward_blacklist", "") + if ops_org: + ops = [] + for item in ops_org.split(";"): + ops.append(item.strip()) + _set_prim_backward_blacklist(*ops) + + +_check_prim_vjp_ops() diff --git a/python/paddle/base/executor.py b/python/paddle/base/executor.py index fef11f5985ef1..74afe0d32ed85 100755 --- a/python/paddle/base/executor.py +++ b/python/paddle/base/executor.py @@ -1173,6 +1173,9 @@ def _get_pir_program_and_executor(self, cached_data): if core._enable_dist_prim_all(): with decomp.prim_guard(): decomp.decompose_dist_program(program) + from paddle.base.libpaddle.pir import dump_pir_py_code_if_need + + dump_pir_py_code_if_need(program, "pir_original_programs.py") return program, new_exe, data_op_infos diff --git a/python/paddle/base/framework.py b/python/paddle/base/framework.py index fcd69d0fd65d1..4b62b57f4e806 100644 --- a/python/paddle/base/framework.py +++ b/python/paddle/base/framework.py @@ -33,7 +33,6 @@ import numpy as np import paddle -import paddle.version as paddle_version from .. import pir from . 
import core, unique_name @@ -573,10 +572,10 @@ def require_version(min_version, max_version=None): ) version_installed = [ - paddle_version.major, - paddle_version.minor, - paddle_version.patch, - paddle_version.rc, + paddle.version.major, + paddle.version.minor, + paddle.version.patch, + paddle.version.rc, ] zero_version = ["0", "0", "0", "0"] @@ -591,13 +590,13 @@ def version_cmp(ver_a, ver_b): if version_cmp(version_installed, zero_version) == 0: if max_version is not None: warnings.warn( - f"PaddlePaddle version in [{min_version}, {max_version}] required, but {paddle_version.full_version} installed. " + f"PaddlePaddle version in [{min_version}, {max_version}] required, but {paddle.version.full_version} installed. " "Maybe you are using a develop version, " "please make sure the version is good with your code." ) else: warnings.warn( - f"PaddlePaddle version {min_version} or higher is required, but {paddle_version.full_version} installed, " + f"PaddlePaddle version {min_version} or higher is required, but {paddle.version.full_version} installed, " "Maybe you are using a develop version, " "please make sure the version is good with your code." ) @@ -619,12 +618,12 @@ def version_cmp(ver_a, ver_b): or version_cmp(version_installed, min_version_to_check) < 0 ): raise Exception( - f"VersionError: PaddlePaddle version in [{min_version}, {max_version}] required, but {paddle_version.full_version} installed." + f"VersionError: PaddlePaddle version in [{min_version}, {max_version}] required, but {paddle.version.full_version} installed." ) else: if version_cmp(version_installed, min_version_to_check) < 0: raise Exception( - f"VersionError: PaddlePaddle version {min_version} or higher is required, but {paddle_version.full_version} installed, " + f"VersionError: PaddlePaddle version {min_version} or higher is required, but {paddle.version.full_version} installed, " f"please upgrade your PaddlePaddle to {min_version} or other higher version." 
) @@ -1617,6 +1616,9 @@ def __init__( if name is None: name = self.block.program._name_generator("_generated_var") + while self.block._find_var_recursive(name) is not None: + name = self.block.program._name_generator("_generated_var") + if dtype is not None: dtype = convert_to_proto_type(dtype) diff --git a/python/paddle/decomposition/decomp.py b/python/paddle/decomposition/decomp.py index 6ffaebe444c9d..ab06767768271 100644 --- a/python/paddle/decomposition/decomp.py +++ b/python/paddle/decomposition/decomp.py @@ -850,13 +850,15 @@ def decompose_dist_program(pir_program): decompose(pir_program, []) # decomp backward ops + blacklist = core.prim_config["backward_blacklist"] + block = pir_program.global_block() + pre_combine_op = None with paddle.pir.core.program_guard(pir_program): ops = pir_program.global_block().ops for op in ops: bwd_op_name = op.name() - # todo(CZ): to be removed - if bwd_op_name in ["pd_op.mean_grad", "pd_op.concat_grad"]: + if bwd_op_name.split(".")[-1] in blacklist: continue skip_decomp = False if has_decomp_vjp(op): @@ -867,13 +869,45 @@ def decompose_dist_program(pir_program): if not skip_decomp: pir.set_insertion_point(op) orig_outs = op.results() + + is_next_split = False decomp_outs = call_decomp_vjp(op) - new_outs = _analyse_decomp_results( - orig_outs, decomp_outs, op - ) - op.replace_all_uses_with(new_outs) + for i in range(len(orig_outs)): + if orig_outs[i].has_one_use(): + next_op = orig_outs[i].first_use().owner() + if next_op.name() == "builtin.split": + is_next_split = True + _check_op_results( + next_op.name(), + next_op.results(), + decomp_outs[i], + ) + next_op.replace_all_uses_with(decomp_outs[i]) + block.remove_op(next_op) + + if not is_next_split: + new_outs = _analyse_decomp_results( + orig_outs, decomp_outs, op + ) + _check_op_results(op.name(), orig_outs, new_outs) + op.replace_all_uses_with(new_outs) + block.remove_op(op) + if op.name() == "builtin.combine": + pre_combine_op = op + + if pre_combine_op is not None: + 
remove_op = True + for item in pre_combine_op.results(): + if item.has_one_use(): + remove_op = False + break + if remove_op: + block.remove_op(pre_combine_op) + pre_combine_op = None + paddle.pir.set_insertion_point_to_block_end(block) + def decompose_pir_program(pir_program, param_mapping, grad_var_to_var): ''' diff --git a/python/paddle/distributed/auto_parallel/static/engine.py b/python/paddle/distributed/auto_parallel/static/engine.py index 7c2439a059a34..7faa92607719c 100644 --- a/python/paddle/distributed/auto_parallel/static/engine.py +++ b/python/paddle/distributed/auto_parallel/static/engine.py @@ -58,6 +58,7 @@ from .pir_pass import ( apply_partition_pass, apply_reshard_pass, + remove_other_rank_op_pass, remove_unuseful_comm_op_pass, ) from .planner_v2 import Planner @@ -696,6 +697,8 @@ def _parallel_pir(self, mode): # collect the communicator created during resolution. apply_reshard_pass(dist_program) + remove_other_rank_op_pass(dist_program) + # Part 4: Optimization Pass # NOTE Only those Optimization Pass that related to Parallelism (need dist attr) should be placed here and all the Pass should be Optional. 
diff --git a/python/paddle/distributed/auto_parallel/static/parallelizer_v2.py b/python/paddle/distributed/auto_parallel/static/parallelizer_v2.py index 130e80212f274..f5df914650c2c 100644 --- a/python/paddle/distributed/auto_parallel/static/parallelizer_v2.py +++ b/python/paddle/distributed/auto_parallel/static/parallelizer_v2.py @@ -478,9 +478,12 @@ def _apply_post_optimization( self._strategy.gradient_merge.avg = True # gradient_merge is then train-only optimization + grad_to_global_grad = {} if self.is_train and self._strategy.gradient_merge.enable: config = copy.deepcopy(self._strategy.gradient_merge.to_dict()) config["dist_context"] = self._dist_context + config["grad_to_global_grad"] = grad_to_global_grad + config["pipeline_mode"] = self._strategy.pipeline.schedule_mode if gradient_sync_after_accumulate: config["params_grads"] = global_params_grads config[ @@ -557,4 +560,5 @@ def _apply_post_optimization( "vpp_degree": self._strategy.pipeline.vpp_degree, "dist_context": self._dist_context, "split_backward": self._strategy.pipeline.split_backward, + "grad_to_global_grad": grad_to_global_grad, } diff --git a/python/paddle/distributed/auto_parallel/static/pir_pass.py b/python/paddle/distributed/auto_parallel/static/pir_pass.py index cae150f556967..17579dc1d5071 100644 --- a/python/paddle/distributed/auto_parallel/static/pir_pass.py +++ b/python/paddle/distributed/auto_parallel/static/pir_pass.py @@ -89,44 +89,6 @@ def apply_partition_pass(program): var.replace_all_uses_with(reshard_var) reshard_var.get_defining_op().operand(0).set_source(var) - # pruning op and value not belong to cur rank - cur_rank = paddle.distributed.get_rank() - for op in program.global_block().ops[::-1]: - if op.name() in partition_skip_op_list: - can_delete = True - for val in op.results(): - if not val.use_empty(): - can_delete = False - if can_delete: - op.erase() - continue - if cur_rank not in op.dist_attr.process_mesh.process_ids: - op.erase() - else: - # set the operand as null 
when it is not belong to cur rank - if ( - op.name() == 'dist_op.reshard' - and cur_rank - not in op.operand(0) - .source() - .dist_attr() - .process_mesh.process_ids - ): - op.operand(0).set_source(None) - - # merge pd.data ops for - lr_ops = [] - for op in program.global_block().ops[::-1]: - if op.name() == 'pd_op.data' and "learning_rate" in op.attrs()["name"]: - lr_ops.append(op) - - if len(lr_ops) > 1: - lr_value = lr_ops[0].result(0) - for op in lr_ops[1:]: - lr = op.result(0) - lr.replace_all_uses_with(lr_value) - op.erase() - def apply_reshard_pass(program): for op in program.global_block().ops: @@ -160,6 +122,40 @@ def apply_reshard_pass(program): op.erase() +# pruning op and value not belong to cur rank +def remove_other_rank_op_pass(dist_program): + cur_rank = paddle.distributed.get_rank() + for op in dist_program.global_block().ops[::-1]: + if op.name() in partition_skip_op_list: + can_delete = True + for val in op.results(): + if not val.use_empty(): + can_delete = False + if can_delete: + op.erase() + continue + if cur_rank not in op.dist_attr.process_mesh.process_ids: + op.erase() + elif op.name() == "dist_op.reshard": + assert op.result( + 0 + ).use_empty(), f'There should not have useful dist.reshard op in remove_other_rank_op_pass. 
but find : {op}' + op.erase() + + # merge pd.data ops for + lr_ops = [] + for op in dist_program.global_block().ops[::-1]: + if op.name() == 'pd_op.data' and "learning_rate" in op.attrs()["name"]: + lr_ops.append(op) + + if len(lr_ops) > 1: + lr_value = lr_ops[0].result(0) + for op in lr_ops[1:]: + lr = op.result(0) + lr.replace_all_uses_with(lr_value) + op.erase() + + # Note: this is the pass in the dense program comm_ops = ["pd_op.c_allreduce_sum_", "pd_op.c_allgather"] @@ -172,6 +168,10 @@ def remove_unuseful_comm_op_pass(program): if process_group.nranks == 1: op.result(0).replace_all_uses_with(op.operand_source(0)) op.erase() + if op.name() == "pd_op.share_data_": + if op.operand_source(0).has_one_use(): + op.result(0).replace_all_uses_with(op.operand_source(0)) + op.erase() # In sequence_parallel, we need to transpose hidden_states diff --git a/python/paddle/distributed/auto_parallel/static/reshard_funcs/nd_mesh_reshard_func.py b/python/paddle/distributed/auto_parallel/static/reshard_funcs/nd_mesh_reshard_func.py index cf4b9b7b32af1..bbc9b959b72db 100644 --- a/python/paddle/distributed/auto_parallel/static/reshard_funcs/nd_mesh_reshard_func.py +++ b/python/paddle/distributed/auto_parallel/static/reshard_funcs/nd_mesh_reshard_func.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import copy import paddle import paddle.distributed as dist @@ -67,10 +66,12 @@ def get_1D_sub_process_mesh(process_mesh, mesh_dim): process_ids = np.array(process_mesh.process_ids).reshape(mesh_shape) rank_id = dist.get_rank() + # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism + if rank_id not in process_mesh.process_ids: + rank_id = process_mesh.process_ids[0] coord = list(np.where(process_ids == rank_id)) coord[mesh_dim] = range(mesh_shape[mesh_dim]) sub_process_ids = process_ids[tuple(coord)].flatten() - sub_mesh_shape = sub_process_ids.shape sub_mesh_name = dim_names[mesh_dim] return dist.ProcessMesh(sub_process_ids, [sub_mesh_name]) @@ -106,35 +107,31 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): first_diff_axis = find_first_diff_shard_axis( src_dist_attr, dst_dist_attr ) - ori_dst_dist_attr = copy_dist_attr_with_new_member(dst_dist_attr) - out_value = src_value # intermediate result - src_type = src_value.type() + # out_value = src_value # intermediate result + # src_type = src_value.type() tensor_ndim = len(src_value.shape) process_mesh = dst_dist_attr.process_mesh # Step2. Convert the non-replicated dimensions to replicated. # Step2.1. 
convert partial status to replicated - real_out_dist_attr = copy_dist_attr_with_new_member(src_dist_attr) if is_partial(src_dist_attr): - in_partial_status = copy.deepcopy(src_dist_attr.partial_status) + in_partial_status = src_dist_attr.partial_status out_partial_status = dst_dist_attr.partial_status # read-only # convert each partial dim to replicated with corresponding # 1-D mesh function for partial_dim, partial_type in in_partial_status.items(): - if ( - partial_dim in out_partial_status - or partial_dim in ori_dst_dist_attr.dims_mapping - ): + if partial_dim in out_partial_status: continue # get the partial status after converting - real_out_partial_status = copy.deepcopy( - real_out_dist_attr.partial_status + tmp_partial_status = src_dist_attr.partial_status + tmp_partial_status.pop(partial_dim) + tmp_dst_dist_attr = copy_dist_attr_with_new_member( + src_dist_attr, + new_partial_status=tmp_partial_status, ) - real_out_partial_status.pop(partial_dim) - real_out_dist_attr = copy_dist_attr_with_new_member( - real_out_dist_attr, - new_partial_status=real_out_partial_status, + tmp_dst_type = paddle.base.libpaddle.pir.cvt_to_dist_type( + src_value.type(), tmp_dst_dist_attr ) # get the process_mesh on specific axis @@ -160,28 +157,29 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): ) one_dim_func = PToRReshardFunction() - out_value = one_dim_func.reshard( + src_value = one_dim_func.reshard( in_one_dim_dist_attr, out_one_dim_dist_attr, - out_value, - src_type, + src_value, + tmp_dst_type, ) - - out_value.update_dist_attr(real_out_dist_attr) + src_dist_attr = tmp_dst_dist_attr # Step2.2 convert shard status to replicated for i in range(first_diff_axis, -1, -1): - in_mesh_axis = real_out_dist_attr.dims_mapping[i] - if in_mesh_axis == -1: + in_mesh_axis = src_dist_attr.dims_mapping[i] + out_mesh_axis = dst_dist_attr.dims_mapping[i] + if in_mesh_axis == -1 or in_mesh_axis == out_mesh_axis: continue # calculate the dist_attr after converting - 
real_out_dims_mapping = copy.deepcopy( - real_out_dist_attr.dims_mapping + tmp_dims_mapping = src_dist_attr.dims_mapping + tmp_dims_mapping[i] = -1 + tmp_dst_dist_attr = copy_dist_attr_with_new_member( + src_dist_attr, new_dims_mapping=tmp_dims_mapping ) - real_out_dims_mapping[i] = -1 - real_out_dist_attr = copy_dist_attr_with_new_member( - real_out_dist_attr, new_dims_mapping=real_out_dims_mapping + tmp_dst_type = paddle.base.libpaddle.pir.cvt_to_dist_type( + src_value.type(), tmp_dst_dist_attr ) # get the process_mesh on specific axis @@ -205,45 +203,41 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): ) one_dim_func = SToRReshardFunction() - out_value = one_dim_func.reshard( - in_one_dim_dist_attr, out_one_dim_dist_attr, out_value, src_type + src_value = one_dim_func.reshard( + in_one_dim_dist_attr, + out_one_dim_dist_attr, + src_value, + tmp_dst_type, ) - - out_value.update_dist_attr(real_out_dist_attr) + src_dist_attr = tmp_dst_dist_attr # Step3. Convert the replicated status to the status in dst_dist_attr # Step3.1 convert replicated to partial - if is_partial(ori_dst_dist_attr): - in_partial_status = out_value.dist_attr.partial_status - out_partial_status = ori_dst_dist_attr.partial_status + if is_partial(dst_dist_attr): + in_partial_status = src_dist_attr.partial_status + out_partial_status = dst_dist_attr.partial_status for partial_dim, partial_type in out_partial_status.items(): if partial_dim in in_partial_status: continue - raise NotImplementedError( "RToPReshardFunction is not implemented" ) - # Step3.2 convert replicated/partial to shard + # Step3.2 convert replicated to shard for i in range(first_diff_axis, -1, -1): - out_mesh_axis = ori_dst_dist_attr.dims_mapping[i] - if out_mesh_axis == -1: + in_mesh_axis = src_dist_attr.dims_mapping[i] + out_mesh_axis = dst_dist_attr.dims_mapping[i] + if in_mesh_axis == out_mesh_axis: continue - in_partial_status = out_value.dist_attr().partial_status - need_p2s = out_mesh_axis in 
in_partial_status - dims_mapping = copy.deepcopy(real_out_dist_attr.dims_mapping) - dims_mapping[i] = out_mesh_axis - partial_status = None - if out_mesh_axis in real_out_dist_attr.partial_status: - partial_status = copy.deepcopy( - real_out_dist_attr.partial_status - ) - partial_status.pop(out_mesh_axis) - real_out_dist_attr = copy_dist_attr_with_new_member( - real_out_dist_attr, - new_dims_mapping=dims_mapping, - new_partial_status=partial_status, + # calculate the dist_attr after converting + tmp_dims_mapping = src_dist_attr.dims_mapping + tmp_dims_mapping[i] = out_mesh_axis + tmp_dst_dist_attr = copy_dist_attr_with_new_member( + src_dist_attr, new_dims_mapping=tmp_dims_mapping + ) + tmp_dst_type = paddle.base.libpaddle.pir.cvt_to_dist_type( + src_value.type(), tmp_dst_dist_attr ) # get the process_mesh on specific axis @@ -265,23 +259,15 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): sub_mesh, out_one_dim_dims_mapping, {} ) ) - - if need_p2s: - raise NotImplementedError( - "PToSReshardFunction is not implemented" - ) - else: - one_dim_func = RToSReshardFunction() - out_value = one_dim_func.reshard( - in_one_dim_dist_attr, - out_one_dim_dist_attr, - out_value, - dst_type, - ) - out_value.update_dist_attr(real_out_dist_attr) - - out_value.set_type(dst_type) - return out_value + one_dim_func = RToSReshardFunction() + src_value = one_dim_func.reshard( + in_one_dim_dist_attr, + out_one_dim_dist_attr, + src_value, + tmp_dst_type, + ) + src_dist_attr = tmp_dst_dist_attr + return src_value class NdMeshReshardFunctionCrossMesh(ReshardFunction): @@ -310,20 +296,14 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): tmp_dst_type = paddle.base.libpaddle.pir.cvt_to_dist_type( src_value.type(), tmp_dist_attr ) - out_value = same_status_func.reshard( + src_value = same_status_func.reshard( src_dist_attr, tmp_dist_attr, src_value, tmp_dst_type ) - if out_value is None: - return None - - curr_global_rank = 
paddle.distributed.get_rank() - if curr_global_rank in dst_dist_attr.process_mesh.process_ids: - nd_mesh_func = NdMeshReshardFunction() - assert nd_mesh_func.is_suitable( - tmp_dist_attr, dst_dist_attr - ), f"Invoke the p to r reshard function is not valid from {tmp_dist_attr} to {dst_dist_attr}" - return nd_mesh_func.reshard( - tmp_dist_attr, dst_dist_attr, out_value, dst_type - ) - return None + nd_mesh_func = NdMeshReshardFunction() + assert nd_mesh_func.is_suitable( + tmp_dist_attr, dst_dist_attr + ), f"Invoke the p to r reshard function is not valid from {tmp_dist_attr} to {dst_dist_attr}" + return nd_mesh_func.reshard( + tmp_dist_attr, dst_dist_attr, src_value, dst_type + ) diff --git a/python/paddle/distributed/auto_parallel/static/reshard_funcs/p_to_r_reshard_func.py b/python/paddle/distributed/auto_parallel/static/reshard_funcs/p_to_r_reshard_func.py index 8956cc2535d9b..d5046ff0f7963 100644 --- a/python/paddle/distributed/auto_parallel/static/reshard_funcs/p_to_r_reshard_func.py +++ b/python/paddle/distributed/auto_parallel/static/reshard_funcs/p_to_r_reshard_func.py @@ -47,7 +47,7 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): src_reduce_type = ReduceOp.SUM reduce_mean = True - group = new_process_group(src_mesh.process_ids) + group = new_process_group(sorted(src_mesh.process_ids)) reduced_value = paddle._C_ops.c_allreduce_sum_( src_value, group.id, True, False ) @@ -95,20 +95,14 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): tmp_dst_type = paddle.base.libpaddle.pir.cvt_to_dist_type( src_value.type(), tmp_dist_attr ) - out_value = same_status_func.reshard( + src_value = same_status_func.reshard( src_dist_attr, tmp_dist_attr, src_value, tmp_dst_type ) - if out_value is None: - return None - - curr_global_rank = paddle.distributed.get_rank() - if curr_global_rank in dst_dist_attr.process_mesh.process_ids: - p_to_r_func = PToRReshardFunction() - assert p_to_r_func.is_suitable( - tmp_dist_attr, 
dst_dist_attr - ), f"Invoke the p to r reshard function is not valid from {tmp_dist_attr} to {dst_dist_attr}" - return p_to_r_func.reshard( - tmp_dist_attr, dst_dist_attr, out_value, dst_type - ) - return None + p_to_r_func = PToRReshardFunction() + assert p_to_r_func.is_suitable( + tmp_dist_attr, dst_dist_attr + ), f"Invoke the p to r reshard function is not valid from {tmp_dist_attr} to {dst_dist_attr}" + return p_to_r_func.reshard( + tmp_dist_attr, dst_dist_attr, src_value, dst_type + ) diff --git a/python/paddle/distributed/auto_parallel/static/reshard_funcs/r_to_s_reshard_func.py b/python/paddle/distributed/auto_parallel/static/reshard_funcs/r_to_s_reshard_func.py index 922df440c5a21..e2999864f4e87 100644 --- a/python/paddle/distributed/auto_parallel/static/reshard_funcs/r_to_s_reshard_func.py +++ b/python/paddle/distributed/auto_parallel/static/reshard_funcs/r_to_s_reshard_func.py @@ -50,6 +50,19 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): if curr_global_rank in mesh.process_ids: total_nums = src_value.shape[split_axis] num_of_pieces = mesh.shape[mesh_axis] + if num_of_pieces == 1: + dst_value = paddle._C_ops.share_data_(src_value) + share_data_op = dst_value.get_defining_op() + # set dist type and dist attr + dst_value.set_type(dst_type) + share_data_op.dist_attr = ( + paddle.base.libpaddle.pir.create_op_dist_attribute( + src_dist_attr.process_mesh, + [src_dist_attr], + [dst_dist_attr], + ) + ) + return dst_value piece_len = (total_nums + num_of_pieces - 1) // num_of_pieces rank_relative = mesh.process_ids.index(curr_global_rank) start = rank_relative * piece_len @@ -59,15 +72,17 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): out_value = paddle.slice(src_value, [split_axis], [start], [end]) - out_value.set_type(src_value.type()) - out_value.update_dist_attr(dst_dist_attr) + out_value.set_type(dst_type) out_value.get_defining_op().dist_attr = ( paddle.base.libpaddle.pir.create_op_dist_attribute( mesh, 
[src_dist_attr], [dst_dist_attr] ) ) return out_value - return None + # fake var will be removed in remove_other_rank_op_pass. + fake_var = paddle._C_ops.reshard_v2(src_value, dst_dist_attr) + fake_var.set_type(dst_type) + return fake_var class RToSReshardFunctionCrossMesh(ReshardFunction): diff --git a/python/paddle/distributed/auto_parallel/static/reshard_funcs/s_to_r_reshard_func.py b/python/paddle/distributed/auto_parallel/static/reshard_funcs/s_to_r_reshard_func.py index 5a907839cf78b..6c9c564cf6196 100644 --- a/python/paddle/distributed/auto_parallel/static/reshard_funcs/s_to_r_reshard_func.py +++ b/python/paddle/distributed/auto_parallel/static/reshard_funcs/s_to_r_reshard_func.py @@ -69,6 +69,18 @@ def infer_allgather_dist_type(self, in_value, split_axis): return out_type def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): + if src_dist_attr.process_mesh.size == 1: + dst_value = paddle._C_ops.share_data_(src_value) + share_data_op = dst_value.get_defining_op() + # set dist type and dist attr + dst_value.set_type(dst_type) + share_data_op.dist_attr = ( + paddle.base.libpaddle.pir.create_op_dist_attribute( + src_dist_attr.process_mesh, [src_dist_attr], [dst_dist_attr] + ) + ) + return dst_value + def get_split_axis_with_dims_mapping(dims_mapping): split_axis = {} for idx, v in enumerate(dims_mapping): @@ -102,8 +114,7 @@ def get_split_axis_with_dims_mapping(dims_mapping): return new_value else: # TODO(ywt01) support unbalanced split - pass - return None + raise NotImplementedError("unbalanced split is not implemented") def reshard_s_to_r_with_padding( self, @@ -116,8 +127,8 @@ def reshard_s_to_r_with_padding( ): src_mesh = src_dist_attr.process_mesh num_of_process = len(src_mesh.process_ids) - dtype = src_value.dtype - group = new_process_group(src_mesh.process_ids) + + group = new_process_group(sorted(src_mesh.process_ids)) allgather_value = paddle._C_ops.c_allgather( src_value, group.id, num_of_process, True ) @@ -138,11 +149,32 @@ def 
reshard_s_to_r_with_padding( if split_axis != 0 or padding_num != 0: allgather_op = allgather_value.get_defining_op() - paddle.pir.set_insertion_point_after(allgather_op) - split_value = paddle._C_ops.split_with_num( + split_values = paddle._C_ops.split_with_num( allgather_op.result(0), num_of_process, 0 ) - concat_value = paddle._C_ops.concat(split_value, split_axis) + builtin_split_op = split_values[0].get_defining_op() + pd_splite_op = builtin_split_op.operand_source(0).get_defining_op() + + # fix the split_with_num dist attribtue. + new_inner_types = [] + for sub_value in split_values: + new_inner_type = paddle.base.libpaddle.pir.cvt_to_dist_type( + sub_value.type(), allgather_value.dist_attr() + ) + new_inner_types.append(new_inner_type) + sub_value.set_type(new_inner_type) + vec_type = paddle.base.libpaddle.pir.create_vec_type( + new_inner_types + ) + pd_splite_op.result(0).set_type(vec_type) + + concat_value = paddle._C_ops.concat(split_values, split_axis) + # fold builtin.split op and builtin.combine op + concat_op = concat_value.get_defining_op() + builtin_combine_op = concat_op.operand_source(0).get_defining_op() + concat_op.operand(0).set_source(pd_splite_op.result(0)) + builtin_combine_op.erase() + builtin_split_op.erase() return concat_value return allgather_value @@ -183,16 +215,11 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): out_value = same_status_func.reshard( src_dist_attr, tmp_dist_attr, src_value, tmp_dst_type ) - if out_value is None: - return None - - curr_global_rank = paddle.distributed.get_rank() - if curr_global_rank in dst_dist_attr.process_mesh.process_ids: - s_to_r_func = SToRReshardFunction() - assert s_to_r_func.is_suitable( - tmp_dist_attr, dst_dist_attr - ), f"Invoke the p to r reshard function is not valid from {tmp_dist_attr} to {dst_dist_attr}" - return s_to_r_func.reshard( - tmp_dist_attr, dst_dist_attr, out_value, dst_type - ) - return None + + s_to_r_func = SToRReshardFunction() + assert 
s_to_r_func.is_suitable( + tmp_dist_attr, dst_dist_attr + ), f"Invoke the p to r reshard function is not valid from {tmp_dist_attr} to {dst_dist_attr}" + return s_to_r_func.reshard( + tmp_dist_attr, dst_dist_attr, out_value, dst_type + ) diff --git a/python/paddle/distributed/auto_parallel/static/reshard_funcs/same_status_reshard_func.py b/python/paddle/distributed/auto_parallel/static/reshard_funcs/same_status_reshard_func.py index ceae2e7424fd6..db6ec8d1df238 100644 --- a/python/paddle/distributed/auto_parallel/static/reshard_funcs/same_status_reshard_func.py +++ b/python/paddle/distributed/auto_parallel/static/reshard_funcs/same_status_reshard_func.py @@ -87,11 +87,14 @@ def reshard(self, src_dist_attr, dst_dist_attr, src_value, dst_type): dst_mesh, [], [dst_dist_attr] ) ) - recv_value.update_dist_attr(dst_dist_attr) + recv_value.set_type(dst_type) is_send = False break if is_send: - return None + # fake var will be removed in remove_other_rank_op_pass. + fake_var = paddle._C_ops.reshard_v2(src_value, dst_dist_attr) + fake_var.set_type(dst_type) + return fake_var else: return recv_value diff --git a/python/paddle/distributed/auto_tuner/utils.py b/python/paddle/distributed/auto_tuner/utils.py index 741120f7fe598..2db4cb6e0bdcc 100644 --- a/python/paddle/distributed/auto_tuner/utils.py +++ b/python/paddle/distributed/auto_tuner/utils.py @@ -1068,7 +1068,10 @@ def _gen_new_arg(arg, cmd, cfg, res_args, tuner_cfg): prefix + str(cfg[arg]) if prefix else cfg[arg] ) json.dump(cmd_cfg, open(cmd[arg][0], "w")) - if tuner_cfg["run_cmd"].get("generate_launch_cfg", True): + if ( + tuner_cfg["run_cmd"].get("generate_launch_cfg", True) + and not run_best + ): new_cmd_apth = ( os.path.splitext(cmd[arg][0])[0] + "_" @@ -1107,7 +1110,10 @@ def _gen_new_arg(arg, cmd, cfg, res_args, tuner_cfg): prefix + str(cfg[arg]) if prefix else cfg[arg] ) yaml.dump(cmd_cfg, open(cmd[arg][0], "w")) - if tuner_cfg["run_cmd"].get("generate_launch_cfg", True): + if ( + 
tuner_cfg["run_cmd"].get("generate_launch_cfg", True) + and not run_best + ): new_cmd_apth = ( os.path.splitext(cmd[arg][0])[0] + cfg["log_dir_name"] @@ -1157,7 +1163,10 @@ def _gen_new_arg(arg, cmd, cfg, res_args, tuner_cfg): else: cmd_cfg[keys[-1]] = rr_values json.dump(cmd_cfg, open(cmd[arg][0], "w")) - if tuner_cfg["run_cmd"].get("generate_launch_cfg", True): + if ( + tuner_cfg["run_cmd"].get("generate_launch_cfg", True) + and not run_best + ): new_cmd_apth = ( os.path.splitext(cmd[arg][0])[0] + cfg["log_dir_name"] @@ -1198,7 +1207,10 @@ def _gen_new_arg(arg, cmd, cfg, res_args, tuner_cfg): else: cmd_cfg[keys[-1]] = rr_values yaml.dump(cmd_cfg, open(cmd[arg][0], "w")) - if tuner_cfg["run_cmd"].get("generate_launch_cfg", True): + if ( + tuner_cfg["run_cmd"].get("generate_launch_cfg", True) + and not run_best + ): new_cmd_apth = ( os.path.splitext(cmd[arg][0])[0] + cfg["log_dir_name"] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py index 63f76416142c1..ce6154b1ca8db 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py @@ -119,6 +119,14 @@ def __init__(self, optimizer, hcg): self._broadcast_overlap = False self._forward_pre_hook_remove_helper = [] + if ( + paddle.is_compiled_with_xpu() + and os.getenv("XPU_CDNN_CLUSTER_PARALLEL") is not None + ): + assert ( + not self.comm_overlap + ), "comm overlap not support when use xpu cdnn_cluster parallel." + try: # The fp32 params such as layer_norm_0.w_0 will be at the end of param_list. # Have to sort the params to make sure all params are in the forward using order. 
@@ -312,6 +320,14 @@ def reduce_gradients(self, parameter_list, hcg): for buffer in self._comm_buffers: buffer.scale_grads() return + + # sync here to guarantee cdnn_cluster parallel correct. + if ( + paddle.is_compiled_with_xpu() + and os.getenv("XPU_CDNN_CLUSTER_PARALLEL") is not None + ): + paddle.device.synchronize() + with framework.no_grad(): for param in parameter_list: g_var = self._get_param_grad(param) @@ -624,6 +640,14 @@ def __init__(self, optimizer, hcg): self._set_inner_opt_attr('_parameter_list', self._local_parameter_list) self._set_inner_opt_attr('_param_groups', self._local_parameter_list) + if ( + paddle.is_compiled_with_xpu() + and os.getenv("XPU_CDNN_CLUSTER_PARALLEL") is not None + ): + assert ( + not self.comm_overlap + ), "comm overlap not support when use xpu cdnn_cluster parallel." + # Ensure acc_steps is greater than 0 when comm_overlap is used if self.comm_overlap: assert ( @@ -739,6 +763,14 @@ def filter_parameters(self, parameter_list, hcg): def reduce_gradients(self, parameter_list, hcg): # TODO merge grad / nrank with dp logger.debug("sharding start gradients sync") + + # sync here to guarantee cdnn_cluster parallel correct. 
+ if ( + paddle.is_compiled_with_xpu() + and os.getenv("XPU_CDNN_CLUSTER_PARALLEL") is not None + ): + paddle.device.synchronize() + with framework.no_grad(): for comm_buffer in self._comm_buffer_list: if self.pp_release_grads and comm_buffer.grad_storage is None: @@ -812,6 +844,7 @@ def copy_attr(attr_name): copy_attr("optimize_attr") copy_attr("do_model_average") copy_attr("need_clip") + copy_attr("no_sync") self._slice_params[param.name] = slice_param return slice_param diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py index 33b8c3d95d582..db8c2f7b9b820 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py @@ -24,6 +24,7 @@ paddle.float32: "float32", paddle.float64: "float64", paddle.bfloat16: "bfloat16", + paddle.bool: "bool", } PADDLE_TO_NUMBER = { @@ -33,6 +34,7 @@ paddle.int32: 3, paddle.int64: 4, paddle.bfloat16: 5, + paddle.bool: 6, } NUMBER_TO_DTYPE = { @@ -42,6 +44,7 @@ 3: "int32", 4: "int64", 5: "bfloat16", + 6: "bool", } diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index 816af6f91530d..53d929c7890bd 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -451,7 +451,9 @@ def check_layer_sparse(sublayer): return False is_sparse_gradient = [ - check_layer_sparse(sublayer) for sublayer, _ in layers_param + check_layer_sparse(sublayer) + for sublayer, param in layers_param + if not getattr(param, "no_sync", False) ] if in_dynamic_mode(): diff --git a/python/paddle/distributed/passes/allreduce_matmul_grad_overlapping.py b/python/paddle/distributed/passes/allreduce_matmul_grad_overlapping.py index 77affd4cd9c1e..e22cc5bbf6d65 100644 --- a/python/paddle/distributed/passes/allreduce_matmul_grad_overlapping.py +++ b/python/paddle/distributed/passes/allreduce_matmul_grad_overlapping.py @@ 
-138,8 +138,14 @@ def _split_matmul_grad_and_multi_streaming_allreduce( name: allreduce_op.output(name) for name in allreduce_op_outputs } + # matmul_v2 + reshape + reshape + matmul_v2 + reshape + ... + original c_allreduce_sum + # => + # matmul_v2 + new c_allreduce_sum + reshape + reshape + matmul_v2 + reshape + ... + original c_allreduce_sum + # + # NOTE(liym27): new c_allreduce_sum must be inserted to "the next of the first matmul_v2", otherwise another + # pass fused_linear_param_grad_add will not work. allreduce_op = block._insert_op_without_sync( - index=allreduce_id + 1, + index=matmul_grad_id + 1, type=allreduce_op.type, inputs=allreduce_op_inputs, outputs=allreduce_op_outputs, diff --git a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py index aab9bdb2456a0..2d7413965ae3b 100644 --- a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py +++ b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py @@ -523,6 +523,8 @@ def parse_program( dist_context, ) + return grad_to_gradient_merge + @register_pass("auto_parallel_gradient_merge_pass") class GradientMergePass(PassBase): @@ -550,8 +552,9 @@ def _apply_single_impl(self, main_program, startup_program, context): gradient_sync_after_accumulate = self.get_attr( "gradient_sync_after_accumulate", False ) + grad_to_global_grad = self.get_attr("grad_to_global_grad", {}) with paddle.static.program_guard(main_program, startup_program): - parse_program( + grad_to_merge_grad = parse_program( main_program, startup_program, params_grads, @@ -562,3 +565,5 @@ def _apply_single_impl(self, main_program, startup_program, context): ) main_program._sync_with_cpp() + for k, v in grad_to_merge_grad.items(): + grad_to_global_grad[k] = v diff --git a/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_vpp.py b/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_vpp.py index 4fc9a1ec28692..8bc29411269ab 
100644 --- a/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_vpp.py +++ b/python/paddle/distributed/passes/pipeline_scheduler_pass/pipeline_vpp.py @@ -15,6 +15,10 @@ import logging from paddle.base import core +from paddle.distributed.auto_parallel.static.operators.common import ( + is_data_parallel_reduce_op, + is_data_parallel_scale_op, +) from ...utils.log_utils import get_logger from ..pass_base import register_pass @@ -36,7 +40,8 @@ class PipelineVirtualPipelinePass(PipelinePassBase): def __init__(self): super().__init__() - + self._real_overlap_sharding_reduce = False + self.reduce_comm_suffix = "_reduce" self._forward_micro_step_counter = {} self._backward_micro_step_counter = {} @@ -137,10 +142,22 @@ def _get_virtual_pp_rank(micro_step, forward): if real_split_backward: for chunk_id in range(num_model_chunks - 1, -1, -1): for micro_batch_id in range(0, accumulate_steps): - w_job = core.Job(BACKWARD + "_w" + str(chunk_id)) + if ( + self._real_overlap_sharding_reduce + and micro_batch_id == accumulate_steps - 1 + ): + w_job = core.Job( + BACKWARD + + "_w" + + str(chunk_id) + + self.reduce_comm_suffix + ) + else: + w_job = core.Job(BACKWARD + "_w" + str(chunk_id)) w_job.set_micro_batch_id(micro_batch_id) job_list.append(w_job) - + job_types = [job.type() for job in job_list] + logger.debug(f"The VPP job list: {job_types}") opt_job = core.Job(OPT) job_list.append(opt_job) return job_list @@ -162,6 +179,102 @@ def _split_matmul_grad_ops_to_matmul(self, program, dist_context): block, matmul_grad_id, dist_context=dist_context ) + def _move_sharding_comm_to_backward( + self, types, sub_programs, global_grads + ): + def _get_sharding_comm_op(op, idx, ops): + if is_data_parallel_reduce_op(op): + op_input_names = op.desc.input_arg_names() + op_output_names = op.desc.output_arg_names() + if ( + op_input_names[0] == op_output_names[0] + and op_input_names[0] in global_grads + ): + global_grad_to_comm_op[op_input_names[0]] = [op] + 
remove_op_ids.append(idx) + + if op.type in ["c_allreduce_sum", "c_reduce_sum"]: + scale_index = idx + 1 + if scale_index < len(ops): + if is_data_parallel_scale_op(ops[scale_index]): + global_grad_to_comm_op[op_input_names[0]].append(ops[scale_index])  # move the trailing dp scale op together with its reduce op + remove_op_ids.append(scale_index) + + def _get_scale_op(op, idx): + if is_data_parallel_scale_op(op): + return + if op.type == 'scale': + op_input_names = op.desc.input_arg_names() + op_output_names = op.desc.output_arg_names() + if ( + op_input_names[0] == op_output_names[0] + and op_input_names[0] in global_grads + ): + global_grad_to_scale_op[op_input_names[0]] = op + remove_op_ids.append(idx) + + # 1 get the all sharding_avg in optimizer + type_programs = dict(zip(types, sub_programs)) + opt_program = type_programs["optimizer"] + global_grad_to_comm_op = {} + global_grad_to_scale_op = {} + all_remove_op_ids = [] + for cur_block in opt_program.blocks: + remove_op_ids = [] + for idx, op in enumerate(cur_block.ops): + _get_scale_op(op, idx) + _get_sharding_comm_op(op, idx, cur_block.ops) + all_remove_op_ids.append(remove_op_ids) + if len(global_grad_to_comm_op) == 0: # no need to overlap sharding comm + return False + + # 2 create the new backward(w) with the sharding_comm + new_types = [] + new_programs = [] + for type, sub_program in type_programs.items(): + if "backward_w" in type: + new_program = sub_program.clone() + cur_block = new_program.global_block() + cur_block_scale_op = [] + for idx, op in reversed(list(enumerate(cur_block.ops))): + if op.type == "elementwise_add": + input_arg_names = op.input_arg_names + output_arg_names = op.output_arg_names + if ( + input_arg_names[0] == output_arg_names[0] + and input_arg_names[0] in global_grad_to_comm_op + ): + for origin_op in reversed( + global_grad_to_comm_op[input_arg_names[0]] + ): + new_op = cur_block._insert_op_without_sync( + index=idx + 1, type="nop" + ) + new_op.desc.copy_from(origin_op.desc) + del global_grad_to_comm_op[input_arg_names[0]] +
cur_block_scale_op.append( + global_grad_to_scale_op[input_arg_names[0]] + ) + for origin_op in cur_block_scale_op: + new_op = cur_block.append_op(type="nop") + new_op.desc.copy_from(origin_op.desc) + cur_block._sync_with_cpp() + new_types.append(type + self.reduce_comm_suffix) + new_programs.append(new_program) + assert ( + len(global_grad_to_comm_op) == 0 + ), f"global_grad_to_comm_op must be used up, but left: {global_grad_to_comm_op}" + + types.extend(new_types) + sub_programs.extend(new_programs) + + for id, cur_block in enumerate(opt_program.blocks): + for op_id in reversed(all_remove_op_ids[id]): + cur_block._remove_op(op_id) + cur_block._sync_with_cpp() + + return True + def _partial_programs(self, program): dist_context = self.get_attr("dist_context") num_model_chunks = self.get_attr("vpp_degree") @@ -169,7 +282,10 @@ def _partial_programs(self, program): accumulate_steps = self.get_attr("num_micro_batches") num_stages = self.get_attr("pp_degree") split_backward = self.get_attr("split_backward", False) - + grad_to_global_grad = self.get_attr("grad_to_global_grad", {}) + global_grads = [ + global_grad for _, global_grad in grad_to_global_grad.items() + ] if split_backward and accumulate_steps == num_stages: self._split_matmul_grad_ops_to_matmul(program, dist_context) types, sub_program_list = _program_for_vpp_split_bwk( @@ -178,6 +294,11 @@ def _partial_programs(self, program): dist_context, enable_send_recv_overlap, ) + self._real_overlap_sharding_reduce = ( + self._move_sharding_comm_to_backward( + types, sub_program_list, global_grads + ) + ) else: types, sub_program_list = _program_for_vpp( program, diff --git a/python/paddle/distribution/__init__.py b/python/paddle/distribution/__init__.py index 246c4ffb71173..168fbc460d5bd 100644 --- a/python/paddle/distribution/__init__.py +++ b/python/paddle/distribution/__init__.py @@ -34,6 +34,7 @@ from .multivariate_normal import MultivariateNormal from .normal import Normal from .poisson import Poisson +from 
.student_t import StudentT from .transform import ( # noqa:F401 AbsTransform, AffineTransform, @@ -77,6 +78,7 @@ 'Geometric', 'Binomial', 'Poisson', + 'StudentT', ] __all__.extend(transform.__all__) diff --git a/python/paddle/distribution/student_t.py b/python/paddle/distribution/student_t.py new file mode 100644 index 0000000000000..d1a88887023ff --- /dev/null +++ b/python/paddle/distribution/student_t.py @@ -0,0 +1,277 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from collections.abc import Sequence + +import paddle +from paddle.base.data_feeder import check_type, convert_dtype +from paddle.base.framework import Variable +from paddle.distribution import Gamma, distribution +from paddle.framework import in_dynamic_mode + + +class StudentT(distribution.Distribution): + r""" + The StudentT distribution with parameters: `df`, `loc`, `scale`. + + In probability theory and statistics, the StudentT distribution is one of the basic continuous probability distributions + defined on the real number set. + + The probability density function (pdf) is + + .. math:: + + pdf(x; \nu, \mu, \sigma) = \frac{\Gamma[(\nu+1)/2]}{\sigma\sqrt{\nu\pi}\Gamma(\nu/2)[1+(\frac{x-\mu}{\sigma})^2/\nu]^{(1+\nu)/2}} + + In the above equation: + + * :math:`df = \nu`: is the degree of freedom. + * :math:`loc = \mu`: is the center parameter. + * :math:`scale = \sigma`: is the scale parameter. 
+ * :math:`\Gamma(\cdot)`: is the gamma function. + + Args: + df (float|Tensor): The degree of freedom of the distribution, which should be non-negative. If the input data type is float, + the data type of `df` will be converted to a 1-D Tensor with paddle global default dtype. Supported dtype: float32, float64. + loc (float|Tensor): The center of the distribution. If the input data type is float, the data type of `loc` will be converted to a + 1-D Tensor with paddle global default dtype. Supported dtype: float32, float64. + scale (float|Tensor): The scale of the distribution, which should be non-negative. If the input data type is float, the data type + of `scale` will be converted to a 1-D Tensor with paddle global default dtype. Supported dtype: float32, float64. + name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Examples: + .. code-block:: python + + >>> import paddle + >>> from paddle.distribution import StudentT + >>> paddle.set_device('cpu') + >>> paddle.seed(100) + >>> dist = StudentT(df=10.0, loc=0.0, scale=1.0) + >>> dist.sample([3]) + Tensor(shape=[3, 1], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-2.07709980], + [ 0.27981189], + [ 0.00881413]]) + + >>> dist2 = StudentT(df=paddle.to_tensor([10.0, 5.0]), loc=paddle.to_tensor([0.0, 0.0]), scale=paddle.to_tensor([1.0, 2.0])) + >>> value_tensor = paddle.to_tensor([0.8], dtype="float32") + >>> lp = dist2.log_prob(value_tensor) + >>> print(lp) + Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True, + [-1.28509235, -1.75626254]) + + >>> p = dist2.prob(value_tensor) + >>> print(p) + Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.27662504, 0.17268908]) + + >>> entropy = dist2.entropy() + >>> print(entropy) + Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True, + [1.52126312, 2.32064891]) + + """ + + def __init__(self, df, loc, scale, name=None): + 
if not in_dynamic_mode(): + check_type( + df, + 'df', + ( + float, + Variable, + paddle.pir.Value, + ), + 'StudentT', + ) + check_type( + loc, + 'loc', + ( + float, + Variable, + paddle.pir.Value, + ), + 'StudentT', + ) + check_type( + scale, + 'scale', + ( + float, + Variable, + paddle.pir.Value, + ), + 'StudentT', + ) + + self.name = name if name is not None else 'StudentT' + self.dtype = paddle.get_default_dtype() + + if self._validate_args(df, loc, scale): + self.df = df + self.loc = loc + self.scale = scale + self.df, self.loc, self.scale = paddle.broadcast_tensors( + [self.df, self.loc, self.scale] + ) + self.dtype = convert_dtype(df.dtype) + else: + self.df, self.loc, self.scale = self._to_tensor(df, loc, scale) + + if not self._check_nonnegative(self.df): + raise ValueError( + 'Every element of input parameter `df` should be nonnegative.' + ) + if not self._check_nonnegative(self.scale): + raise ValueError( + 'Every element of input parameter `scale` should be nonnegative.' + ) + + if self.df.shape == []: + self.df = self.df.reshape([1]) + self.loc = self.loc.reshape([1]) + self.scale = self.scale.reshape([1]) + batch_shape = self.df.shape + super().__init__(batch_shape) + self._chi2 = Gamma(0.5 * self.df, paddle.full_like(self.df, 0.5)) + + def _check_nonnegative(self, value): + """Check the non-negative constraint for input parameters + + Args: + value (Tensor) + + Returns: + bool: pass or not. + """ + return (value >= 0.0).all() + + @property + def mean(self): + """Mean of StudentT distribution. + + Returns: + Tensor: mean value. + """ + return paddle.where( + self.df > 1.0, + self.loc, + paddle.full_like(self.loc, fill_value=float('nan')), + ) + + @property + def variance(self): + """Variance of StudentT distribution. + + Returns: + Tensor: variance value. 
+ """ + var = self.df.clone().detach() + var_condition = self.df > 2.0 + var = paddle.where( + var_condition, + self.scale.pow(2) * var / (var - 2), + paddle.full_like(var, fill_value=float('nan')), + ) + inf_condition = (self.df <= 2.0).logical_and(self.df > 1.0) + var = paddle.where( + inf_condition, paddle.full_like(var, fill_value=float('inf')), var + ) + return var + + def sample(self, shape=()): + """Generate StudentT samples of the specified shape. The final shape would be ``shape+batch_shape`` . + + Args: + shape (Sequence[int], optional): Prepended shape of the generated samples. + + Returns: + Tensor: Sampled data with shape `sample_shape` + `batch_shape`. + """ + if not isinstance(shape, Sequence): + raise TypeError('sample shape must be Sequence object.') + + output_shape = self._extend_shape(shape) + z = paddle.cast(paddle.normal(shape=output_shape), self.dtype) + chi2 = self._chi2.sample(shape) + x = z * paddle.rsqrt(chi2 / self.df) + return self.loc + self.scale * x + + def entropy(self): + r"""Shannon entropy in nats. + + The entropy is + + .. math:: + + H = \log(\frac{\Gamma(\nu/2)\Gamma(1/2) \sigma \sqrt{\nu}}{\Gamma[(1+\nu)/2]}) + \frac{(1+\nu)}{2} \cdot \{\psi[(1+\nu)/2] - \psi(\nu/2)\} + + In the above equation: + + * :math:`\nu`: is the degree of freedom. + * :math:`\Gamma()`: is the gamma function. + * :math:`\psi()`: is the digamma function. + + Returns: + Tensor: Shannon entropy of StudentT distribution. The data type is the same as `df`. + """ + lbeta = ( + paddle.lgamma(0.5 * self.df) + + math.lgamma(0.5) + - paddle.lgamma(0.5 * (self.df + 1)) + ) + return ( + self.scale.log() + + 0.5 + * (self.df + 1) + * ( + paddle.digamma(0.5 * (self.df + 1)) + - paddle.digamma(0.5 * self.df) + ) + + 0.5 * self.df.log() + + lbeta + ) + + def log_prob(self, value): + """Log probability density function. + + Args: + value (Tensor): The input tensor. + + Returns: + Tensor: log probability density. The data type is the same as `df`. 
+ """ + value = self._check_values_dtype_in_probs(self.df, value) + y = (value - self.loc) / self.scale + Z = ( + self.scale.log() + + 0.5 * self.df.log() + + 0.5 * math.log(math.pi) + + paddle.lgamma(0.5 * self.df) + - paddle.lgamma(0.5 * (self.df + 1.0)) + ) + return -0.5 * (self.df + 1.0) * paddle.log1p(y**2.0 / self.df) - Z + + def prob(self, value): + """Probability density function. + + Args: + value (Tensor): The input tensor. + + Returns: + Tensor: probability density. The data type is the same as `df`. + """ + return paddle.exp(self.log_prob(value)) diff --git a/python/paddle/incubate/autograd/functional.py b/python/paddle/incubate/autograd/functional.py index 8ae915a1e4868..b2f19ee83f806 100644 --- a/python/paddle/incubate/autograd/functional.py +++ b/python/paddle/incubate/autograd/functional.py @@ -582,7 +582,7 @@ def _grad(ys, xs, v=None): # xs_grad when the xs is a single Tensor. xs_grad = paddle.grad(ys, xs, v, create_graph=True, allow_unused=True) if ( - isinstance(xs, paddle.base.framework.Variable) + isinstance(xs, (paddle.base.framework.Variable, paddle.pir.Value)) and isinstance(xs_grad, typing.Sequence) and len(xs_grad) > 0 ): @@ -658,23 +658,27 @@ def _check_inputs(func, xs, v=None): if not callable(func): raise TypeError(f"Expected 'fun' is Callable, but got {type(func)}.") - if not isinstance(xs, (framework.Variable, typing.Sequence)): + if not isinstance( + xs, (framework.Variable, typing.Sequence, paddle.pir.Value) + ): raise TypeError( f"Expected 'xs' is a Tensor|Sequence[Tensor]," f"but got {type(xs)}." 
) if isinstance(xs, typing.Sequence) and not all( - isinstance(x, framework.Variable) for x in xs + isinstance(x, (framework.Variable, paddle.pir.Value)) for x in xs ): raise TypeError("All elements of 'xs' should be Tensor.") - if not isinstance(v, (framework.Variable, typing.Sequence, type(None))): + if not isinstance( + v, (framework.Variable, typing.Sequence, type(None), paddle.pir.Value) + ): raise TypeError( f"Expected 'v' is Tensor|Sequence[Tensor]|None, but got {type(v)}." ) if isinstance(v, typing.Sequence) and not all( - isinstance(e, framework.Variable) for e in v + isinstance(e, (framework.Variable, paddle.pir.Value)) for e in v ): raise TypeError("All elements of 'xs' should be Tensor.") diff --git a/python/paddle/incubate/autograd/primapi.py b/python/paddle/incubate/autograd/primapi.py index 109cde97a75ca..a33e1f4dfb8de 100644 --- a/python/paddle/incubate/autograd/primapi.py +++ b/python/paddle/incubate/autograd/primapi.py @@ -72,13 +72,17 @@ def forward_grad(outputs, inputs, grad_inputs=None): 'operators, use enable_prim to turn it on.' ) - if not isinstance(outputs, (framework.Variable, typing.Sequence)): + if not isinstance( + outputs, (framework.Variable, typing.Sequence, paddle.pir.Value) + ): raise TypeError( f'Expected outputs is Tensor|Sequence[Tensor], ' f'but got {type(outputs)}.' ) - if not isinstance(inputs, (framework.Variable, typing.Sequence)): + if not isinstance( + inputs, (framework.Variable, typing.Sequence, paddle.pir.Value) + ): raise TypeError( f'Expected inputs is Tensor|Sequence[Tensor], ' f'but got {type(inputs)}.' 
@@ -101,7 +105,11 @@ def forward_grad(outputs, inputs, grad_inputs=None): ad = primx.Transform(ys[0].block) _, ys_dot = ad.linearize(xs, ys, xs_dot) - return ys_dot[0] if isinstance(outputs, framework.Variable) else ys_dot + return ( + ys_dot[0] + if isinstance(outputs, (framework.Variable, paddle.pir.Value)) + else ys_dot + ) @framework.static_only @@ -155,7 +163,7 @@ def grad(outputs, inputs, grad_outputs=None): # The follow code snippet fixes the problem by return the first element # of grad_inputs when the inputs is a single Tensor. if ( - isinstance(inputs, framework.Variable) + isinstance(inputs, (framework.Variable, paddle.pir.Value)) and isinstance(grad_inputs, typing.Sequence) and len(grad_inputs) > 0 ): @@ -163,13 +171,17 @@ def grad(outputs, inputs, grad_outputs=None): else: return grad_inputs - if not isinstance(outputs, (framework.Variable, typing.Sequence)): + if not isinstance( + outputs, (framework.Variable, typing.Sequence, paddle.pir.Value) + ): raise TypeError( f'Expected outputs is Tensor|Sequence[Tensor], ' f'but got {type(outputs)}.' ) - if not isinstance(inputs, (framework.Variable, typing.Sequence)): + if not isinstance( + inputs, (framework.Variable, typing.Sequence, paddle.pir.Value) + ): raise TypeError( f'Expected inputs is Tensor|Sequence[Tensor], ' f'but got {type(inputs)}.' 
@@ -213,7 +225,11 @@ def grad(outputs, inputs, grad_outputs=None): ad.erase_ops(sorted(op_indexes)) ad.erase_dots(xs_dot) - return xs_bar[0] if isinstance(inputs, framework.Variable) else xs_bar + return ( + xs_bar[0] + if isinstance(inputs, (framework.Variable, paddle.pir.Value)) + else xs_bar + ) @framework.static_only diff --git a/python/paddle/incubate/autograd/primx.py b/python/paddle/incubate/autograd/primx.py index 901e23a649974..ba2f0c2e615a1 100644 --- a/python/paddle/incubate/autograd/primx.py +++ b/python/paddle/incubate/autograd/primx.py @@ -137,8 +137,12 @@ def add(self, key_var, value_var): def add_rec(self, key_vars, value_vars): if value_vars is None: return - if isinstance(key_vars, paddle.base.framework.Variable): - if not isinstance(value_vars, paddle.base.framework.Variable): + if isinstance( + key_vars, (paddle.base.framework.Variable, paddle.pir.Value) + ): + if not isinstance( + value_vars, (paddle.base.framework.Variable, paddle.pir.Value) + ): raise TypeError( f'value_vars must be Variable, but got {type(value_vars)}' ) @@ -208,7 +212,9 @@ def add_vars(self, new_vars): def add_vars_rec(self, new_vars): if new_vars is None: return - if isinstance(new_vars, paddle.base.framework.Variable): + if isinstance( + new_vars, (paddle.base.framework.Variable, paddle.pir.Value) + ): self.vars.update({id(new_vars): new_vars}) return if not isinstance(new_vars, list): @@ -242,7 +248,7 @@ def erase_dots(self, vars_to_erase): def var2dot_rec(self, vars): """Lookup var2dot recursively.""" - if isinstance(vars, paddle.base.framework.Variable): + if isinstance(vars, (paddle.base.framework.Variable, paddle.pir.Value)): dot = self.var2dot.lookup(vars) return dot @@ -250,7 +256,7 @@ def var2dot_rec(self, vars): return dots def dot2bar_rec(self, dots): - if isinstance(dots, paddle.base.framework.Variable): + if isinstance(dots, (paddle.base.framework.Variable, paddle.pir.Value)): bar = self.dot2bar.lookup(dots) assert bar is not None, 'bar must be not None' 
return bar @@ -385,7 +391,9 @@ def bind(args, to_bind, value_table): for i in range(len(args)): if isinstance(args[i], list): bind(args[i], to_bind, value_table) - if not isinstance(args[i], paddle.base.framework.Variable): + if not isinstance( + args[i], (paddle.base.framework.Variable, paddle.pir.Value) + ): continue elif args[i] is not None and args[i].name in to_bind: args[i] = value_table[to_bind[args[i].name]] diff --git a/python/paddle/incubate/autograd/utils.py b/python/paddle/incubate/autograd/utils.py index b5bc0c6238ea7..0518071a90040 100644 --- a/python/paddle/incubate/autograd/utils.py +++ b/python/paddle/incubate/autograd/utils.py @@ -309,7 +309,9 @@ def map_output_for_composite(op): def flatten(inp): - if inp is None or isinstance(inp, paddle.base.framework.Variable): + if inp is None or isinstance( + inp, (paddle.base.framework.Variable, paddle.pir.Value) + ): return [inp] flattened = [] for part in inp: @@ -323,7 +325,7 @@ def flatten_and_remove_none(inp): def as_tensors(xs): - if isinstance(xs, framework.Variable): + if isinstance(xs, (framework.Variable, paddle.pir.Value)): return (xs,) elif isinstance(xs, typing.Sequence): return tuple(xs) diff --git a/python/paddle/incubate/nn/functional/block_multihead_attention.py b/python/paddle/incubate/nn/functional/block_multihead_attention.py index a55f61de2c678..596b9581570ad 100644 --- a/python/paddle/incubate/nn/functional/block_multihead_attention.py +++ b/python/paddle/incubate/nn/functional/block_multihead_attention.py @@ -389,3 +389,156 @@ def block_multihead_attention( }, ) return out, qkv, key_cache, value_cache + + +def block_multihead_attention_xpu( + qkv, + key_cache, + value_cache, + seq_lens_encoder, + seq_lens_decoder, + seq_lens_this_time, + padding_offsets, + cum_offsets, + cu_seqlens_q, + cu_seqlens_k, + block_tables, + cache_k_per_batch_maxs, + cache_v_per_batch_maxs, + pre_key_cache=None, + pre_value_cache=None, + cache_k_quant_scales=None, + cache_v_quant_scales=None, + 
cache_k_dequant_scales=None, + cache_v_dequant_scales=None, + qkv_out_scale=None, + qkv_bias=None, + out_shift=None, + out_smooth=None, + max_enc_len_this_time=None, + max_dec_len_this_time=None, + rope_emb=None, + mask=None, + tgt_mask=None, + max_seq_len=-1, + block_size=64, + use_neox_style=False, + use_dynamic_cachekv_quant=False, + quant_round_type=1, + quant_max_bound=127.0, + quant_min_bound=-127.0, + out_scale=-1, + compute_dtype="default", +): + if in_dynamic_mode(): + return _C_ops.block_multihead_attention_xpu( + qkv, + key_cache, + value_cache, + seq_lens_encoder, + seq_lens_decoder, + seq_lens_this_time, + padding_offsets, + cum_offsets, + cu_seqlens_q, + cu_seqlens_k, + block_tables, + cache_k_per_batch_maxs, + cache_v_per_batch_maxs, + pre_key_cache, + pre_value_cache, + rope_emb, + mask, + tgt_mask, + cache_k_quant_scales, + cache_v_quant_scales, + cache_k_dequant_scales, + cache_v_dequant_scales, + qkv_out_scale, + qkv_bias, + out_shift, + out_smooth, + max_enc_len_this_time, + max_dec_len_this_time, + max_seq_len, + block_size, + use_neox_style, + use_dynamic_cachekv_quant, + quant_round_type, + quant_max_bound, + quant_min_bound, + out_scale, + compute_dtype, + ) + + helper = LayerHelper('block_multihead_attention_xpu', **locals()) + out = helper.create_variable_for_type_inference(dtype=qkv.dtype) + + inputs = {} + inputs['qkv'] = qkv + inputs['key_cache'] = key_cache + inputs['value_cache'] = value_cache + inputs['seq_lens_encoder'] = seq_lens_encoder + inputs['seq_lens_decoder'] = seq_lens_decoder + inputs['seq_lens_this_time'] = seq_lens_this_time + inputs['padding_offsets'] = padding_offsets + inputs['cum_offsets'] = cum_offsets + inputs['cu_seqlens_q'] = cu_seqlens_q + inputs['cu_seqlens_k'] = cu_seqlens_k + inputs['block_tables'] = block_tables + inputs['cache_k_per_batch_maxs'] = cache_k_per_batch_maxs + inputs['cache_v_per_batch_maxs'] = cache_v_per_batch_maxs + if pre_key_cache is not None: + inputs['pre_key_cache'] = pre_key_cache + if 
pre_value_cache is not None: + inputs['pre_value_cache'] = pre_value_cache + if rope_emb is not None: + inputs['rope_emb'] = rope_emb + if mask is not None: + inputs['mask'] = mask + if tgt_mask is not None: + inputs['tgt_mask'] = tgt_mask + if cache_k_quant_scales is not None: + inputs["cache_k_quant_scales"] = cache_k_quant_scales + if cache_v_quant_scales is not None: + inputs["cache_v_quant_scales"] = cache_v_quant_scales + if cache_k_dequant_scales is not None: + inputs["cache_k_dequant_scales"] = cache_k_dequant_scales + if cache_v_dequant_scales is not None: + inputs["cache_v_dequant_scales"] = cache_v_dequant_scales + if qkv_out_scale is not None: + inputs["qkv_out_scale"] = qkv_out_scale + if qkv_bias is not None: + inputs["qkv_bias"] = qkv_bias + if out_shift is not None: + inputs["out_shift"] = out_shift + if out_smooth is not None: + inputs["out_smooth"] = out_smooth + if max_enc_len_this_time is not None: + inputs["max_enc_len_this_time"] = max_enc_len_this_time + if max_dec_len_this_time is not None: + inputs["max_dec_len_this_time"] = max_dec_len_this_time + + outputs = { + 'fmha_out': out, + 'qkv_out': qkv, + 'key_cache_out': key_cache, + 'value_cache_out': value_cache, + } + helper.append_op( + type='block_multihead_attention_xpu', + inputs=inputs, + outputs=outputs, + attrs={ + 'max_seq_len': max_seq_len, + 'block_size': block_size, + 'use_neox_style': use_neox_style, + 'dynamic_cachekv_quant': use_dynamic_cachekv_quant, + 'quant_round_type': quant_round_type, + 'quant_max_bound': quant_max_bound, + 'quant_min_bound': quant_min_bound, + 'out_scale': out_scale, + 'compute_dtype': compute_dtype, + }, + ) + return out, qkv, key_cache, value_cache diff --git a/python/paddle/jit/api.py b/python/paddle/jit/api.py index 56a0d8a613be6..d80737b0646e3 100644 --- a/python/paddle/jit/api.py +++ b/python/paddle/jit/api.py @@ -23,10 +23,21 @@ import types import warnings from collections import OrderedDict +from collections.abc import Callable, Sequence from 
contextlib import contextmanager -from typing import Any +from types import ModuleType +from typing import ( + Any, + Protocol, + TypedDict, + TypeVar, + overload, +) + +from typing_extensions import Literal, NotRequired, ParamSpec, TypeAlias, Unpack import paddle +from paddle._typing import NestedSequence from paddle.base import core, dygraph from paddle.base.compiler import ( BuildStrategy, @@ -45,6 +56,7 @@ from paddle.base.wrapped_decorator import wrap_decorator from paddle.framework import use_pir_api from paddle.nn import Layer +from paddle.static import InputSpec from paddle.static.io import save_inference_model from paddle.utils.environments import ( BooleanEnvironmentVariable, @@ -71,6 +83,11 @@ ENV_ENABLE_SOT = BooleanEnvironmentVariable("ENABLE_FALL_BACK", True) +_LayerT = TypeVar("_LayerT", bound=Layer) +_RetT = TypeVar("_RetT") +_InputT = ParamSpec("_InputT") +Backends: TypeAlias = Literal["CINN"] + @contextmanager def sot_mode_guard(value: bool): @@ -98,13 +115,13 @@ def copy_decorator_attrs(original_func, decorated_obj): return decorated_obj -def ignore_module(modules: list[Any]): +def ignore_module(modules: list[ModuleType]) -> None: """ Adds modules that ignore transcription. Builtin modules that have been ignored are collections, pdb, copy, inspect, re, numpy, logging, six Args: - modules (List[Any]): Ignored modules that you want to add + modules (list[ModuleType]): Ignored modules that you want to add Examples: .. code-block:: python @@ -133,6 +150,67 @@ def _check_and_set_backend(backend, build_strategy): build_strategy.build_cinn_pass = True +class ToStaticOptions(TypedDict): + property: NotRequired[bool] + full_graph: NotRequired[bool] + + +class ToStaticDecorator(Protocol): + @overload + def __call__(self, function: _LayerT) -> _LayerT: + ... + + @overload + def __call__( + self, function: Callable[_InputT, _RetT] + ) -> StaticFunction[_InputT, _RetT]: + ... 
+ + +@overload +def to_static( + function: _LayerT, + input_spec: NestedSequence[InputSpec] | None = ..., + build_strategy: BuildStrategy | None = ..., + backend: Backends | None = ..., + **kwargs: Unpack[ToStaticOptions], +) -> _LayerT: + ... + + +@overload +def to_static( + function: Callable[_InputT, _RetT], + input_spec: NestedSequence[InputSpec] | None = ..., + build_strategy: BuildStrategy | None = ..., + backend: Backends | None = ..., + **kwargs: Unpack[ToStaticOptions], +) -> StaticFunction[_InputT, _RetT]: + ... + + +@overload +def to_static( + function: Any, + input_spec: NestedSequence[InputSpec] | None = ..., + build_strategy: BuildStrategy | None = ..., + backend: Backends | None = ..., + **kwargs: Unpack[ToStaticOptions], +) -> Any: + ... + + +@overload +def to_static( + function: None = ..., + input_spec: NestedSequence[InputSpec] | None = ..., + build_strategy: BuildStrategy | None = ..., + backend: Backends | None = ..., + **kwargs: Unpack[ToStaticOptions], +) -> ToStaticDecorator: + ... + + def to_static( function=None, input_spec=None, @@ -254,6 +332,28 @@ def decorated(python_func): return decorated +class NotToStaticDecorator(Protocol): + @overload + def __call__( + self, func: Callable[_InputT, _RetT] + ) -> Callable[_InputT, _RetT]: + ... + + @overload + def __call__(self, func: None = ...) -> NotToStaticDecorator: + ... + + +@overload +def not_to_static(func: Callable[_InputT, _RetT]) -> Callable[_InputT, _RetT]: + ... + + +@overload +def not_to_static(func: None = ...) -> NotToStaticDecorator: + ... + + def not_to_static(func=None): """ A Decorator to suppresses the convention of a function. @@ -337,14 +437,12 @@ def output_spec(self, spec): return if not isinstance(spec, list): raise TypeError( - "The config `output_spec` should be 'list', but received input type is %s." - % type(input) + f"The config `output_spec` should be 'list', but received input type is {type(input)}." 
) for var in spec: if not isinstance(var, core.eager.Tensor): raise TypeError( - "The element in config `output_spec` list should be 'Variable', but received element's type is %s." - % type(var) + f"The element in config `output_spec` list should be 'Variable', but received element's type is {type(var)}." ) self._output_spec = spec @@ -358,8 +456,7 @@ def model_filename(self, filename): return if not isinstance(filename, str): raise TypeError( - "The config `model_filename` should be str, but received input's type is %s." - % type(filename) + f"The config `model_filename` should be str, but received input's type is {type(filename)}." ) if len(filename) == 0: raise ValueError("The config `model_filename` is empty string.") @@ -375,8 +472,7 @@ def params_filename(self, filename): return if not isinstance(filename, str): raise TypeError( - "The config `params_filename` should be str, but received input's type is %s." - % type(filename) + f"The config `params_filename` should be str, but received input's type is {type(filename)}." ) if len(filename) == 0: raise ValueError("The config `params_filename` is empty string.") @@ -392,13 +488,22 @@ def keep_name_table(self, value): return if not isinstance(value, bool): raise TypeError( - "The config `keep_name_table` should be bool value, but received input's type is %s." - % type(value) + f"The config `keep_name_table` should be bool value, but received input's type is {type(value)}." 
) self._keep_name_table = value -def _parse_save_configs(configs): +class _SaveLoadOptions(TypedDict): + output_spec: NotRequired[Sequence[InputSpec]] + with_hook: NotRequired[bool] + combine_params: NotRequired[bool] + clip_extra: NotRequired[bool] + skip_forward: NotRequired[bool] + input_names_after_prune: NotRequired[list[str]] + skip_prune_program: NotRequired[bool] + + +def _parse_save_configs(configs: _SaveLoadOptions): supported_configs = [ "output_spec", "with_hook", @@ -413,8 +518,7 @@ def _parse_save_configs(configs): for key in configs: if key not in supported_configs: raise ValueError( - "The additional config (%s) of `paddle.jit.save` is not supported." - % (key) + f"The additional config ({key}) of `paddle.jit.save` is not supported." ) # construct inner config @@ -439,8 +543,7 @@ def _parse_load_config(configs): for key in configs: if key not in supported_configs: raise ValueError( - "The additional config (%s) of `paddle.jit.load` is not supported." - % (key) + f"The additional config ({key}) of `paddle.jit.load` is not supported." ) # construct inner config @@ -554,7 +657,7 @@ def _get_output_vars(outputs, output_spec, with_hook=False): output_size = len(result_list) if len(output_spec) == output_size: for var in output_spec: - if not isinstance(var, paddle.pir.Value, int): + if not isinstance(var, (paddle.pir.Value, int)): warnings.warn(output_spec_is_not_value_error % var.name) else: if var not in ValueSet(result_list): @@ -636,9 +739,9 @@ def _build_load_path_and_config(path, config): ) elif not prefix_format_exist and not directory_format_exist: raise ValueError( - "The ``path`` (%s) to load model not exists. " + f"The ``path`` ({path}) to load model not exists. " "Please make sure that *.pdmodel exists or " - "don't using ``skip_forward=True`` to jit.save." % path + "don't using ``skip_forward=True`` to jit.save." 
) else: if prefix_format_exist: @@ -802,7 +905,12 @@ def set_property(meta, key, val): @_run_save_pre_hooks @switch_to_static_graph -def save(layer, path, input_spec=None, **configs): +def save( + layer: Callable[_InputT, _RetT], + path: str, + input_spec: InputSpec | None = None, + **configs: Unpack[_SaveLoadOptions], +) -> None: """ Saves input Layer or function as ``paddle.jit.TranslatedLayer`` format model, which can be used for inference or fine-tuning after loading. @@ -954,8 +1062,7 @@ def save(layer, path, input_spec=None, **configs): isinstance(layer, (Layer, StaticFunction)) or inspect.isfunction(layer) ): raise TypeError( - "The input of paddle.jit.save should be 'Layer' or 'Function', but received input type is %s." - % type(layer) + f"The input of paddle.jit.save should be 'Layer' or 'Function', but received input type is {type(layer)}." ) elif inspect.isfunction(layer) or isinstance(layer, StaticFunction): warnings.warn( @@ -996,14 +1103,12 @@ def save(layer, path, input_spec=None, **configs): and 'forward' != attr_func ): raise ValueError( - "If there are static functions other than 'forward' that need to be saved, the input 'input_spec' should be None, but received the type of 'input_spec' is %s." - % type(input_spec) + f"If there are static functions other than 'forward' that need to be saved, the input 'input_spec' should be None, but received the type of 'input_spec' is {type(input_spec)}." ) if not isinstance(input_spec, (list, tuple)): raise TypeError( - "The input input_spec should be 'list', but received input_spec's type is %s." - % type(input_spec) + f"The input input_spec should be 'list', but received input_spec's type is {type(input_spec)}." 
) inner_input_spec = [] for var in paddle.utils.flatten(input_spec): @@ -1372,7 +1477,9 @@ def save(layer, path, input_spec=None, **configs): @dygraph_only -def load(path, **configs): +def load( + path: str, **configs: Unpack[_SaveLoadOptions] +) -> TranslatedLayer | PirTranslatedLayer: """ :api_attr: imperative diff --git a/python/paddle/jit/dy2static/ast_utils.py b/python/paddle/jit/dy2static/ast_utils.py index fc703dd6f6e49..7c4c90ec44d0e 100644 --- a/python/paddle/jit/dy2static/ast_utils.py +++ b/python/paddle/jit/dy2static/ast_utils.py @@ -27,8 +27,7 @@ def ast_to_source_code(ast_node): """ if not isinstance(ast_node, (gast.AST, ast.AST)): raise TypeError( - "Type of ast_root should be gast.AST or ast.AST, but received %s." - % type(ast_node) + f"Type of ast_root should be gast.AST or ast.AST, but received {type(ast_node)}." ) if isinstance(ast_node, gast.AST): ast_node = gast.gast_to_ast(ast_node) diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index 7ef8b4ce88736..10d2c9633ae80 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -615,8 +615,7 @@ def convert_len(var): return paddle.tensor.array_length(var) else: raise TypeError( - 'len(var) only supports LoDTensor/LoDTensorArray/SelectedRows, but received %s.' - % type(var) + f'len(var) only supports LoDTensor/LoDTensorArray/SelectedRows, but received {type(var)}.' 
) elif isinstance(var, Value): if var.is_dense_tensor_type() or var.is_selected_row_type(): diff --git a/python/paddle/jit/dy2static/function_spec.py b/python/paddle/jit/dy2static/function_spec.py index 7d5605f547df8..ce0b8382e9d01 100644 --- a/python/paddle/jit/dy2static/function_spec.py +++ b/python/paddle/jit/dy2static/function_spec.py @@ -179,7 +179,7 @@ def pir_to_static_inputs_with_spec(self, input_with_spec, main_program): if isinstance(var_spec, paddle.static.InputSpec): stop_gradient = getattr(var_spec, 'stop_gradient', False) feed_value = paddle.static.input.data( - name=var_spec.name or "feed_%s" % i, + name=var_spec.name or f"feed_{i}", shape=var_spec.shape, dtype=convert_dtype(var_spec.dtype), ) @@ -232,7 +232,7 @@ def to_static_inputs_with_spec(self, input_with_spec, main_program): stop_gradient = getattr(var_spec, 'stop_gradient', False) feed_layer = block.create_var( # TODO(Aurelius84): consider a more elegant way to name this - name=var_spec.name or "feed_%s" % i, + name=var_spec.name or f"feed_{i}", shape=var_spec.shape, dtype=var_spec.dtype, is_data=True, diff --git a/python/paddle/jit/dy2static/logging_utils.py b/python/paddle/jit/dy2static/logging_utils.py index d9e20b2a81d5c..837c3efae442d 100644 --- a/python/paddle/jit/dy2static/logging_utils.py +++ b/python/paddle/jit/dy2static/logging_utils.py @@ -180,7 +180,7 @@ def _output_to_stdout(self, msg, *args): _TRANSLATOR_LOGGER = TranslatorLogger() -def set_verbosity(level=0, also_to_stdout=False): +def set_verbosity(level: int = 0, also_to_stdout: bool = False) -> None: """ Sets the verbosity level of log for dygraph to static graph. Logs can be output to stdout by setting `also_to_stdout`. 
@@ -215,11 +215,13 @@ def set_verbosity(level=0, also_to_stdout=False): _TRANSLATOR_LOGGER.need_to_echo_log_to_stdout = also_to_stdout -def get_verbosity(): +def get_verbosity() -> int: return _TRANSLATOR_LOGGER.verbosity_level -def set_code_level(level=LOG_AllTransformer, also_to_stdout=False): +def set_code_level( + level: int = LOG_AllTransformer, also_to_stdout: bool = False +) -> None: """ Sets the level to print code from specific level Ast Transformer. Code can be output to stdout by setting `also_to_stdout`. diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index 8571740db2659..f4fc6ea387f97 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -1108,8 +1108,7 @@ def _check_params_all_inited(self, main_program): """ if not isinstance(self._params, (list, tuple)): raise TypeError( - "Type of self._params in PartialProgramLayer should be list or tuple, but received %s." - % type(self._params) + f"Type of self._params in PartialProgramLayer should be list or tuple, but received {type(self._params)}." ) param_and_buffer_names_set = set() @@ -1127,12 +1126,11 @@ def _check_params_all_inited(self, main_program): if name not in param_and_buffer_names_set: raise ValueError( "\n\tWe don't support to define layer with parameters in the function decorated by `@to_static`." - "\n\tBut we found parameter(%s) was created in the decorated function." + f"\n\tBut we found parameter({name}) was created in the decorated function." "\n" "\n\tRevise suggestion: " "\n\t\t1. Please ensure all your sublayers are inherited from nn.Layer." "\n\t\t2. 
Please use nn.ParameterList and nn.LayerList as container instead of using a native Python container such as List" - % name ) def _valid_vars(self, vars): diff --git a/python/paddle/jit/dy2static/pir_partial_program.py b/python/paddle/jit/dy2static/pir_partial_program.py index 55d8ab47e92a4..ff6ee46c8a1f9 100644 --- a/python/paddle/jit/dy2static/pir_partial_program.py +++ b/python/paddle/jit/dy2static/pir_partial_program.py @@ -1257,8 +1257,7 @@ def _check_params_all_inited(self, main_program): """ if not isinstance(self._params, (list, tuple)): raise TypeError( - "Type of self._params in PartialProgramLayer should be list or tuple, but received %s." - % type(self._params) + f"Type of self._params in PartialProgramLayer should be list or tuple, but received {type(self._params)}." ) param_and_buffer_names_set = set() diff --git a/python/paddle/jit/dy2static/program_translator.py b/python/paddle/jit/dy2static/program_translator.py index ea4040485b64a..ac50ba8b5f50c 100644 --- a/python/paddle/jit/dy2static/program_translator.py +++ b/python/paddle/jit/dy2static/program_translator.py @@ -19,11 +19,14 @@ import threading import warnings import weakref -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Callable, Generic, TypeVar + +from typing_extensions import ParamSpec, Self import paddle import paddle.pir.core as ir_static from paddle import decomposition, get_flags +from paddle._typing import NestedSequence from paddle.base import core, framework from paddle.base.data_feeder import check_type from paddle.base.dygraph.base import ( @@ -35,6 +38,7 @@ from paddle.nn.layer import layers from paddle.pir import Value from paddle.pir.core import _convert_into_value, static_op_arg_cast_guard +from paddle.static import InputSpec, Program from paddle.utils import flatten, gast from . 
import error, logging_utils @@ -49,8 +53,9 @@ create_and_update_origin_info_map, update_op_callstack_with_origin_info, ) -from .partial_program import PartialProgramLayerHook +from .partial_program import PartialProgramLayer, PartialProgramLayerHook from .pir_partial_program import ( + PartialProgramLayer as PirPartialProgramLayer, PartialProgramLayerHook as PirPartialProgramLayerHook, ) from .transformers import DygraphToStaticAst @@ -72,6 +77,9 @@ if TYPE_CHECKING: from paddle.static.amp.fp16_utils import AmpOptions +_RetT = TypeVar("_RetT") +_InputT = ParamSpec("_InputT") + __all__ = [] # For each traced function, we set `max_traced_program_count` = 10 to consider caching performance. @@ -318,7 +326,7 @@ def unwrap_decorators(func): return decorators, cur -class StaticFunction: +class StaticFunction(Generic[_InputT, _RetT]): def __init__(self, function, input_spec=None, **kwargs): """ Initializes a `StaticFunction`. @@ -374,7 +382,7 @@ def __init__(self, function, input_spec=None, **kwargs): self._property = kwargs.get("property", False) self._get_debug_name() - def _get_debug_name(self): + def _get_debug_name(self) -> str: try: if self._class_instance: self._debug_name = self._class_instance.__class__.__name__ @@ -384,11 +392,11 @@ def _get_debug_name(self): self._debug_name = "static_function" @property - def is_property(self): + def is_property(self) -> bool: # whether is class proproty to be exported. 
return self._property - def train(self): + def train(self) -> None: if ( isinstance(self._class_instance, layers.Layer) and self._class_instance.training is False @@ -399,7 +407,7 @@ def train(self): ) self._training = True - def eval(self): + def eval(self) -> None: if ( isinstance(self._class_instance, layers.Layer) and self._class_instance.training is True @@ -452,12 +460,12 @@ def forward(self, x, y): return self._descriptor_cache[instance] - def _clone(self): + def _clone(self) -> Self: return self.__class__( self.dygraph_function, self._input_spec, **self._kwargs ) - def __call__(self, *args, **kwargs): + def __call__(self, *args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: """ Supports to call the returned instance with input `args` and `kwargs` directly. @@ -493,7 +501,7 @@ def __call__(self, *args, **kwargs): return self._perform_call(*args, **kwargs) - def _is_train_mode(self): + def _is_train_mode(self) -> bool: if self._class_instance is not None: if not hasattr(self._class_instance, 'training'): raise TypeError( @@ -504,7 +512,9 @@ def _is_train_mode(self): else: return self._training - def _call_dygraph_function(self, *args, **kwargs): + def _call_dygraph_function( + self, *args: _InputT.args, **kwargs: _InputT.kwargs + ) -> _RetT: """ Calls dygraph function directly and returns the outputs. 
@@ -526,7 +536,9 @@ def _raise_when_property(self): if self.is_property: raise RuntimeError("Can not call the func when property=True.") - def get_concrete_program(self, *args, **kwargs): + def get_concrete_program( + self, *args: _InputT.args, **kwargs: _InputT.kwargs + ) -> tuple[ConcreteProgram, PartialProgramLayer | PirPartialProgramLayer]: raise NotImplementedError("Not implemented yet.") def get_concrete_program_with_cache_key(self, cached_key): @@ -536,11 +548,11 @@ def get_traced_count(self): raise NotImplementedError("Not implemented yet.") @property - def code(self): + def code(self) -> str: raise NotImplementedError("Not implemented yet.") @property - def dygraph_function(self): + def dygraph_function(self) -> Callable[_InputT, _RetT]: """ Returns the original decorated function. """ @@ -550,15 +562,18 @@ def dygraph_function(self): return self._dygraph_function @property - def concrete_program(self): + def concrete_program(self) -> ConcreteProgram: raise NotImplementedError("Not implemented yet.") def concrete_program_specify_input_spec( - self, input_spec=None, with_hook=False, is_prim_infer=False + self, + input_spec: NestedSequence[InputSpec] | None = None, + with_hook: bool = False, + is_prim_infer: bool = False, ): raise NotImplementedError("Not implemented yet.") - def rollback(self): + def rollback(self) -> Callable[_InputT, _RetT]: """ Rollback into original dygraph functions for current class instance. 
@@ -662,23 +677,23 @@ def __deepcopy__(self, memo): return self._dygraph_function @property - def inputs(self): + def inputs(self) -> list[Any]: raise NotImplementedError("Not implemented yet.") @property - def outputs(self): + def outputs(self) -> list[Any]: raise NotImplementedError("Not implemented yet.") @property - def main_program(self): + def main_program(self) -> Program: raise NotImplementedError("Not implemented yet.") @property - def program_cache(self): + def program_cache(self) -> ProgramCache: raise NotImplementedError("Not implemented yet.") @property - def function_spec(self): + def function_spec(self) -> FunctionSpec: raise NotImplementedError("Not implemented yet.") @@ -762,10 +777,10 @@ def program_cache(self): @property def function_spec(self): - raise_error_template("function_spec ")() + raise_error_template("function_spec")() -class ASTStaticFunction(StaticFunction): +class ASTStaticFunction(StaticFunction[_InputT, _RetT]): """ Wrapper class to Manage program conversion of decorated function. @@ -812,7 +827,9 @@ def _perform_call(self, *args, **kwargs): ) raise e - def get_concrete_program(self, *args, **kwargs): + def get_concrete_program( + self, *args: _InputT.args, **kwargs: _InputT.kwargs + ) -> tuple[ConcreteProgram, PartialProgramLayer | PirPartialProgramLayer]: """ Returns traced concrete program and inner executable partial layer. @@ -867,7 +884,9 @@ def get_concrete_program(self, *args, **kwargs): partial_program_layer._debug_name = self._debug_name return concrete_program, partial_program_layer - def get_concrete_program_with_cache_key(self, cached_key): + def get_concrete_program_with_cache_key( + self, cached_key: CacheKey + ) -> tuple[ConcreteProgram, PartialProgramLayer | PirPartialProgramLayer]: """ Returns traced concrete program and inner executable partial layer by cached key. 
@@ -884,14 +903,14 @@ def get_concrete_program_with_cache_key(self, cached_key): ) = self._program_cache.get_program_without_cache(cached_key) return concrete_program, partial_program_layer - def get_traced_count(self): + def get_traced_count(self) -> int: """ Returns the number of traced programs for the decorated function. """ return len(self._program_cache) @property - def code(self): + def code(self) -> str: """ Returns the source code of transformed static function for debugging. """ @@ -900,7 +919,7 @@ def code(self): return source_code @property - def concrete_program(self): + def concrete_program(self) -> ConcreteProgram: """ Returns recent ConcreteProgram instance of decorated function. @@ -930,8 +949,11 @@ def concrete_program(self): return self.concrete_program_specify_input_spec(input_spec=None) def concrete_program_specify_input_spec( - self, input_spec=None, with_hook=False, is_prim_infer=False - ): + self, + input_spec: NestedSequence[InputSpec] | None = None, + with_hook: bool = False, + is_prim_infer: bool = False, + ) -> ConcreteProgram: """ Returns recent ConcreteProgram instance of decorated function while specifying input_spec. If the self._function_spec already has @@ -1006,7 +1028,7 @@ def concrete_program_specify_input_spec( ) @property - def inputs(self): + def inputs(self) -> list[Any]: """ Returns input tensors of recent converted static program. """ @@ -1020,7 +1042,7 @@ def inputs(self): return inputs @property - def outputs(self): + def outputs(self) -> list[Any]: """ Returns output tensors of recent converted static program. """ @@ -1035,7 +1057,7 @@ def outputs(self): return outputs @property - def main_program(self): + def main_program(self) -> Program: """ Returns recent converted static main program. 
""" @@ -1045,11 +1067,11 @@ def main_program(self): return main_program @property - def program_cache(self): + def program_cache(self) -> ProgramCache: return self._program_cache @property - def function_spec(self): + def function_spec(self) -> FunctionSpec: return self._function_spec @@ -1597,8 +1619,7 @@ def _build_once(self, cache_key): def __getitem__(self, item): if not isinstance(item, CacheKey): raise ValueError( - 'type(item) should be CacheKey, but received %s' - % type_name(item) + f'type(item) should be CacheKey, but received {type_name(item)}' ) item_id = hash(item) self._recent_cache_key = item @@ -1621,8 +1642,7 @@ def get_program_without_cache(self, cache_key): def get_program(self, item): if not isinstance(item, CacheKey): raise ValueError( - "Input item's type should be FunctionSpec, but received %s" - % type_name(item) + f"Input item's type should be FunctionSpec, but received {type_name(item)}" ) item_id = hash(item) if item_id not in self._caches: @@ -1757,7 +1777,7 @@ def enable(self, enable_to_static): self.enable_to_static = enable_to_static -def enable_to_static(enable_to_static_bool): +def enable_to_static(enable_to_static_bool: bool) -> None: """ Enable or disable the converting from imperative to static graph by ProgramTranslator globally. diff --git a/python/paddle/jit/dy2static/transformers/early_return_transformer.py b/python/paddle/jit/dy2static/transformers/early_return_transformer.py index 4dab1e5ab1638..ce8cf9e606878 100644 --- a/python/paddle/jit/dy2static/transformers/early_return_transformer.py +++ b/python/paddle/jit/dy2static/transformers/early_return_transformer.py @@ -36,9 +36,7 @@ def transform(self): def is_define_return_in_if(self, node): assert isinstance( node, gast.If - ), "Type of input node should be gast.If, but received %s ." % type( - node - ) + ), f"Type of input node should be gast.If, but received {type(node)}." 
for child in node.body: if isinstance(child, gast.Return): return True diff --git a/python/paddle/jit/dy2static/utils.py b/python/paddle/jit/dy2static/utils.py index ad195befba4b5..03a2cd06d3211 100644 --- a/python/paddle/jit/dy2static/utils.py +++ b/python/paddle/jit/dy2static/utils.py @@ -204,7 +204,7 @@ def make_hashable(x, error_msg=None): return tuple(map(make_hashable, x.values())) error_msg = error_msg or "Requires a hashable object." - raise ValueError(error_msg + " But received type: %s" % type_name(x)) + raise ValueError(f"{error_msg} But received type: {type_name(x)}") return x @@ -327,8 +327,7 @@ def func_prefix(func): callable_func = getattr(module, func_name) else: raise ValueError( - 'Function: %s doesn\'t exist in the Module transformed from AST.' - % func_name + f'Function: {func_name} doesn\'t exist in the Module transformed from AST.' ) # After transform dygraph function into callable_func saved in tmp file, # it lost the global variables from imported statements or defined in source file. diff --git a/python/paddle/jit/pir_translated_layer.py b/python/paddle/jit/pir_translated_layer.py index 8a6e3ede35e2a..df3217ceb07b3 100644 --- a/python/paddle/jit/pir_translated_layer.py +++ b/python/paddle/jit/pir_translated_layer.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os import numpy as np @@ -217,7 +219,6 @@ def _load_pir_parameter_vars(model_path, program_holder, params_filename): # load all vars assert params_filename is not None, "params_filename should not be None." var_file_path = os.path.join(model_path, params_filename) - if os.path.exists(var_file_path): core.load_combine_func( var_file_path, @@ -228,8 +229,7 @@ def _load_pir_parameter_vars(model_path, program_holder, params_filename): ) else: raise ValueError( - "The file %s does not exist. Please check the model path." 
- % var_file_path + f"The file {var_file_path} does not exist. Please check the model path." ) load_var_dict.update(other_var_dict) @@ -328,8 +328,7 @@ def _run_dygraph(instance, input, program_holder): for i, value in enumerate(input): if not isinstance(value, (np.ndarray, core.eager.Tensor)): raise TypeError( - "The type of input in PirTranslatedLayer must be numpy array or Variable(Tensor), but received %s." - % type(value) + f"The type of input in PirTranslatedLayer must be numpy array or Variable(Tensor), but received {type(value)}." ) # NOTE: In order to unify the API, firstly convert the input to Tensor if isinstance(value, np.ndarray): @@ -361,8 +360,7 @@ def _run_dygraph(instance, input, program_holder): persistable_tensors.append(instance._buffers[dy_var_name]) else: raise ValueError( - "The persistable variable %s does not exist in current PirTranslatedLayer." - % var_name + f"The persistable variable {var_name} does not exist in current PirTranslatedLayer." ) from paddle.jit.dy2static.pir_partial_program import PartialProgramLayer @@ -378,7 +376,6 @@ def _run_dygraph(instance, input, program_holder): parameters, ) instance.layer = layer - if instance._is_test: layer.training = False else: @@ -392,9 +389,42 @@ def _run_dygraph(instance, input, program_holder): return instance.layer(input_tensors) -def _run_static_graph(program_holder, trace_program): - paddle.base.framework.switch_main_program(trace_program) - return program_holder.output_vars +def _run_static_graph(inputs, program_holder, src_program): + ''' + This function is used when the pirTranslatedLayer is + applied for dy_to_static conversion. 
+ ''' + dst_program = paddle.static.default_main_program() + value_map = paddle.pir.IrMapping() + # Establish a mapping relationship between existing parameters + # and corresponding parameters in the program to be copied + len_dst_op = len(dst_program.global_block().ops) + for dst_op in dst_program.global_block().ops: + if dst_op.name() == "builtin.parameter": + for src_op in src_program.global_block().ops[:len_dst_op]: + if ( + src_op.name() == dst_op.name() + and src_op.result(0).name == dst_op.result(0).name + ): + for i in range(src_op.num_results()): + value_map.add(src_op.result(i), dst_op.result(i)) + # Establish a mapping relationship between truly inputs + # and corresponding inputs in the program to be copied + src_inputs = program_holder.input_vars + if len(src_inputs) != len(inputs): + raise ValueError( + f"The number of input is invalid, expected {len(src_inputs)}, but received {len(inputs)}." + ) + for src_input, input_ in zip(src_inputs, inputs): + value_map.add(src_input, input_) + + # find the insert point for copy + current_insert_point = paddle.pir.get_current_insertion_point() + current_block = current_insert_point.block() + src_program.copy_to_block(value_map, current_block) + + output = [value_map.look_up(v) for v in program_holder.output_vars] + return output[0] if len(output) == 1 else output def _collect_current_and_parent_var(program, block_idx): @@ -514,7 +544,11 @@ class PirTranslatedLayer(layers.Layer): """ - def __init__(self, programs, persistable_vars): + def __init__( + self, + programs: dict[str, paddle.static.Program], + persistable_vars: dict[str, paddle.Tensor], + ): super().__init__() if not isinstance(programs, dict): @@ -561,7 +595,7 @@ def _construct(model_path, configs=None): # 0. 
dir and filename check model_path = os.path.normpath(model_path) if not os.path.isdir(model_path): - raise ValueError("There is no directory named '%s'" % model_path) + raise ValueError(f"There is no directory named '{model_path}'") model_filename = None params_filename = None if configs is not None: @@ -608,7 +642,7 @@ def __i_m_p_l__(self, *input): return _run_dygraph(self, input, program_holder) else: return _run_static_graph( - program_holder, program_holder.infer_program + input, program_holder, program_holder.infer_program ) __i_m_p_l__.__name__ = method_name @@ -719,8 +753,7 @@ def _get_program_holder(self, method_name='forward'): program_holder = self._program_holder_dict.get(method_name, None) if program_holder is None: raise ValueError( - "The method `%s` does not exist in loaded PirTranslatedLayer." - % method_name + f"The method `{method_name}` does not exist in loaded PirTranslatedLayer." ) return program_holder diff --git a/python/paddle/jit/sot/infer_meta.py b/python/paddle/jit/sot/infer_meta.py index 3ec9f0d891c9e..a67b10c27105f 100644 --- a/python/paddle/jit/sot/infer_meta.py +++ b/python/paddle/jit/sot/infer_meta.py @@ -11,8 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations from functools import cached_property +from typing import TypeVar import paddle from paddle.amp.auto_cast import amp_state @@ -26,10 +28,32 @@ from .utils import Cache, Singleton, map_if_extend, meta_str +DynamicSymbolT = TypeVar("DynamicSymbolT") + + +class SymbolicInt(metaclass=Singleton): + def __eq__(self, other) -> bool: + return isinstance(other, (int, SymbolicInt)) + + def __repr__(self) -> str: + return "SymbolicInt()" + + def __str__(self) -> str: + return "SymbolicInt()" + class MetaInfo: def __init__( - self, shape, dtype, stop_gradient, name, persistable, type, place + self, + shape, + dtype, + stop_gradient, + name, + persistable, + type, + place, + *, + dynamic_axes: list[int] | None = None, ): self.name = name self.persistable = persistable @@ -38,9 +62,18 @@ def __init__( self.shape = shape self.dtype = dtype self.stop_gradient = stop_gradient + self.dynamic_axes = dynamic_axes or [] + + def get_dynamic_shape( + self, dynamic_symbol: DynamicSymbolT = -1 + ) -> list[int | DynamicSymbolT]: + return [ + dim if i not in self.dynamic_axes else dynamic_symbol + for i, dim in enumerate(self.shape) + ] @staticmethod - def from_tensor(tensor): + def from_tensor(tensor, *, dynamic_axes: list[int] | None = None): if isinstance(tensor, paddle.pir.Value): name = "Value@NoName" else: # For Tensor or Variable @@ -54,6 +87,7 @@ def from_tensor(tensor): ) assert isinstance(dtype, expected_dtype_class) + # TODO(@xiongkun) remove after pir become default state. # We always use float32 in simulation if AMP is enabled. current_amp_state = amp_state() if ( @@ -63,7 +97,12 @@ def from_tensor(tensor): and current_amp_state["dtype"] == "float16" ): dtype = paddle.float32 - # TODO(@xiongkun) remove after pir become default state. 
+ dynamic_axes = dynamic_axes or [] + dynamic_axes = [ + i + for i, dim in enumerate(tensor.shape) + if dim == -1 or i in dynamic_axes + ] return MetaInfo( list(tensor.shape), dtype, @@ -72,6 +111,7 @@ def from_tensor(tensor): persistable, tensor.type, tensor.place, + dynamic_axes=dynamic_axes, ) def is_dynamic_shape(self): @@ -82,12 +122,14 @@ def is_dynamic_shape(self): return -1 in self.shape def to_input_spec(self): + shape = self.get_dynamic_shape(None) return paddle.static.InputSpec( - self.shape, dtype=self.dtype, stop_gradient=self.stop_gradient + shape, dtype=self.dtype, stop_gradient=self.stop_gradient ) def guard_str(self): - return f"({self.shape}, {self.dtype}, {self.stop_gradient})" + shape = self.get_dynamic_shape(SymbolicInt()) + return f"({shape}, {self.dtype}, {self.stop_gradient})" def __repr__(self): return meta_str(self.shape, self.dtype, self.stop_gradient) @@ -161,20 +203,22 @@ def startup_program(self): else: return self.legacy_programs[1] - def create_var(self, meta): + def create_var(self, meta: MetaInfo): + shape = meta.get_dynamic_shape() + if paddle.framework.use_pir_api(): with paddle.static.program_guard( self.main_program, self.startup_program ): var = paddle.static.input.data( name=self.gen_name(meta), - shape=meta.shape, + shape=shape, dtype=convert_dtype(meta.dtype), ) var.stop_gradient = meta.stop_gradient else: var = self.main_program.global_block().create_var( - shape=meta.shape, + shape=shape, dtype=meta.dtype, stop_gradient=meta.stop_gradient, ) @@ -193,9 +237,10 @@ def infer_meta(self, func, *args, **kwargs): with paddle.base.framework._dygraph_guard(None), UniqueNameGuard( self.var_name_generator ): - args, kwargs = convert_meta_to_variable( - args - ), convert_meta_to_variable(kwargs) + args, kwargs = ( + convert_meta_to_variable(args), + convert_meta_to_variable(kwargs), + ) with paddle.static.program_guard( self.main_program, self.startup_program @@ -225,9 +270,11 @@ def convert_meta_to_input_spec(args): pred=lambda x: 
isinstance(x, MetaInfo), true_fn=lambda x: x.to_input_spec(), # TODO(xiongkun): can x be tensor ? - false_fn=lambda x: paddle.static.InputSpec.from_tensor(x) - if isinstance(x, paddle.Tensor) - else x, + false_fn=lambda x: ( + paddle.static.InputSpec.from_tensor(x) + if isinstance(x, paddle.Tensor) + else x + ), ) diff --git a/python/paddle/jit/sot/opcode_translator/executor/executor_cache.py b/python/paddle/jit/sot/opcode_translator/executor/executor_cache.py index f94884d0c118b..bbefddda639ad 100644 --- a/python/paddle/jit/sot/opcode_translator/executor/executor_cache.py +++ b/python/paddle/jit/sot/opcode_translator/executor/executor_cache.py @@ -56,12 +56,16 @@ class OpcodeExecutorCache(metaclass=Singleton): MAX_CACHE_SIZE = 20 cache: dict[types.CodeType, GuardedFunctions] translate_count: int - symbolic_inputs: dict[str, dict[int, int]] + code_symbolic_inputs: dict[types.CodeType, dict[str, dict[int, int]]] def __init__(self): self.cache = {} self.translate_count = 0 - self.symbolic_inputs = {} + self.code_symbolic_inputs = {} + + def get_symbolic_inputs(self, code: types.CodeType): + self.code_symbolic_inputs.setdefault(code, {}) + return self.code_symbolic_inputs[code] def clear(self): """ @@ -69,6 +73,7 @@ def clear(self): """ self.cache.clear() self.translate_count = 0 + self.code_symbolic_inputs.clear() def __call__(self, frame: types.FrameType, **kwargs) -> CustomCode: code: types.CodeType = frame.f_code diff --git a/python/paddle/jit/sot/opcode_translator/executor/function_graph.py b/python/paddle/jit/sot/opcode_translator/executor/function_graph.py index 99ea75ebbcd48..93de3c8dfe815 100644 --- a/python/paddle/jit/sot/opcode_translator/executor/function_graph.py +++ b/python/paddle/jit/sot/opcode_translator/executor/function_graph.py @@ -22,9 +22,9 @@ from collections import namedtuple from copy import deepcopy from functools import cached_property -from typing import Any, Callable +from typing import Any, Callable, Tuple, Union -from typing_extensions 
import TypeGuard +from typing_extensions import TypeAlias, TypeGuard import paddle from paddle.jit.utils import OrderedSet @@ -37,7 +37,7 @@ ast_infer_meta, ) from ...profiler import EventGuard, event_register -from ...symbolic.statement_ir import Reference, Symbol +from ...symbolic.statement_ir import Reference, StatementIR, Symbol from ...symbolic.symbolic_context import SymbolicTraceContext from ...utils import ( NameGenerator, @@ -81,6 +81,15 @@ map_variables, ) +CompileGraphResult: TypeAlias = Tuple[ + Callable[..., Any], + Tuple[ + StatementIR, + OrderedSet[Union[TensorVariable, SymbolicVariable]], + OrderedSet[Union[TensorVariable, SymbolicVariable]], + ], +] + def convert_to_meta(inputs: Any): """ @@ -329,7 +338,7 @@ def _restore_origin_opcode(self, stack_vars, store_var_info, instr_idx): self.pycode_gen.gen_enable_eval_frame() - name_gen = NameGenerator("__start_compile_saved_orig_") + name_gen = NameGenerator("___compile_fn_saved_orig_") # here is not update changed values, it just give names to stack vars # and want keep same interface as _build_compile_fn_with_name_store @@ -344,13 +353,18 @@ def _restore_origin_opcode(self, stack_vars, store_var_info, instr_idx): return VariableLoader(store_var_info, self.pycode_gen) - def _build_compile_fn_with_name_store(self, to_store_vars, store_var_info): + def _build_compile_fn_with_name_store( + self, + compile_graph_result: CompileGraphResult, + to_store_vars, + store_var_info, + ): # var_id -> local_name mapping to_store_vars = list( filter(lambda x: not isinstance(x, NullVariable), to_store_vars) ) - self.start_compile(*to_store_vars) - name_gen = NameGenerator("__start_compile_saved_") + self.compile_function(compile_graph_result, to_store_vars) + name_gen = NameGenerator("___compile_fn_saved_") for var in to_store_vars[::-1]: if store_var_info[var.id] is None: @@ -363,23 +377,38 @@ def _build_compile_fn_with_name_store(self, to_store_vars, store_var_info): return VariableLoader(store_var_info, 
self.pycode_gen) - def get_compiled_fn(self, *ret_vars): + def compile_graph(self, *ret_vars: VariableBase) -> CompileGraphResult: ret_items = [ ret_item for ret_var in ret_vars for ret_item in ret_var.flatten_items() ] - tensor_items = self._find_tensor_outputs(ret_items) - compiled_fn, _ = self.sir_ctx.compile_fn( - [Symbol(tensor_var.var_name) for tensor_var in tensor_items], + symbolic_outputs = self._find_tensor_outputs(ret_items) + statement_ir = self.sir_ctx.return_TOS( + [Symbol(tensor_var.var_name) for tensor_var in symbolic_outputs] + ) + if not statement_ir.statements: + return self.sir_ctx.compile_do_nothing(), ( + statement_ir, + OrderedSet(), + OrderedSet(), + ) + input_names = statement_ir.inputs + symbolic_inputs = self._find_tensor_inputs(input_names) + compiled_fn = self.sir_ctx.compile_fn( + statement_ir.name, + [var.meta.to_input_spec() for var in symbolic_inputs], **self._kwargs, ) + return compiled_fn, (statement_ir, symbolic_inputs, symbolic_outputs) - return compiled_fn - - @event_register("start_compile", event_level=2) - def start_compile(self, *ret_vars: VariableBase): + @event_register("compile_function", event_level=2) + def compile_function( + self, + compile_graph_result: CompileGraphResult, + ret_vars: list[VariableBase], + ): """ Generate bytecode based on the information collected by the simulation execution. 
@@ -393,48 +422,24 @@ def start_compile(self, *ret_vars: VariableBase): """ from ..breakpoint import BreakpointManager - BreakpointManager().on_event("start_compile") - - ret_items = [ - ret_item - for ret_var in ret_vars - for ret_item in ret_var.flatten_items() - ] - - tensor_items = self._find_tensor_outputs(ret_items) - compiled_fn, statement_ir = self.sir_ctx.compile_fn( - [Symbol(tensor_var.var_name) for tensor_var in tensor_items], - **self._kwargs, - ) - input_names = statement_ir.inputs - compiled_fn_name = f"__compiled_fn_{statement_ir.name}" + BreakpointManager().on_event("compile_function") + graph_fn, ( + statement_ir, + symbolic_inputs, + symbolic_outputs, + ) = compile_graph_result + compiled_fn_name = f"___graph_fn_{statement_ir.name}" # prepare function and inputs - self.pycode_gen.gen_load_object(compiled_fn, compiled_fn_name) - for name in input_names: - found = False - for variable in self.input_variables: - if ( - isinstance(variable, (TensorVariable, SymbolicVariable)) - and variable.get_symbol().name == name - ): - if isinstance(variable, SymbolicVariable): - self.pycode_gen.gen_load_object( - paddle.to_tensor, "___paddle_to_tensor" - ) - variable.tracker.gen_instructions(self.pycode_gen) - found = True - if isinstance(variable, SymbolicVariable): - self.pycode_gen.gen_call_function(1) - break - assert found, f"can't find input {name} in SIR." + self.pycode_gen.gen_load_object(graph_fn, compiled_fn_name) + self.gen_load_inputs(symbolic_inputs) # Pack all args into a tuple, because we don't support *args now. 
- self.pycode_gen.gen_build_tuple(count=len(input_names)) - # call the compiled_fn + self.pycode_gen.gen_build_tuple(count=len(symbolic_inputs)) + # call the graph_fn self.pycode_gen.gen_call_function(argc=1) # Store outputs to f_locals - self.pycode_gen.gen_unpack_sequence(count=len(tensor_items)) - for tensor_var in tensor_items: + self.pycode_gen.gen_unpack_sequence(count=len(symbolic_outputs)) + for tensor_var in symbolic_outputs: self.pycode_gen.gen_store_fast(tensor_var.out_var_name) # restore the outputs. for ret_var in ret_vars: @@ -725,6 +730,36 @@ def remove_global_guarded_variable(self, variable: VariableBase): if variable in self._global_guarded_variables: self._global_guarded_variables.remove(variable) + def _find_tensor_inputs( + self, input_names: list[str] + ) -> OrderedSet[TensorVariable | SymbolicVariable]: + inputs: OrderedSet[TensorVariable | SymbolicVariable] = OrderedSet() + for name in input_names: + found = False + for variable in self.input_variables: + if ( + isinstance(variable, (TensorVariable, SymbolicVariable)) + and variable.get_symbol().name == name + ): + inputs.add(variable) + found = True + break + assert found, f"can't find input {name} in SIR." + assert len(inputs) == len(input_names), "Number of inputs not match." 
+ return inputs + + def gen_load_inputs( + self, inputs: OrderedSet[TensorVariable | SymbolicVariable] + ): + for input_var in inputs: + if isinstance(input_var, SymbolicVariable): + self.pycode_gen.gen_load_object( + paddle.to_tensor, "___paddle_to_tensor" + ) + input_var.tracker.gen_instructions(self.pycode_gen) + if isinstance(input_var, SymbolicVariable): + self.pycode_gen.gen_call_function(1) + def _find_tensor_outputs( self, outputs: list[VariableBase] ) -> OrderedSet[TensorVariable | SymbolicVariable]: @@ -738,12 +773,14 @@ def _find_tensor_outputs( def is_graph_output( var, ) -> TypeGuard[TensorVariable | SymbolicVariable]: - return isinstance(var.tracker, DummyTracker) and isinstance( - var, (TensorVariable, SymbolicVariable) - ) + return isinstance( + var.tracker, (DummyTracker, SymbolicOperationTracker) + ) and isinstance(var, (TensorVariable, SymbolicVariable)) def collect_related_dummy_tensor(var): - if isinstance(var.tracker, DummyTracker): + if isinstance( + var.tracker, (DummyTracker, SymbolicOperationTracker) + ): if is_graph_output(var): return [var] else: @@ -758,7 +795,9 @@ def collect_related_dummy_tensor(var): ] = OrderedSet() # Find Tensor Variables from outputs. 
for output in outputs: - if isinstance(output.tracker, DummyTracker): + if isinstance( + output.tracker, (DummyTracker, SymbolicOperationTracker) + ): if is_graph_output(output): output_tensors.add(output) else: @@ -809,7 +848,7 @@ def restore_print_stmts(self, variables: list[VariableBase]): add_to_global_guarded_vars=False, ) - def restore_inplace_tensor(self, variables: list[VariableBase]): + def restore_inplace_tensor(self, variables: OrderedSet[VariableBase]): for var in variables: if not var.tracker.is_traceable(): continue diff --git a/python/paddle/jit/sot/opcode_translator/executor/opcode_executor.py b/python/paddle/jit/sot/opcode_translator/executor/opcode_executor.py index 70870913a6a02..3146609a595b0 100644 --- a/python/paddle/jit/sot/opcode_translator/executor/opcode_executor.py +++ b/python/paddle/jit/sot/opcode_translator/executor/opcode_executor.py @@ -1737,11 +1737,12 @@ def RETURN_CONST(self, instr: Instruction): return self.compile_return(ret_const) def compile_return(self, ret_val): - compile_fn = self._graph.get_compiled_fn(ret_val) - if compile_fn.graph_size() < ENV_MIN_GRAPH_SIZE.get(): + compile_graph_result = self._graph.compile_graph(ret_val) + graph_fn, _ = compile_graph_result + if graph_fn.graph_size() < ENV_MIN_GRAPH_SIZE.get(): self.new_code = None else: - self._graph.start_compile(ret_val) + self._graph.compile_function(compile_graph_result, [ret_val]) self._graph.pycode_gen.gen_return() self.new_code = self._graph.pycode_gen.gen_pycode() self.guard_fn = self._graph.guard_fn @@ -1775,15 +1776,16 @@ def get_compute_fn_and_update_changed_vars( store_vars.append(_var) store_var_info[_var.id] = name - compile_fn = self._graph.get_compiled_fn(*store_vars) + compile_graph_result = self._graph.compile_graph(*store_vars) + graph_fn, _ = compile_graph_result - if compile_fn.graph_size() < ENV_MIN_GRAPH_SIZE.get(): + if graph_fn.graph_size() < ENV_MIN_GRAPH_SIZE.get(): return self._graph._restore_origin_opcode( list(stack), store_var_info, 
end_idx ) else: return self._graph._build_compile_fn_with_name_store( - store_vars, store_var_info + compile_graph_result, store_vars, store_var_info ) @fallback_when_occur_error diff --git a/python/paddle/jit/sot/opcode_translator/executor/tracker.py b/python/paddle/jit/sot/opcode_translator/executor/tracker.py index 41ce17dba7cbc..85a7f68f6847a 100644 --- a/python/paddle/jit/sot/opcode_translator/executor/tracker.py +++ b/python/paddle/jit/sot/opcode_translator/executor/tracker.py @@ -127,7 +127,7 @@ def need_guard(self) -> bool: return False -class SymbolicOperationTracker(DummyTracker): +class SymbolicOperationTracker(Tracker): """ SymbolicOperationTracker is a subclass of Tracker that specifically tracks variables cannot be reproduced from the frame. It is mostly generated by complex operations of symbolic variables. @@ -151,6 +151,14 @@ def trace_value_from_frame(self): def __repr__(self) -> str: return f"SymbolicOperationTracker(num_inputs={len(self.inputs)})" + def is_traceable(self): + # TODO(zrr1999): to implement gen_instructions and trace_value_from_frame + return False + + def need_guard(self) -> bool: + # TODO(zrr1999): to implement gen_instructions and trace_value_from_frame + return False + class DanglingTracker(Tracker): """ diff --git a/python/paddle/jit/sot/opcode_translator/executor/variables/basic.py b/python/paddle/jit/sot/opcode_translator/executor/variables/basic.py index 965b7edba28ed..ffec4b1485cb6 100644 --- a/python/paddle/jit/sot/opcode_translator/executor/variables/basic.py +++ b/python/paddle/jit/sot/opcode_translator/executor/variables/basic.py @@ -90,6 +90,8 @@ core.DataType.BOOL: "bool", } +STATIC_DIM_FREQ_THRESHOLD = 5 + class ConstantVariable(VariableBase): """ @@ -174,24 +176,6 @@ def chr(self): DummyTracker([self]), ) - @check_guard - def make_stringify_guard(self) -> list[StringifyExpression]: - if ( - ENV_SOT_ALLOW_DYNAMIC_SHAPE.get() - and isinstance(self.value, int) - and self.tracker.need_guard() - ): - from 
..executor_cache import OpcodeExecutorCache - - frame_value_tracer = self.tracker.trace_value_from_frame() - symbolic_inputs = OpcodeExecutorCache().symbolic_inputs - symbolic_inputs.setdefault(frame_value_tracer.inlined_expr, {}) - symbolic_input = symbolic_inputs[frame_value_tracer.inlined_expr] - symbolic_input.setdefault(self.value, 0) - symbolic_input[self.value] += 1 - - return super().make_stringify_guard() - @VariableFactory.register_from_value() def from_value(value: Any, graph: FunctionGraph, tracker: Tracker): if type(value) in ConstTypes: @@ -349,10 +333,24 @@ def __init__( raise InnerError( f"Required type(tensor) is paddle.Tensor or ProxyTensor, but received {type(tensor).__name__}." ) + dynamic_axes: list[int] = [] + if ENV_SOT_ALLOW_DYNAMIC_SHAPE.get() and self.tracker.is_traceable(): + dynamic_axes = self.analyse_dynamic_axes() + self.meta.dynamic_axes = dynamic_axes self.origin_meta = self.meta self.var_name = TensorVariable.var_name_generator.next() self.graph.side_effects.record_mutable_variable(self) + def analyse_dynamic_axes(self): + shape_dims = ( + self.shape.proxy.get_all() + ) # Trigger convert all shape dims to Variable + return [ + i + for i, dim in enumerate(shape_dims) + if isinstance(dim, SymbolicVariable) + ] + def __len__(self): if self.meta.shape[0] == -1: raise BreakGraphError( @@ -399,9 +397,13 @@ def _reconstruct(self, codegen: PyCodeGen): def make_stringify_guard(self) -> list[StringifyExpression]: frame_value_tracer = self.tracker.trace_value_from_frame() + if ENV_SOT_ALLOW_DYNAMIC_SHAPE.get(): + str_left_expr = f"MetaInfo.from_tensor({{}}, dynamic_axes={self.meta.dynamic_axes}).guard_str()" + else: + str_left_expr = "MetaInfo.from_tensor({}).guard_str()" return [ StringifyExpression( - f"MetaInfo.from_tensor({{}}).guard_str() == '{self.origin_meta.guard_str()}'", + f"{str_left_expr} == '{self.origin_meta.guard_str()}'", [frame_value_tracer], union_free_vars( {"MetaInfo": MetaInfo}, @@ -483,15 +485,15 @@ def size(self): 
@tensor_property def shape(self): + # TODO(zrr1999): support more tensor properties if self.meta.is_dynamic_shape(): raise BreakGraphError( f"Getting shape for a dynamic shape tensor causes graph break. shape = {self.meta.shape}" ) from .container import ListVariable - return ListVariable( - self.meta.shape, self.graph, tracker=DummyTracker([self]) - ) + tracker = GetAttrTracker(self, "shape") + return ListVariable(self.meta.shape, self.graph, tracker=tracker) def numel(self): return self.size @@ -605,7 +607,7 @@ class SymbolicVariable(VariableBase): def __init__( self, - value: int | MetaInfo, + value: int | None | MetaInfo, graph: FunctionGraph, tracker: Tracker, ): @@ -663,7 +665,9 @@ def make_stringify_guard(self) -> list[StringifyExpression]: from ..executor_cache import OpcodeExecutorCache frame_value_tracer = self.tracker.trace_value_from_frame() - symbolic_inputs = OpcodeExecutorCache().symbolic_inputs + symbolic_inputs = OpcodeExecutorCache().get_symbolic_inputs( + self.graph.pycode_gen._origin_code + ) assert frame_value_tracer.inlined_expr in symbolic_inputs @@ -681,25 +685,42 @@ def make_stringify_guard(self) -> list[StringifyExpression]: ) ] + @staticmethod + def should_create_symbolic_variable( + value: Any, tracker: Tracker, symbolic_inputs: dict[str, dict[int, int]] + ): + tracker_expr = tracker.trace_value_from_frame().inlined_expr + symbolic_inputs.setdefault(tracker_expr, {}) + for expr, symbolic_input in symbolic_inputs.items(): + if tracker.match_expr(expr): + symbolic_input.setdefault(value, 0) + symbolic_input[value] += 1 + if symbolic_input[value] >= STATIC_DIM_FREQ_THRESHOLD: + return False + if len(symbolic_input.keys()) > 1: + return True + return False + return False + @VariableFactory.register_from_value(successor="ConstantVariable") def from_value(value: Any, graph: FunctionGraph, tracker: Tracker): if not ENV_SOT_ALLOW_DYNAMIC_SHAPE.get(): - return + return None if not isinstance(value, int): - return - if not tracker.need_guard(): - 
return + return None + if not tracker.is_traceable(): + return None from ..executor_cache import OpcodeExecutorCache - symbolic_inputs = OpcodeExecutorCache().symbolic_inputs + symbolic_inputs = OpcodeExecutorCache().get_symbolic_inputs( + graph.pycode_gen._origin_code + ) - for tracker_expr, symbolic_input in symbolic_inputs.items(): - if tracker.match_expr(tracker_expr): - symbolic_input.setdefault(value, 0) - symbolic_input[value] += 1 - # TODO(zrr1999): determine frequency - return SymbolicVariable(value, graph, tracker) + if SymbolicVariable.should_create_symbolic_variable( + value, tracker, symbolic_inputs + ): + return SymbolicVariable(value, graph, tracker) return None diff --git a/python/paddle/jit/sot/symbolic/compile_cache.py b/python/paddle/jit/sot/symbolic/compile_cache.py index b697e721532f9..5cb06059bb3db 100644 --- a/python/paddle/jit/sot/symbolic/compile_cache.py +++ b/python/paddle/jit/sot/symbolic/compile_cache.py @@ -21,6 +21,7 @@ from paddle.amp.auto_cast import amp_state from paddle.base.data_feeder import convert_dtype from paddle.framework import _dygraph_tracer, use_pir_api +from paddle.static import InputSpec from ..infer_meta import convert_meta_to_input_spec from ..profiler import EventGuard @@ -162,7 +163,13 @@ class CompileSIRCache(Cache, metaclass=Singleton): def __init__(self): super().__init__(weak=False) - def key_fn(self, context: SymbolicTraceContext, sir_name: str, **kwargs): + def key_fn( + self, + context: SymbolicTraceContext, + sir_name: str, + input_spec: list[InputSpec], + **kwargs, + ): """ generate a hash key for a SIR @@ -176,10 +183,16 @@ def key_fn(self, context: SymbolicTraceContext, sir_name: str, **kwargs): """ sir = context.get_sir(sir_name) # NOTE(dev): Is str(sir) a heavy operation ? 
- hash_key = hash((str(sir), kwargs['training'])) + hash_key = hash((str(sir), *input_spec, kwargs['training'])) return hash_key - def value_fn(self, context: SymbolicTraceContext, sir_name: str, **kwargs): + def value_fn( + self, + context: SymbolicTraceContext, + sir_name: str, + input_spec: list[InputSpec], + **kwargs, + ): """ Generate static graph function @@ -196,6 +209,7 @@ def value_fn(self, context: SymbolicTraceContext, sir_name: str, **kwargs): return FallbackWrapper( paddle.jit.to_static( compile_sir(context, sir_name), + input_spec=[input_spec], build_strategy=build_strategy, backend=backend, full_graph=True, diff --git a/python/paddle/jit/sot/symbolic/symbolic_context.py b/python/paddle/jit/sot/symbolic/symbolic_context.py index cc6487f696d0a..4efe3038c2781 100644 --- a/python/paddle/jit/sot/symbolic/symbolic_context.py +++ b/python/paddle/jit/sot/symbolic/symbolic_context.py @@ -14,6 +14,10 @@ from __future__ import annotations +from typing import Any, Callable + +from paddle.static import InputSpec + from ..utils import log from .compile_cache import CompileSIRCache from .statement_ir import ( @@ -126,7 +130,15 @@ def replace_TOS(self, sir): self.sir_stack.append(sir) self.statement_factory.update(sir) - def compile_do_nothing(self, ret_vals): + def return_TOS(self, ret_vals): + cur_sir: StatementIR = self.TOS + cur_sir.inputs = cur_sir.analyse_inputs() + cur_sir.outputs = ret_vals + log(2, "start subgraph compile and execution.\n") + log(2, self.TOS, "\n") + return cur_sir + + def compile_do_nothing(self) -> Callable[[...], Any]: """ Return a dummy function, which will return an empty list. 
@@ -141,29 +153,12 @@ def __call__(*args, **kwargs): def graph_size(self): return 0 - # return None function - dummy_stmt_ir = StatementIR("dummy_func") - dummy_stmt_ir.outputs = [] - dummy_stmt_ir.inputs = [] - return DummyFunc(), dummy_stmt_ir + return DummyFunc() - def compile_fn(self, ret_vals, **kwargs): + def compile_fn(self, sir_name: str, input_spec: list[InputSpec], **kwargs): """ start compile and return the python function, which must can be to_static without errors. """ - cur_sir: StatementIR = self.TOS - # step0: if no statement, return a dummy function - if len(cur_sir.statements) == 0: - return self.compile_do_nothing(ret_vals) - # step1: analyse sir inputs and outputs - cur_sir.inputs = cur_sir.analyse_inputs() - # TODO: output analysis - cur_sir.outputs = ret_vals - log(2, "start subgraph compile and execution.\n") - log(2, self.TOS, "\n") - # step2: call compile_sir and get python function, third cache is triggered here. - static_func = CompileSIRCache()(self, cur_sir.name, **kwargs) - # step3: GC and reset TOS - # self.reset_TOS() + static_func = CompileSIRCache()(self, sir_name, input_spec, **kwargs) - return static_func, cur_sir + return static_func diff --git a/python/paddle/jit/translated_layer.py b/python/paddle/jit/translated_layer.py index ddf0cf9c8b02e..c281e335efb3d 100644 --- a/python/paddle/jit/translated_layer.py +++ b/python/paddle/jit/translated_layer.py @@ -892,8 +892,7 @@ def _run_dygraph(instance, input, program_holder): for i, value in enumerate(input): if not isinstance(value, (np.ndarray, core.eager.Tensor)): raise TypeError( - "The type of input in TranslatedLayer must be numpy array or Variable(Tensor), but received %s." - % type(value) + f"The type of input in TranslatedLayer must be numpy array or Variable(Tensor), but received {type(value)}." 
) # NOTE: In order to unify the API, firstly convert the input to Tensor if isinstance(value, np.ndarray): @@ -925,8 +924,7 @@ def _run_dygraph(instance, input, program_holder): persistable_vars.append(instance._buffers[dy_var_name]) else: raise ValueError( - "The persistable variable %s does not exist in current TranslatedLayer." - % var_name + f"The persistable variable {var_name} does not exist in current TranslatedLayer." ) output_vars = [] @@ -1426,7 +1424,7 @@ def _construct(model_path, configs=None): # 0. dir and filename check model_path = os.path.normpath(model_path) if not os.path.isdir(model_path): - raise ValueError("There is no directory named '%s'" % model_path) + raise ValueError(f"There is no directory named '{model_path}'") model_filename = None params_filename = None if configs is not None: @@ -1591,8 +1589,7 @@ def _get_program_holder(self, method_name='forward'): program_holder = self._program_holder_dict.get(method_name, None) if program_holder is None: raise ValueError( - "The method `%s` does not exist in loaded TranslatedLayer." - % method_name + f"The method `{method_name}` does not exist in loaded TranslatedLayer." 
) return program_holder diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index a9d8312bb4ca0..bf87c3fc0f0a8 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -69,7 +69,9 @@ Upsample, UpsamplingBilinear2D, UpsamplingNearest2D, + ZeroPad1D, ZeroPad2D, + ZeroPad3D, ) # TODO: import all neural network related api under this directory, @@ -135,6 +137,8 @@ AvgPool3D, FractionalMaxPool2D, FractionalMaxPool3D, + LPPool1D, + LPPool2D, MaxPool1D, MaxPool2D, MaxPool3D, @@ -300,4 +304,8 @@ 'Unflatten', 'FractionalMaxPool2D', 'FractionalMaxPool3D', + 'LPPool1D', + 'LPPool2D', + 'ZeroPad1D', + 'ZeroPad3D', ] diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 4543d5c8ca14d..bc0f0e1d2c388 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -144,6 +144,8 @@ avg_pool3d, fractional_max_pool2d, fractional_max_pool3d, + lp_pool1d, + lp_pool2d, max_pool1d, max_pool2d, max_pool3d, @@ -220,6 +222,8 @@ 'avg_pool1d', 'avg_pool2d', 'avg_pool3d', + 'lp_pool1d', + 'lp_pool2d', 'max_pool1d', 'max_pool2d', 'max_pool3d', diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index 3dd30afeec986..ddfb04d8530a1 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -1543,7 +1543,7 @@ def tanhshrink(x, name=None): return out -def thresholded_relu(x, threshold=1.0, name=None): +def thresholded_relu(x, threshold=1.0, value=0.0, name=None): r""" thresholded relu activation. @@ -1553,7 +1553,7 @@ def thresholded_relu(x, threshold=1.0, name=None): \left\{ \begin{array}{rl} x,& \text{if } \ x > threshold \\ - 0,& \text{otherwise} + value,& \text{otherwise} \end{array} \right. @@ -1561,6 +1561,7 @@ def thresholded_relu(x, threshold=1.0, name=None): Parameters: x (Tensor): The input Tensor with data type float32, float64. 
threshold (float, optional): The value of threshold for thresholded_relu. Default is 1.0 + value (float, optional): The value to replace with when x is less than threshold. Default is 0.0 name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: @@ -1580,7 +1581,7 @@ def thresholded_relu(x, threshold=1.0, name=None): """ if in_dynamic_or_pir_mode(): - return _C_ops.thresholded_relu(x, threshold) + return _C_ops.thresholded_relu(x, threshold, value) else: check_variable_and_dtype( x, @@ -1594,19 +1595,19 @@ def thresholded_relu(x, threshold=1.0, name=None): type='thresholded_relu', inputs={'X': x}, outputs={'Out': out}, - attrs={'threshold': threshold}, + attrs={'threshold': threshold, 'value': value}, ) return out @inplace_apis_in_dygraph_only -def thresholded_relu_(x, threshold=1.0, name=None): +def thresholded_relu_(x, threshold=1.0, value=0.0, name=None): r""" Inplace version of ``thresholded_relu`` API, the output Tensor will be inplaced with input ``x``. Please refer to :ref:`api_paddle_nn_functional_thresholded_relu`. """ if in_dynamic_mode(): - return _C_ops.thresholded_relu_(x, threshold) + return _C_ops.thresholded_relu_(x, threshold, value) def log_softmax(x, axis=-1, dtype=None, name=None): diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index c9272e3a9c05e..3703abd739d57 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -2343,3 +2343,260 @@ def fractional_max_pool3d( ) return (pool_out, mask) if return_mask else pool_out + + +def lp_pool1d( + x, + norm_type, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + data_format="NCL", + name=None, +): + """ + This API implements power-average pooling 1d operation. + See more details in :ref:`api_paddle_nn_LPPool1d` . + + Args: + x (Tensor): The input tensor of pooling operator which is a 3-D tensor with + shape [N, C, L]. 
where `N` is batch size, `C` is the number of channels, + `L` is the length of the feature. The data type is float16, float32 or float64. + norm_type (int|float): The number the power operation. + kernel_size (int|list|tuple): The pool kernel size. If it is a tuple or list, + it must contain two integers, (kernel_size_Height, kernel_size_Width). + Otherwise, the pool kernel size will be a square of an int. + stride (int|list|tuple): The stride size. If it is a tuple or list, + it must contain two integers, (stride_Height, stride_Width). + Otherwise, the stride size will be a square of an int. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode(bool, optional): When True, it will use `ceil` instead of `floor` to compute the output shape. Default: False. + data_format(str, optional): The data format of the input and output data. An optional string from: `"NCL"`, + `"NLC"`. When it is `"NCL"`, the data is stored in the order of: + `[batch_size, input_channels, input_length]`. Default:`"NCL"`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: + Tensor: The output tensor of pooling result. The data type is same as input tensor. + + Examples: + .. 
code-block:: python + + >>> import paddle + >>> import paddle.nn as nn + + >>> data = paddle.uniform([1, 3, 32], paddle.float32) + >>> LPPool1D = nn.LPPool1D(norm_type=3, kernel_size=2, stride=2, padding=0) + >>> pool_out = LPPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] + """ + # NCL to NCHW + ori_data_format = data_format + if data_format == "NCL": + data_format = "NCHW" + axis = 2 + else: + data_format = "NHWC" + axis = 1 + + if not in_dynamic_mode(): + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'lp_pool1d' + ) + _check_input(x, 3) + x = unsqueeze(x, [axis]) + kernel_size = convert_to_list(kernel_size, 1, 'kernel_size') + kernel_size = [1] + kernel_size + if stride is None: + stride = kernel_size + else: + stride = convert_to_list(stride, 1, 'pool_stride') + stride = [1] + stride + + _check_value_limitation(kernel_size, "kernel_size", min_limit=1e-3) + _check_value_limitation(stride, "stride", min_limit=1e-3) + + channel_last = _channel_last(ori_data_format, 1) + padding, padding_algorithm = _update_padding_nd( + padding, 1, channel_last=channel_last, ceil_mode=ceil_mode + ) + + # use 2d to implement 1d should expand padding in advance. 
+ padding = _expand_low_nd_padding(padding) + + if in_dynamic_or_pir_mode(): + output = _C_ops.lp_pool2d( + x, + kernel_size, + stride, + padding, + ceil_mode, + True, + data_format, + 'lp', + False, + False, + padding_algorithm, + norm_type, + ) + return squeeze(output, [axis]) + + else: + op_type = 'lp_pool2d' + helper = LayerHelper(op_type, **locals()) + dtype = helper.input_dtype(input_param_name='x') + pool_out = helper.create_variable_for_type_inference(dtype) + + helper.append_op( + type=op_type, + inputs={"x": x}, + outputs={"out": pool_out}, + attrs={ + "pooling_type": "lp", + "kernel_size": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "ceil_mode": ceil_mode, + "exclusive": True, + "data_format": data_format, + "norm_type": norm_type, + }, + ) + return squeeze(pool_out, [axis]) + + +def lp_pool2d( + x, + norm_type, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + data_format="NCHW", + name=None, +): + """ + This API implements power-average pooling 2d operation. + See more details in :ref:`api_paddle_nn_LPPool2d` . + + Args: + x (Tensor): The input tensor of pooling operator which is a 4-D tensor with + shape [N, C, H, W]. The format of input tensor is `"NCHW"` or + `"NHWC"`, where `N` is batch size, `C` is the number of channels, + `H` is the height of the feature, and `W` is the width of the + feature. The data type if float32 or float64. + norm_type (int|float): The number the power operation. + kernel_size (int|list|tuple): The pool kernel size. If it is a tuple or list, + it must contain two integers, (kernel_size_Height, kernel_size_Width). + Otherwise, the pool kernel size will be a square of an int. + stride (int|list|tuple): The stride size. If it is a tuple or list, + it must contain two integers, (stride_Height, stride_Width). + Otherwise, the stride size will be a square of an int. + padding (string|int|list|tuple): The padding size. 
Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode(bool, optional): When True, it will use `ceil` instead of `floor` to compute the output shape. Default: False. + data_format (string, optional): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. Default: "NCHW". + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: The output tensor of pooling result. The data type is same as input tensor. + + Examples: + .. code-block:: python + + >>> import paddle + >>> import paddle.nn.functional as F + + >>> # lp pool2d + >>> x = paddle.uniform([1, 3, 32, 32], paddle.float32) + >>> out = F.lp_pool2d(x, + ... norm_type=2, + ... kernel_size=2, + ... 
stride=2, padding=0) + >>> print(out.shape) + [1, 3, 16, 16] + """ + + _check_input(x, 4) + if norm_type == 0: + raise ValueError("`norm_type` cannot be 0.") + + norm_type = float(norm_type) + kernel_size = convert_to_list(kernel_size, 2, 'pool_size') + if stride is None: + stride = kernel_size + else: + stride = convert_to_list(stride, 2, 'pool_stride') + + _check_value_limitation(kernel_size, "kernel_size", min_limit=1e-3) + _check_value_limitation(stride, "stride", min_limit=1e-3) + + channel_last = _channel_last(data_format, 2) + padding, padding_algorithm = _update_padding_nd( + padding, 2, channel_last, ceil_mode=ceil_mode + ) + + if in_dynamic_or_pir_mode(): + output = _C_ops.lp_pool2d( + x, + kernel_size, + stride, + padding, + ceil_mode, + True, + data_format, + 'lp', + False, + False, + padding_algorithm, + norm_type, + ) + return output + else: + op_type = 'lp_pool2d' + helper = LayerHelper(op_type, **locals()) + check_variable_and_dtype( + x, 'x', ['float16', 'uint16', 'float32', 'float64'], 'lp_pool2d' + ) + dtype = helper.input_dtype(input_param_name='x') + pool_out = helper.create_variable_for_type_inference(dtype) + + helper.append_op( + type=op_type, + inputs={"x": x}, + outputs={"out": pool_out}, + attrs={ + "pooling_type": "lp", + "kernel_size": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "ceil_mode": ceil_mode, + "exclusive": True, + "data_format": data_format, + "norm_type": norm_type, + }, + ) + + return pool_out diff --git a/python/paddle/nn/initializer/__init__.py b/python/paddle/nn/initializer/__init__.py index e281d6cd48589..270f0bb9234ea 100644 --- a/python/paddle/nn/initializer/__init__.py +++ b/python/paddle/nn/initializer/__init__.py @@ -18,7 +18,7 @@ Assign, NumpyArrayInitializer, # noqa: F401 ) -from .Bilinear import Bilinear +from .bilinear import Bilinear from .constant import ( Constant, ConstantInitializer, # noqa: F401 diff --git 
a/python/paddle/nn/initializer/Bilinear.py b/python/paddle/nn/initializer/bilinear.py similarity index 100% rename from python/paddle/nn/initializer/Bilinear.py rename to python/paddle/nn/initializer/bilinear.py diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index 27d5cd4ecefa4..80751daecd0e7 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -103,6 +103,8 @@ AvgPool3D, FractionalMaxPool2D, FractionalMaxPool3D, + LPPool1D, + LPPool2D, MaxPool1D, MaxPool2D, MaxPool3D, diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index c1234c28bc47d..b08f5f9ca8bbb 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -1164,13 +1164,14 @@ class ThresholdedReLU(Layer): \left\{ \begin{array}{rl} x,& \text{if } \ x > threshold \\ - 0,& \text{otherwise} + value,& \text{otherwise} \end{array} \right. Parameters: threshold (float, optional): The value of threshold for ThresholdedReLU. Default is 1.0 + value (float, optinal): The value to replace with when x is less than threshold. Default is 0.0 name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. 
@@ -1191,17 +1192,18 @@ class ThresholdedReLU(Layer): [2., 0., 0.]) """ - def __init__(self, threshold=1.0, name=None): + def __init__(self, threshold=1.0, value=0.0, name=None): super().__init__() self._threshold = threshold + self._value = value self._name = name def forward(self, x): - return F.thresholded_relu(x, self._threshold, self._name) + return F.thresholded_relu(x, self._threshold, self._value, self._name) def extra_repr(self): name_str = f', name={self._name}' if self._name else '' - return f'threshold={self._threshold}{name_str}' + return f'threshold={self._threshold}, value={self._value}{name_str}' class Silu(Layer): diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 6faf07bb6eb19..6b34c9fa90f6b 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -1077,6 +1077,67 @@ def extra_repr(self): return f'padding={self._pad}, mode={self._mode}, value={self._value}, data_format={self._data_format}{name_str}' +class ZeroPad1D(Layer): + """ + This interface is used to construct a callable object of the ``ZeroPad1D`` class. + Pads the input tensor boundaries with zero. + + Parameters: + padding (Tensor | List[int] | int): The padding size with data type int. If is int, use the + same padding in all dimensions. Else [len(padding)/2] dimensions of input will be padded. + The pad has the form (pad_left, pad_right). + data_format (str): An string from: "NCL", "NLC". Specify the data format of the input data. + Default is "NCL" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - x(Tensor): The input tensor of zeropad1d operator, which is a 3-D tensor. + The data type can be float32, float64. + - output(Tensor): The output tensor of zeropad1d operator, which is a 3-D tensor. + The data type is same as input x. + + Examples: + + .. 
code-block:: python + + >>> import paddle + >>> import paddle.nn as nn + + >>> input_shape = (1, 2, 3) + >>> pad = [1, 2] + >>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 + >>> my_pad = nn.ZeroPad1D(padding=pad) + >>> result = my_pad(data) + >>> print(result) + Tensor(shape=[1, 2, 6], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[0., 1., 2., 3., 0., 0.], + [0., 4., 5., 6., 0., 0.]]]) + """ + + def __init__(self, padding, data_format="NCL", name=None): + super().__init__() + self._pad = _npairs(padding, 1) + self._mode = 'constant' + self._value = 0.0 + self._data_format = data_format + self._name = name + + def forward(self, x): + return F.pad( + x, + pad=self._pad, + mode=self._mode, + value=self._value, + data_format=self._data_format, + name=self._name, + ) + + def extra_repr(self): + name_str = f', name={self._name}' if self._name else '' + return f'padding={self._pad}, data_format={self._data_format}{name_str}' + + class Pad2D(Layer): """ This interface is used to construct a callable object of the ``Pad2D`` class. @@ -1290,6 +1351,70 @@ def extra_repr(self): return f'padding={self._pad}, mode={self._mode}, value={self._value}, data_format={self._data_format}{name_str}' +class ZeroPad3D(Layer): + """ + This interface is used to construct a callable object of the ``ZeroPad3D`` class. + Pads the input tensor boundaries with zero. + + Parameters: + padding (Tensor | List[int] | int): The padding size with data type int. If is int, use the + same padding in all dimensions. Else [len(padding)/2] dimensions of input will be padded. + The pad has the form (pad_left, pad_right, pad_top, pad_bottom, pad_front, pad_back). + data_format (str): An string from: "NCDHW", "NDHWC". Specify the data format of the input data. + Default is "NCDHW" + name (str, optional) : The default value is None. Normally there is no need for + user to set this property. 
For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - x(Tensor): The input tensor of zeropad3d operator, which is a 5-D tensor. + The data type can be float32, float64. + - output(Tensor): The output tensor of zeropad3d operator, which is a 5-D tensor. + The data type is same as input x. + + Examples: + + .. code-block:: python + + >>> import paddle + >>> import paddle.nn as nn + + >>> input_shape = (1, 1, 1, 2, 3) + >>> pad = [1, 0, 1, 2, 0, 0] + >>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 + >>> my_pad = nn.ZeroPad3D(padding=pad) + >>> result = my_pad(data) + >>> print(result) + Tensor(shape=[1, 1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[[0., 0., 0., 0.], + [0., 1., 2., 3.], + [0., 4., 5., 6.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]]]]]) + """ + + def __init__(self, padding, data_format="NCDHW", name=None): + super().__init__() + self._pad = _npairs(padding, 3) + self._mode = 'constant' + self._value = 0.0 + self._data_format = data_format + self._name = name + + def forward(self, x): + return F.pad( + x, + pad=self._pad, + mode=self._mode, + value=self._value, + data_format=self._data_format, + name=self._name, + ) + + def extra_repr(self): + name_str = f', name={self._name}' if self._name else '' + return f'padding={self._pad}, data_format={self._data_format}{name_str}' + + class CosineSimilarity(Layer): """ This interface is used to compute cosine similarity between x1 and x2 along axis. 
diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 23eaf467d916d..3127bb636502f 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -132,6 +132,7 @@ class AvgPool2D(Layer): Output(N_i, C_j, h, w) = \frac{\sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} Input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)}{ksize[0] * ksize[1]} + Parameters: kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two integers, (pool_size_Height, pool_size_Width). @@ -153,7 +154,7 @@ class AvgPool2D(Layer): divisor_override(float, optional): If specified, it will be used as divisor, otherwise kernel_size will be used. Default None. data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`, - `"NDHW"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -321,6 +322,210 @@ def extra_repr(self): ) +class LPPool1D(Layer): + r""" + Performing a 1D power-average pooling over an input signal composed + of several input planes, based on the input, output_size, return_mask parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + The output value of the layer with input size (N, C, L), + output (N, C, :math:`L_{out}`) and kernel_size ksize can be precisely described as + For average pool1d: + + .. 
math:: + + Output(N_i, C_i, l) = sum(Input[N_i, C_i, stride \times l:stride \times l+k]^{norm\_type})^{1/norm\_type} + + Parameters: + norm_type(int|float): The number the power operation. + kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain an integer. + stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list, + it must contain an integer. Default None, then stride will be equal to the kernel_size. + padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides. + 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode(bool, optional): When True, it will use `ceil` instead of `floor` to compute the output shape. Default: False. + data_format(str, optional): The data format of the input and output data. An optional string from: `"NCL"`, + `"NLC"`. When it is `"NCL"`, the data is stored in the order of: + `[batch_size, input_channels, input_length]`. Default: "NCL" + name(str, optional): For eed to detailed information, please refer to :ref:`api_guide_Name`. + Usually name is no nset and None by default. + + Shape: + - x(Tensor): The input tensor of lp pool1d operator, which is a 3-D tensor. + The data type can be float32, float64. + - output(Tensor): The output tensor of lp pool1d operator, which is a 3-D tensor. + The data type is same as input x. + + Returns: + A callable object of LPPool1D. 
+ + Examples: + + .. code-block:: python + + >>> import paddle + >>> import paddle.nn as nn + + >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) + >>> LPPool1D = nn.LPPool1D(norm_type=2, kernel_size=2, stride=2, padding=0) + >>> pool_out = LPPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] + + """ + + def __init__( + self, + norm_type, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + data_format="NCL", + name=None, + ): + super().__init__() + self.norm_type = float(norm_type) + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.ceil_mode = ceil_mode + self.data_format = data_format + self.name = name + + def forward(self, x): + out = F.lp_pool1d( + x, + self.norm_type, + self.kernel_size, + self.stride, + self.padding, + self.ceil_mode, + self.data_format, + self.name, + ) + return out + + def extra_repr(self): + return 'norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, padding={padding}'.format( + **self.__dict__ + ) + + +class LPPool2D(Layer): + r""" + Performing 2D power-average pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCHW format, where N is batch size, C is the number of channels, + H is the height of the feature, and W is the width of the feature. + + Example: + Input: + X shape: :math:`(N, C, H_{in}, W_{in})` + Attr: + - kernel_size: kernel_size + - norm_type: norm_type + + Output: + Out shape: :math:`(N, C, H_{out}, W_{out})` + + .. math:: + + Output(N_i, C_j, h, w) = (\sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} + Input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)^{norm\_type})^{1 / norm\_type} + + Parameters: + norm_type(int|float): The number the power operation. + kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two integers, (pool_size_Height, pool_size_Width). 
+ Otherwise, the pool kernel size will be a square of an int. + stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list, + it must contain two integers, (pool_stride_Height, pool_stride_Width). + Otherwise, the pool stride size will be a square of an int. + Default None, then stride will be equal to the kernel_size. + padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode(bool, optional): When True, it will use `ceil` instead of `floor` to compute the output shape. Default: False. + data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`, + `"NHWC"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. Default: "NCHW". + name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. + Usually name is no need to set and None by default. + + Shape: + - x(Tensor): The input tensor of lp pool2d operator, which is a 4-D tensor. + The data type can be float32, float64. + - output(Tensor): The output tensor of lp pool2d operator, which is a 4-D tensor. + The data type is same as input x. + + Returns: + A callable object of LPPool2D. + + Examples: + .. 
code-block:: python + + >>> import paddle + >>> import paddle.nn as nn + + >>> # lp pool2d + >>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1) + >>> LPPool2D = nn.LPPool2D(norm_type=2, kernel_size=2, stride=2, padding=0) + >>> output = LPPool2D(input) + >>> print(output.shape) + [1, 3, 16, 16] + + """ + + def __init__( + self, + norm_type, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + data_format="NCHW", + name=None, + ): + super().__init__() + self.norm_type = float(norm_type) + self.ksize = kernel_size + self.stride = kernel_size if stride is None else stride + self.padding = padding + self.ceil_mode = ceil_mode + self.data_format = data_format + self.name = name + + def forward(self, x): + return F.lp_pool2d( + x, + norm_type=self.norm_type, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + ceil_mode=self.ceil_mode, + data_format=self.data_format, + name=self.name, + ) + + def extra_repr(self): + return 'norm_type={norm_type}, kernel_size={ksize}, stride={stride}, padding={padding}'.format( + **self.__dict__ + ) + + class MaxPool1D(Layer): """ This operation applies 1D max pooling over input signal @@ -458,7 +663,7 @@ class MaxPool2D(Layer): The default value is 0. ceil_mode(bool, optional): when True, will use `ceil` instead of `floor` to compute the output shape return_mask(bool, optional): Whether to return the max indices along with the outputs. - data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. + data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. 
diff --git a/python/paddle/nn/quant/quantized_linear.py b/python/paddle/nn/quant/quantized_linear.py index 1c2d962f720cf..41ad1839e1f8a 100644 --- a/python/paddle/nn/quant/quantized_linear.py +++ b/python/paddle/nn/quant/quantized_linear.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle import _C_ops, version +import paddle +from paddle import _C_ops from paddle.base.data_feeder import check_dtype from paddle.base.framework import convert_np_dtype_to_dtype_ from paddle.device.cuda import get_device_capability @@ -24,7 +25,7 @@ def _get_arch_info(): # Get SMVersion from device. - cuda_version = version.cuda() + cuda_version = paddle.version.cuda() if cuda_version is not None and cuda_version != 'False': major, minor = get_device_capability() arch = int(major * 10 + minor) diff --git a/python/paddle/profiler/utils.py b/python/paddle/profiler/utils.py index 1e078d048a5bf..18de1d8fc1940 100644 --- a/python/paddle/profiler/utils.py +++ b/python/paddle/profiler/utils.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations + import functools import sys +import types from contextlib import ContextDecorator, contextmanager -from typing import Any from warnings import warn from paddle.base import core @@ -82,7 +84,12 @@ def __enter__(self): self.begin() return self - def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any): + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, + ): self.end() def begin(self): diff --git a/python/paddle/sparse/__init__.py b/python/paddle/sparse/__init__.py index 661143f12dae8..98f5ca0b13ee5 100644 --- a/python/paddle/sparse/__init__.py +++ b/python/paddle/sparse/__init__.py @@ -17,6 +17,7 @@ add, divide, is_same_shape, + mask_as, masked_matmul, matmul, multiply, @@ -77,6 +78,7 @@ 'expm1', 'mv', 'matmul', + 'mask_as', 'masked_matmul', 'addmm', 'add', diff --git a/python/paddle/sparse/binary.py b/python/paddle/sparse/binary.py index 3aac3d5e7f144..abc943ac3c1fc 100644 --- a/python/paddle/sparse/binary.py +++ b/python/paddle/sparse/binary.py @@ -452,3 +452,60 @@ def is_same_shape(x, y): """ return x.is_same_shape(y) + + +@dygraph_only +def mask_as(x, mask, name=None): + r""" + Filter the input dense tensor `x` using the `indices` of the sparse matrix `mask`, + which in turn generates a sparse matrix of the corresponding format. + The input `x` and `mask` must have the same shape, and the sparse tensor returned has the same indices as `mask` + even if `zero` values exist in the corresponding indices. + + Args: + x (Tensor): The input tensor. It should be a DenseTensor. + The data type can be float32, float64, int32, int64, complex64, complex128, int8, int16, float16. + mask (Tensor): The input tensor. It can be SparseCooTensor or SparseCsrTensor. + It should be 2D or 3D when the mask is SparseCsrTensor. + name (str, optional): Name for the operation (optional, default is None). 
For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor: A sparse tensor. + + Examples: + .. code-block:: python + + >>> import paddle + >>> paddle.set_device('cpu') + + >>> # csr sparse tensor + >>> crows = [0, 2, 3, 5] + >>> cols = [1, 3, 2, 0, 1] + >>> values = [1., 2., 3., 4., 5.] + >>> dense_shape = [3, 4] + >>> csr = paddle.sparse.sparse_csr_tensor(crows, cols, values, dense_shape) + >>> paddle.seed(2024) + >>> x = paddle.rand(dense_shape).astype(csr.dtype) + >>> out = paddle.sparse.mask_as(x, csr) + >>> print(out) + Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(cpu), stop_gradient=True, + crows=[0, 2, 3, 5], + cols=[1, 3, 2, 0, 1], + values=[0.23659813, 0.08467803, 0.64152628, 0.66596609, 0.90394485]) + + >>> # coo sparse tensor + >>> indices = [[0, 1, 2], [1, 2, 0]] + >>> values = [1.0, 2.0, 3.0] + >>> dense_shape = [3, 3] + >>> coo = paddle.sparse.sparse_coo_tensor(indices, values, dense_shape) + >>> paddle.seed(2024) + >>> x = paddle.rand(dense_shape).astype(coo.dtype) + >>> out = paddle.sparse.mask_as(x, coo) + >>> print(out) + Tensor(shape=[3, 3], dtype=paddle.float32, place=Place(cpu), stop_gradient=True, + indices=[[0, 1, 2], + [1, 2, 0]], + values=[0.23659813, 0.40340215, 0.64152628]) + + """ + return _C_ops.sparse_mask_as(x, mask) diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index 469145ac6a832..8039c1ead6478 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -60,11 +60,12 @@ ) from .pir_io import ( get_pir_parameters, + load_inference_model_pir, load_pir, - load_pir_inference_model, load_vars_pir, + normalize_pir_program, + save_inference_model_pir, save_pir, - save_pir_inference_model, save_vars_pir, ) @@ -183,6 +184,8 @@ def normalize_program(program, feed_vars, fetch_vars, **kwargs): >>> normalized_program = paddle.static.normalize_program(program, [image], [predict]) """ + if in_pir_mode(): + return normalize_pir_program(program, feed_vars, fetch_vars, 
**kwargs) if not isinstance(program, Program): raise TypeError( "program type must be `base.Program`, but received `%s`" @@ -523,7 +526,7 @@ def save_inference_model( """ if in_pir_mode(): - save_pir_inference_model( + save_inference_model_pir( path_prefix, feed_vars, fetch_vars, executor, **kwargs ) return @@ -849,7 +852,7 @@ def load_inference_model(path_prefix, executor, **kwargs): # program to get the inference result. """ if in_pir_mode(): - return load_pir_inference_model(path_prefix, executor, **kwargs) + return load_inference_model_pir(path_prefix, executor, **kwargs) # check kwargs supported_args = ('model_filename', 'params_filename') deprecated_args = ('pserver_endpoints',) diff --git a/python/paddle/static/nn/metric.py b/python/paddle/static/nn/metric.py index d2252ebc0a0bc..2be2cecf18742 100644 --- a/python/paddle/static/nn/metric.py +++ b/python/paddle/static/nn/metric.py @@ -245,6 +245,28 @@ def auc( [array(1.)] """ + if in_pir_mode(): + if ins_tag_weight is None: + ins_tag_weight = paddle.full( + shape=[1, 1], dtype="float32", fill_value=1.0 + ) + stat_pos = paddle.zeros(shape=[1, num_thresholds + 1], dtype="int64") + stat_neg = paddle.zeros(shape=[1, num_thresholds + 1], dtype="int64") + auc_out, batch_stat_pos, batch_stat_neg = _C_ops.auc( + input, + label, + stat_pos, + stat_neg, + ins_tag_weight, + curve, + num_thresholds, + slide_steps, + ) + return ( + auc_out, + batch_stat_pos, + batch_stat_neg, + ) helper = LayerHelper("auc", **locals()) if ins_tag_weight is None: diff --git a/python/paddle/static/pir_io.py b/python/paddle/static/pir_io.py index 38e5e69cfdbb1..ffbf75dfdbb26 100644 --- a/python/paddle/static/pir_io.py +++ b/python/paddle/static/pir_io.py @@ -90,18 +90,18 @@ def set_var(name, ndarray): p = t._place() if p.is_cpu_place(): place = paddle.base.CPUPlace() - # elif p.is_cuda_pinned_place(): - # place = paddle.base.CUDAPinnedPlace() - # elif p.is_xpu_place(): - # p = paddle.base.core.Place() - # p.set_place(t._place()) - # place = 
paddle.base.XPUPlace(p.xpu_device_id()) - # elif p.is_custom_place(): - # p = paddle.base.core.Place() - # p.set_place(t._place()) - # place = paddle.base.CustomPlace( - # paddle.device.get_device().split(':')[0], p.custom_device_id() - # ) + elif p.is_cuda_pinned_place(): + place = paddle.base.CUDAPinnedPlace() + elif p.is_xpu_place(): + p = paddle.base.core.Place() + p.set_place(t._place()) + place = paddle.base.XPUPlace(p.xpu_device_id()) + elif p.is_custom_place(): + p = paddle.base.core.Place() + p.set_place(t._place()) + place = paddle.base.CustomPlace( + paddle.device.get_device().split(':')[0], p.custom_device_id() + ) else: p = paddle.base.core.Place() p.set_place(t._place()) @@ -251,7 +251,13 @@ def normalize_pir_program(program, feed_vars, fetch_vars, **kwargs): if not all(isinstance(v, pir.Value) for v in fetch_vars): raise TypeError("fetch_vars type must be a Value or a list of Value.") - # TODO(Ruting) remind users to set auc_states to 0 if auc op were found. + # remind users to set auc_states to 0 if auc op were found. + for op in program.global_block().ops: + if op.name() == 'pd_op.auc': + warnings.warn( + "Be sure that you have set auc states to 0 before saving inference model." + ) + break # fix the bug that the activation op's output as target will be pruned. # will affect the inference performance. 
@@ -632,8 +638,8 @@ def load_pir(program, model_path, executor=None, var_list=None): model_prefix = model_prefix[:-9] elif model_prefix.endswith(".pdopt"): model_prefix = model_prefix[:-6] - elif model_prefix.endswith(".pdmodel"): - model_prefix = model_prefix[:-8] + elif model_prefix.endswith(".json"): + model_prefix = model_prefix[:-5] parameter_file_name = model_prefix + ".pdparams" @@ -677,7 +683,7 @@ def load_pir(program, model_path, executor=None, var_list=None): @static_only -def save_pir_inference_model( +def save_inference_model_pir( path_prefix, feed_vars, fetch_vars, executor, **kwargs ): """ @@ -752,7 +758,7 @@ def save_pir_inference_model( @static_only -def load_pir_inference_model(path_prefix, executor, **kwargs): +def load_inference_model_pir(path_prefix, executor, **kwargs): """ Load inference model from a given path. By this API, you can get the model diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index 4de5e392a8493..553ea2cc5bbee 100644 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -142,6 +142,7 @@ atleast_1d, atleast_2d, atleast_3d, + block_diag, broadcast_tensors, broadcast_to, cast, @@ -306,6 +307,7 @@ inner, inverse, isfinite, + isin, isinf, isnan, isneginf, @@ -544,6 +546,7 @@ 'hypot_', 'nansum', 'nanmean', + 'block_diag', 'count_nonzero', 'tanh', 'tanh_', @@ -587,6 +590,7 @@ 'kron', 'kthvalue', 'isfinite', + 'isin', 'isinf', 'isnan', 'isneginf', diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 24c60af7499e6..6ef53a757ee23 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# TODO: define functions to get create a tensor +from __future__ import annotations import math import re @@ -21,6 +21,12 @@ import paddle from paddle import _C_ops +from paddle._typing import ( + DTypeLike, + NestedNumbericSequence, + PlaceLike, + TensorLike, +) from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only from ..base.data_feeder import ( @@ -719,7 +725,12 @@ def _to_tensor_static(data, dtype=None, stop_gradient=None): return output -def to_tensor(data, dtype=None, place=None, stop_gradient=True): +def to_tensor( + data: TensorLike | NestedNumbericSequence, + dtype: DTypeLike | None = None, + place: PlaceLike | None = None, + stop_gradient: bool = True, +) -> paddle.Tensor: r""" Constructs a ``paddle.Tensor`` from ``data`` , which can be scalar, tuple, list, numpy\.ndarray, paddle\.Tensor. @@ -1644,7 +1655,7 @@ def meshgrid(*args, **kwargs): Args: *args(Tensor|list of Tensor) : tensors (tuple(list) of tensor): the shapes of input k tensors are (N1,), - (N2,),..., (Nk,). Support data types: ``float64``, ``float16``, ``float32``, ``int32``, ``int64``. + (N2,),..., (Nk,). Support data types: ``float64``, ``bfloat16``, ``float16``, ``float32``, ``int32``, ``int64``, ``complex64``, ``complex128``. **kwargs (optional): Currently, only accept name in **kwargs The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. 
@@ -1686,7 +1697,16 @@ def meshgrid(*args, **kwargs): check_dtype( input_.dtype, 'create data type', - ['uint16', 'float16', 'float32', 'float64', 'int32', 'int64'], + [ + 'uint16', + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + 'complex64', + 'complex128', + ], 'meshgrid', ) diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 9803d4a8c5c0a..c99f46677d679 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -3971,7 +3971,7 @@ def tile(x, repeat_times, name=None): Both the number of dimensions of ``x`` and the number of elements in ``repeat_times`` should be less than or equal to 6. Args: - x (Tensor): The input tensor, its data type should be bool, float16, float32, float64, int32 or int64. + x (Tensor): The input tensor, its data type should be bool, float16, float32, float64, int32, int64, complex64 or complex128. repeat_times (list|tuple|Tensor): The number of repeating times. If repeat_times is a list or tuple, all its elements should be integers or 1-D Tensors with the data type int32. If repeat_times is a Tensor, it should be an 1-D Tensor with the data type int32. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. @@ -4038,6 +4038,8 @@ def check_input(x, repeat_times): 'float64', 'int32', 'int64', + 'complex64', + 'complex128', ], 'tile', ) @@ -4209,7 +4211,7 @@ def expand(x, shape, name=None): Both the number of dimensions of ``x`` and the number of elements in ``shape`` should be less than or equal to 6. And the number of dimensions of ``x`` should be less than the number of elements in ``shape``. The dimension to expand must have a value 0. Args: - x (Tensor): The input Tensor, its data type is bool, float16, float32, float64, int32, int64, uint8 or uint16. 
+ x (Tensor): The input Tensor, its data type is bool, float16, float32, float64, int32, int64, uint8, uint16, complex64 or complex128. shape (list|tuple|Tensor): The result shape after expanding. The data type is int32. If shape is a list or tuple, all its elements should be integers or 0-D or 1-D Tensors with the data type int32. If shape is a Tensor, it should be an 1-D Tensor with the data type int32. The value -1 in shape means keeping the corresponding dimension unchanged. @@ -4275,6 +4277,8 @@ def expand(x, shape, name=None): 'int64', 'uint8', 'uint16', + 'complex64', + 'complex128', ], 'expand', ) @@ -6861,3 +6865,67 @@ def slice_scatter(x, value, axes, starts, ends, strides, name=None): ) return output + + +def block_diag(inputs, name=None): + """ + Create a block diagonal matrix from provided tensors. + + Args: + inputs (list|tuple): ``inputs`` is a Tensor list or Tensor tuple, one or more tensors with 0, 1, or 2 dimensions. The data type: ``bool``, ``float16``, ``float32``, ``float64``, ``uint8``, ``int8``, ``int16``, ``int32``, ``int64``, ``bfloat16``, ``complex64``, ``complex128``. + name (str, optional): Name for the operation (optional, default is None). + + Returns: + Tensor, A ``Tensor``. The data type is same as ``inputs``. + + Examples: + .. 
code-block:: python + + >>> import paddle + + >>> A = paddle.to_tensor([[4], [3], [2]]) + >>> B = paddle.to_tensor([7, 6, 5]) + >>> C = paddle.to_tensor(1) + >>> D = paddle.to_tensor([[5, 4, 3], [2, 1, 0]]) + >>> E = paddle.to_tensor([[8, 7], [7, 8]]) + >>> out = paddle.block_diag([A, B, C, D, E]) + >>> print(out) + Tensor(shape=[9, 10], dtype=int64, place=Place(gpu:0), stop_gradient=True, + [[4, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [3, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [2, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 7, 6, 5, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 5, 4, 3, 0, 0], + [0, 0, 0, 0, 0, 2, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 8, 7], + [0, 0, 0, 0, 0, 0, 0, 0, 7, 8]]) + """ + + def to_col_block(arys, i, a): + return [ + a + if idx == i + else paddle.zeros([ary.shape[0], a.shape[1]], dtype=a.dtype) + for idx, ary in enumerate(arys) + ] + + def to_2d(ary): + if ary.ndim == 0: + return ary.unsqueeze(axis=0).unsqueeze(axis=0) + if ary.ndim == 1: + return ary.unsqueeze(axis=0) + if ary.ndim == 2: + return ary + raise ValueError( + "For 'block_diag', the dimension of each elements in 'inputs' must be 0, 1, or 2, but got " + f"{ary.ndim}" + ) + + arys = [to_2d(ary) for ary in inputs] + + matrix = [ + paddle.concat(to_col_block(arys, idx, ary), axis=0) + for idx, ary in enumerate(arys) + ] + return paddle.concat(matrix, axis=1) diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index d7d8669ff0c3b..3df4cf88c94b6 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -2726,7 +2726,7 @@ def inverse(x, name=None): x (Tensor): The input tensor. The last two dimensions should be equal. When the number of dimensions is greater than 2, it is treated as batches of square matrix. The data - type can be float32 and float64. + type can be float32, float64, complex64, complex128. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. 
Returns: @@ -2751,7 +2751,12 @@ def inverse(x, name=None): else: def _check_input(x): - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'inverse') + check_variable_and_dtype( + x, + 'x', + ['float32', 'float64', 'complex64', 'complex128'], + 'inverse', + ) if len(x.shape) < 2: raise ValueError( "The input of inverse is expected to be a Tensor whose number " @@ -7969,3 +7974,187 @@ def sinc_(x, name=None): paddle.sin_(x) paddle.divide_(x, tmp) return paddle.where(~paddle.isnan(x), x, paddle.full_like(x, 1.0)) + + +def isin(x, test_x, assume_unique=False, invert=False, name=None): + r""" + Tests if each element of `x` is in `test_x`. + + Args: + x (Tensor): The input Tensor. Supported data type: 'bfloat16', 'float16', 'float32', 'float64', 'int32', 'int64'. + test_x (Tensor): Tensor values against which to test for each input element. Supported data type: 'bfloat16', 'float16', 'float32', 'float64', 'int32', 'int64'. + assume_unique (bool, optional): If True, indicates both `x` and `test_x` contain unique elements, which could make the calculation faster. Default: False. + invert (bool, optional): Indicate whether to invert the boolean return tensor. If True, invert the results. Default: False. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Returns: + out (Tensor), The output Tensor with the same shape as `x`. + + Examples: + .. 
code-block:: python + + >>> import paddle + >>> paddle.set_device('cpu') + >>> x = paddle.to_tensor([-0., -2.1, 2.5, 1.0, -2.1], dtype='float32') + >>> test_x = paddle.to_tensor([-2.1, 2.5], dtype='float32') + >>> res = paddle.isin(x, test_x) + >>> print(res) + Tensor(shape=[5], dtype=bool, place=Place(cpu), stop_gradient=True, + [False, True, True, False, True]) + + >>> x = paddle.to_tensor([-0., -2.1, 2.5, 1.0, -2.1], dtype='float32') + >>> test_x = paddle.to_tensor([-2.1, 2.5], dtype='float32') + >>> res = paddle.isin(x, test_x, invert=True) + >>> print(res) + Tensor(shape=[5], dtype=bool, place=Place(cpu), stop_gradient=True, + [True, False, False, True, False]) + + >>> # Set `assume_unique` to True only when `x` and `test_x` contain unique values, otherwise the result may be incorrect. + >>> x = paddle.to_tensor([0., 1., 2.]*20).reshape([20, 3]) + >>> test_x = paddle.to_tensor([0., 1.]*20) + >>> correct_result = paddle.isin(x, test_x, assume_unique=False) + >>> print(correct_result) + Tensor(shape=[20, 3], dtype=bool, place=Place(cpu), stop_gradient=True, + [[True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False], + [True , True , False]]) + + >>> incorrect_result = paddle.isin(x, test_x, assume_unique=True) + >>> print(incorrect_result) + Tensor(shape=[20, 3], dtype=bool, place=Place(gpu:0), stop_gradient=True, + [[True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , 
True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , True ], + [True , True , False]]) + + """ + if not isinstance(x, (paddle.Tensor, Variable, paddle.pir.Value)): + raise TypeError(f"x must be tensor type, but got {type(x)}") + if not isinstance(test_x, (paddle.Tensor, Variable, paddle.pir.Value)): + raise TypeError(f"x must be tensor type, but got {type(test_x)}") + + check_variable_and_dtype( + x, + "x", + [ + 'uint16', + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + ], + "isin", + ) + + check_variable_and_dtype( + test_x, + "test_x", + [ + 'uint16', + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + ], + "isin", + ) + + x_zero_dim = False + if len(x.shape) == 0: + x = x.reshape([1]) + x_zero_dim = True + + size_x = math.prod(x.shape) + size_t = math.prod(test_x.shape) + if size_t < math.pow(size_x, 0.145) * 10.0: + # use brute-force searching if the test_x size is small + if len(x.shape) == 0: + return paddle.zeros([], dtype='bool') + + tmp = x.reshape(tuple(x.shape) + ((1,) * test_x.ndim)) + cmp = tmp == test_x + dim = tuple(range(-1, -test_x.ndim - 1, -1)) + cmp = cmp.any(axis=dim) + if invert: + cmp = ~cmp + else: + x_flat = x.flatten() + test_x_flat = test_x.flatten() + if assume_unique: + # if x and test_x both contain unique elements, use stable argsort method which could be faster + all_elements = paddle.concat([x_flat, test_x_flat]) + sorted_index = paddle.argsort(all_elements, stable=True) + sorted_x = all_elements[sorted_index] + + duplicate_mask = paddle.full_like(sorted_index, False, dtype='bool') + if not in_dynamic_mode(): + duplicate_mask = paddle.static.setitem( + duplicate_mask, + paddle.arange(duplicate_mask.numel() - 1), + sorted_x[1:] == sorted_x[:-1], + ) + else: + duplicate_mask[:-1] = sorted_x[1:] == sorted_x[:-1] + + if invert: + 
duplicate_mask = duplicate_mask.logical_not() + + mask = paddle.empty_like(duplicate_mask) + if not in_dynamic_or_pir_mode(): + mask = paddle.static.setitem(mask, sorted_index, duplicate_mask) + else: + mask[sorted_index] = duplicate_mask + + cmp = mask[0 : x.numel()].reshape(x.shape) + else: + # otherwise use searchsorted method + sorted_test_x = paddle.sort(test_x_flat) + idx = paddle.searchsorted(sorted_test_x, x_flat) + test_idx = paddle.where( + idx < sorted_test_x.numel(), + idx, + paddle.zeros_like(idx, 'int64'), + ) + cmp = sorted_test_x[test_idx] == x_flat + cmp = cmp.logical_not() if invert else cmp + cmp = cmp.reshape(x.shape) + + if x_zero_dim: + return cmp.reshape([]) + else: + return cmp diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py index 736ae891f2fb8..9ec4cd1e2ec7f 100755 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -130,6 +130,7 @@ def argsort(x, axis=-1, descending=False, stable=False, name=None): x, 'x', [ + 'uint16', 'float16', 'float32', 'float64', diff --git a/python/paddle/tensor/tensor.prototype.pyi b/python/paddle/tensor/tensor.prototype.pyi index 735c8da282545..ffc870d34cb7a 100644 --- a/python/paddle/tensor/tensor.prototype.pyi +++ b/python/paddle/tensor/tensor.prototype.pyi @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -# The `Tensor` template for `tools/gen_tensor_stub.py` generates the stub file `tensor.pyi`. -# Add docstring, attributes, methods and alias with type annotaions for `Tensor` +# The `Tensor` template `tensor.prototype.pyi` for `tools/gen_tensor_stub.py` to generate the stub file `tensor.pyi`. +# Add docstring, attributes, methods and alias with type annotaions for `Tensor` in `tensor.prototype.pyi` # if not conveniently coding in original place (like c++ source file). 
-from typing import Any, overload +from typing import Any, Literal, overload import numpy.typing as npt from typing_extensions import TypeAlias @@ -180,7 +180,7 @@ class Tensor: | tuple[None | bool | int | _Slice, ...] | list[Tensor | bool | int] ), - value: Tensor | npt.NDArray[Any] | int | float | complex | bool, + value: Tensor | npt.NDArray[Any] | complex | bool, ) -> None: ... def __len__(self) -> int: ... @@ -260,4 +260,4 @@ class Tensor: def type(self) -> Any: ... # annotation: ${tensor_alias} - __qualname__ = "Tensor" + __qualname__: Literal["Tensor"] diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index 5118460f2ad66..45fb1a89e0903 100755 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -34,8 +34,6 @@ class VisibleDeprecationWarning(UserWarning): See more details from https://peps.python.org/pep-0565/ """ - ... - def deprecated(update_to="", since="", reason="", level=0): """Decorate a function to signify its deprecation. diff --git a/python/paddle/utils/environments.py b/python/paddle/utils/environments.py index 7054dd1cc43a9..256fa9669be2d 100644 --- a/python/paddle/utils/environments.py +++ b/python/paddle/utils/environments.py @@ -17,6 +17,8 @@ import os from typing import Generic, TypeVar +from typing_extensions import Self + T = TypeVar("T") @@ -100,7 +102,7 @@ def __init__(self, variable: EnvironmentVariable[T], value: T): self.original_value = variable.get() self.variable.set(value) - def __enter__(self) -> EnvironmentVariableGuard: + def __enter__(self) -> Self: return self def __exit__(self, exc_type, exc_value, traceback) -> None: diff --git a/python/paddle/utils/layers_utils.py b/python/paddle/utils/layers_utils.py index 99a3f122b56d7..42587601f09cb 100644 --- a/python/paddle/utils/layers_utils.py +++ b/python/paddle/utils/layers_utils.py @@ -13,12 +13,15 @@ # limitations under the License. 
import copy +import typing from collections import defaultdict from collections.abc import Sequence +from typing import Any, Dict, TypeVar, Union from uuid import uuid4 from weakref import WeakKeyDictionary import numpy as np +from typing_extensions import TypeGuard import paddle from paddle.pir.core import convert_np_dtype_to_dtype_ @@ -31,6 +34,12 @@ ) from ..pir import Value +_T = TypeVar("_T") + +Structure = Union[ + _T, Dict[str, "Structure[_T]"], typing.Sequence["Structure[_T]"] +] + def convert_to_list(value, n, name, dtype=int): """ @@ -102,7 +111,7 @@ def convert_to_list(value, n, name, dtype=int): return value_list -def is_sequence(seq): +def is_sequence(seq: Any) -> TypeGuard[typing.Sequence[Any]]: """ Whether `seq` is an entry or nested structure """ @@ -164,7 +173,7 @@ def to_sequence(nest): return [nest] -def flatten(nest): +def flatten(nest: Structure[_T]) -> typing.Sequence[_T]: """ :alias_main: paddle.flatten :alias: paddle.flatten,paddle.tensor.flatten,paddle.tensor.manipulation.flatten diff --git a/python/requirements.txt b/python/requirements.txt index ada631fed6814..3b94629a108ae 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,5 +1,5 @@ httpx -numpy>=1.13, <2.0 +numpy>=1.13 protobuf>=3.20.2 Pillow decorator diff --git a/python/setup.py.in b/python/setup.py.in index 67d23a089aa37..ae5a7c0bdc5f6 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -710,6 +710,12 @@ package_data['paddle.libs']= [] if('${WITH_SHARED_PHI}' == 'ON'): package_data['paddle.libs'] += [('libphi' if os.name != 'nt' else 'phi') + ext_name] shutil.copy('${PHI_LIB}', libs_path) + if('${PHI_KERNEL_GPU_LIB}'): + package_data['paddle.libs'] += [ + ('libphi_kernel_gpu' if os.name != 'nt' else 'phi_kernel_gpu') + + ext_name + ] + shutil.copy('${PHI_KERNEL_GPU_LIB}', libs_path) if('${WITH_SHARED_IR}' == 'ON'): package_data['paddle.libs'] += [('libpir' if os.name != 'nt' else 'pir') + ext_name] @@ -1054,6 +1060,36 @@ if '${WITH_STRIP}' == 'ON': if 
os.system(command) != 0: raise Exception("strip *.so failed, command: %s" % command) + +def check_build_dependency(): + missing_modules = '''Missing build dependency: {dependency} +Please run 'pip install -r python/requirements.txt' to make sure you have all the dependencies installed. +'''.strip() + + with open('${PADDLE_SOURCE_DIR}' + '/python/requirements.txt') as f: + build_dependencies = ( + f.read().splitlines() + ) # Specify the dependencies to install + + python_dependencies_module = [] + installed_packages = [] + + for dependency in build_dependencies: + python_dependencies_module.append( + re.sub("_|-", '', re.sub(r"==.*|>=.*|<=.*", '', dependency)) + ) + reqs = subprocess.check_output([sys.executable, '-m', 'pip', 'freeze']) + + for r in reqs.split(): + installed_packages.append( + re.sub("_|-", '', r.decode().split('==')[0]).lower() + ) + + for dependency in python_dependencies_module: + if dependency.lower() not in installed_packages: + raise RuntimeError(missing_modules.format(dependency=dependency)) + + def install_cpp_dist_and_build_test(paddle_install_dir, paddle_lib_test_dir): """install cpp distribution and build test target @@ -1095,6 +1131,9 @@ def install_cpp_dist_and_build_test(paddle_install_dir, paddle_lib_test_dir): subprocess.check_call(["cmake", "--build", paddle_lib_test_dir]) +# check build dependency +check_build_dependency() + # install cpp distribution if '${WITH_CPP_DIST}' == 'ON': paddle_install_dir = '${PADDLE_INSTALL_DIR}' @@ -1112,6 +1151,28 @@ package_data['paddle.base'] = package_data.get('paddle.base', []) + [ package_data['paddle.tensor'] = package_data.get('paddle.tensor', []) + ['tensor.pyi'] +def generate_tensor_stub(paddle_binary_dir, paddle_source_dir): + print('-'*2, 'Generate stub file tensor.pyi ... 
') + script_path = paddle_source_dir + '/tools/' + sys.path.append(script_path) + import gen_tensor_stub + + gen_tensor_stub.generate_stub_file( + input_file=paddle_source_dir + + '/python/paddle/tensor/tensor.prototype.pyi', + output_file=paddle_binary_dir + '/python/paddle/tensor/tensor.pyi', + ) + + shutil.copy( + paddle_binary_dir + '/python/paddle/tensor/tensor.pyi', + paddle_source_dir + '/python/paddle/tensor/tensor.pyi', + ) + print('-'*2, 'End Generate stub file tensor.pyi ... ') + +# generate stub file `tensor.pyi` +generate_tensor_stub('${PADDLE_BINARY_DIR}', '${PADDLE_SOURCE_DIR}') + + with redirect_stdout(): setup(name='${PACKAGE_NAME}', version='${PADDLE_VERSION}', diff --git a/python/setup_cinn.py.in b/python/setup_cinn.py.in index 597e9b9187f6c..aa68da69a9f7c 100644 --- a/python/setup_cinn.py.in +++ b/python/setup_cinn.py.in @@ -220,10 +220,10 @@ if platform.system() == 'Linux' and platform.machine() == 'x86_64': cuda_major_version = version.split('.')[0] except Exception as e: raise ValueError("CUDA not found") - + install_requires.append(PADDLE_CUDA_INSTALL_REQUIREMENTS[cuda_major_version].split("|")) - - + + with redirect_stdout(): setup( diff --git a/python/unittest_py/requirements.txt b/python/unittest_py/requirements.txt index 15cf679177709..113283aff3500 100644 --- a/python/unittest_py/requirements.txt +++ b/python/unittest_py/requirements.txt @@ -2,7 +2,7 @@ PyGithub coverage==5.5 pycrypto ; platform_system != "Windows" mock -gym==0.26.2 +gymnasium==0.29.1 pygame==2.5.2 hypothesis opencv-python<=4.2.0.32 @@ -19,3 +19,4 @@ wandb>=0.13 ; python_version<"3.12" xlsxwriter==3.0.9 xdoctest==1.1.1 ubelt==1.3.3 # just for xdoctest +mypy==1.10.0 diff --git a/r/Dockerfile b/r/Dockerfile index 2605e98f7684d..f2fa52082ba96 100644 --- a/r/Dockerfile +++ b/r/Dockerfile @@ -30,7 +30,7 @@ RUN echo "channels:" >> ~/.condarc && \ echo " simpleitk: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud" >> ~/.condarc && \ echo "show_channel_urls: true" >> 
~/.condarc && \ echo "channel_priority: strict" >> ~/.condarc - + # Install R RUN conda install -y r -c conda-forge @@ -44,4 +44,4 @@ RUN Rscript -e 'install.packages("reticulate", repos="https://cran.rstudio.com") COPY example example RUN cd example && \ curl -O https://paddle-inference-dist.cdn.bcebos.com/mobilenet-test-model-data.tar.gz && \ - tar -zxvf mobilenet-test-model-data.tar.gz && rm mobilenet-test-model-data.tar.gz + tar -zxvf mobilenet-test-model-data.tar.gz && rm mobilenet-test-model-data.tar.gz diff --git a/setup.py b/setup.py index aab6fe0bcfd82..d2f93f15249c5 100644 --- a/setup.py +++ b/setup.py @@ -1061,6 +1061,12 @@ def get_package_data_and_package_dir(): ('libphi' if os.name != 'nt' else 'phi') + ext_suffix ] shutil.copy(env_dict.get("PHI_LIB"), libs_path) + if env_dict.get("PHI_KERNEL_GPU_LIB"): + package_data['paddle.libs'] += [ + ('libphi_kernel_gpu' if os.name != 'nt' else 'phi_kernel_gpu') + + ext_suffix + ] + shutil.copy(env_dict.get("PHI_KERNEL_GPU_LIB"), libs_path) if env_dict.get("WITH_SHARED_IR") == "ON": package_data['paddle.libs'] += [ @@ -1796,6 +1802,25 @@ def submodules_not_exists_or_empty(folder): sys.exit(1) +def generate_tensor_stub(paddle_binary_dir, paddle_source_dir): + print('-' * 2, 'Generate stub file tensor.pyi ... ') + script_path = paddle_source_dir + '/tools/' + sys.path.append(script_path) + import gen_tensor_stub + + gen_tensor_stub.generate_stub_file( + input_file=paddle_source_dir + + '/python/paddle/tensor/tensor.prototype.pyi', + output_file=paddle_binary_dir + '/python/paddle/tensor/tensor.pyi', + ) + + shutil.copy( + paddle_binary_dir + '/python/paddle/tensor/tensor.pyi', + paddle_source_dir + '/python/paddle/tensor/tensor.pyi', + ) + print('-' * 2, 'End Generate stub file tensor.pyi ... 
') + + def main(): # Parse the command line and check arguments before we proceed with building steps and setup parse_input_command(filter_args_list) @@ -1875,6 +1900,9 @@ def main(): package_data['paddle.libs'], ) + # generate stub file `tensor.pyi` + generate_tensor_stub(paddle_binary_dir, paddle_source_dir) + setup( name=package_name, version=paddle_version, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f732dad9e7f54..9fd22a6cf8b46 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -227,6 +227,10 @@ if(${len} GREATER_EQUAL 1) target_link_libraries(${test_name} $<TARGET_LINKER_FILE:${paddle_lib}>) if(WITH_SHARED_PHI) target_link_libraries(${test_name} $<TARGET_LINKER_FILE:phi>) + if(WITH_GPU OR WITH_ROCM) + target_link_libraries(${test_name} + $<TARGET_LINKER_FILE:phi_kernel_gpu>) + endif() endif() if(WITH_SHARED_IR) target_link_libraries(${test_name} $<TARGET_LINKER_FILE:pir>) diff --git a/test/deprecated/amp/test_collect_operator_stats.py b/test/amp/test_collect_operator_stats.py similarity index 94% rename from test/deprecated/amp/test_collect_operator_stats.py rename to test/amp/test_collect_operator_stats.py index 8b1d4f021a96d..80e592414e016 100644 --- a/test/deprecated/amp/test_collect_operator_stats.py +++ b/test/amp/test_collect_operator_stats.py @@ -157,11 +157,14 @@ class TestOpStatsStatic(unittest.TestCase): def test_while_op(self): paddle.enable_static() main_program, startup_program = build_while_model() - self.assertEqual(main_program.num_blocks, 2) - - paddle.static.amp.debugging.collect_operator_stats( - program=main_program, print_subblocks=True - ) + if paddle.framework.use_pir_api(): + self.assertEqual(main_program.num_blocks, 1) + else: + self.assertEqual(main_program.num_blocks, 2) + + paddle.static.amp.debugging.collect_operator_stats( + program=main_program, print_subblocks=True + ) paddle.disable_static() diff --git a/test/auto_parallel/hybrid_strategy/pir_reshard_nd_mesh.py 
b/test/auto_parallel/hybrid_strategy/pir_reshard_nd_mesh.py index 595f58b206193..8cf3f185dcbfc 100644 --- a/test/auto_parallel/hybrid_strategy/pir_reshard_nd_mesh.py +++ b/test/auto_parallel/hybrid_strategy/pir_reshard_nd_mesh.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import unittest import paddle import paddle.distributed as dist @@ -183,14 +182,11 @@ def run_pr_to_rs_case(self): tgt_out_value = (self._mesh.process_ids, [-1, 1, -1], {}) def run_pr_to_ss_case(self): - # [Partial(), Replicate()] --> [Shard(0), Shard(1)] - # raise NotImplementedError - with unittest.TestCase().assertRaises(NotImplementedError): - self.create_program( - [self.BATCH_SIZE, self.SEQ_LEN, self.HIDDEN_SIZE], - [dist.Partial(dist.ReduceType.kRedSum), dist.Replicate()], - [dist.Shard(0), dist.Shard(1)], - ) + self.create_program( + [self.BATCH_SIZE, self.SEQ_LEN, self.HIDDEN_SIZE], + [dist.Partial(dist.ReduceType.kRedSum), dist.Replicate()], + [dist.Shard(0), dist.Shard(1)], + ) def run_ss_to_ss_case(self): # [Shard(0), Shard(1)] --> [Shard(1), Shard(0)] diff --git a/test/auto_parallel/hybrid_strategy/pir_reshard_nd_mesh_cross_mesh.py b/test/auto_parallel/hybrid_strategy/pir_reshard_nd_mesh_cross_mesh.py index 47bfb9a44df06..532426208c1ee 100644 --- a/test/auto_parallel/hybrid_strategy/pir_reshard_nd_mesh_cross_mesh.py +++ b/test/auto_parallel/hybrid_strategy/pir_reshard_nd_mesh_cross_mesh.py @@ -102,7 +102,7 @@ def run_pp_to_rr_case(self): rank_id = dist.get_rank() if rank_id in self._mesh0.process_ids: - assert new_ops_name[-1] == "pd_op.send_v2" + assert new_ops_name[2] == "pd_op.send_v2" else: assert new_ops_name[2] == "pd_op.recv_v2" assert new_ops_name[-2] == "pd_op.c_allreduce_sum_" diff --git a/test/auto_parallel/pir/mlp_demo_3d.py b/test/auto_parallel/pir/mlp_demo_3d.py index 41ac0d25f682a..a743aa218e659 100644 --- a/test/auto_parallel/pir/mlp_demo_3d.py +++ b/test/auto_parallel/pir/mlp_demo_3d.py @@ -118,50 +118,43 @@ def test_to_static_program(self): rank = 
paddle.distributed.get_rank() ops = dist_program.global_block().ops op_names = [op.name() for op in ops] - if rank < 4: - std_ops = [ - 'pd_op.data', - 'builtin.parameter', - 'pd_op.data', - 'pd_op.relu', - 'pd_op.matmul', - 'pd_op.relu', - 'dist_op.reshard', - 'dist_op.reshard', - 'pd_op.relu_grad', - 'pd_op.matmul_grad', - 'dist_op.reshard', - 'dist_op.reshard', - 'pd_op.relu_grad', - 'pd_op.sgd_', - ] - else: - std_ops = [ - 'pd_op.data', - 'builtin.parameter', - 'pd_op.data', - 'dist_op.reshard', - 'pd_op.matmul', - 'dist_op.reshard', - 'pd_op.relu', - 'pd_op.subtract', - 'pd_op.square', - 'pd_op.mean', - 'builtin.shadow_output', - 'pd_op.full', - 'pd_op.full_like', - 'dist_op.reshard', - 'pd_op.mean_grad', - 'dist_op.reshard', - 'pd_op.square_grad', - 'pd_op.subtract_grad', - 'pd_op.relu_grad', - 'pd_op.matmul_grad', - 'dist_op.reshard', - 'dist_op.reshard', - 'pd_op.sgd_', - ] - + std_ops = [ + 'pd_op.data', + 'pd_op.data', + 'builtin.parameter', + 'builtin.parameter', + 'pd_op.data', + 'pd_op.data', + 'pd_op.relu', + 'pd_op.matmul', + 'pd_op.relu', + 'dist_op.reshard', + 'pd_op.matmul', + 'dist_op.reshard', + 'pd_op.relu', + 'pd_op.subtract', + 'pd_op.square', + 'pd_op.mean', + 'builtin.shadow_output', + 'pd_op.full', + 'pd_op.full_like', + 'dist_op.reshard', + 'pd_op.mean_grad', + 'dist_op.reshard', + 'pd_op.square_grad', + 'pd_op.subtract_grad', + 'pd_op.relu_grad', + 'pd_op.matmul_grad', + 'dist_op.reshard', + 'dist_op.reshard', + 'pd_op.relu_grad', + 'pd_op.matmul_grad', + 'dist_op.reshard', + 'dist_op.reshard', + 'pd_op.relu_grad', + 'pd_op.sgd_', + 'pd_op.sgd_', + ] assert op_names == std_ops def test_loss_value(self): diff --git a/test/auto_parallel/pir/pir_reshard_s_to_r.py b/test/auto_parallel/pir/pir_reshard_s_to_r.py index 933eb855730ea..1d4afcddf0d64 100644 --- a/test/auto_parallel/pir/pir_reshard_s_to_r.py +++ b/test/auto_parallel/pir/pir_reshard_s_to_r.py @@ -81,7 +81,7 @@ def run_pir_test_case(self): std_ops, ) elif self._shard == 1: - 
np.testing.assert_equal(main_program.num_ops(), 10) + np.testing.assert_equal(main_program.num_ops(), 8) std_ops = [ 'builtin.parameter', 'pd_op.data', @@ -89,9 +89,7 @@ def run_pir_test_case(self): 'pd_op.c_allgather', 'pd_op.full', 'pd_op.split_with_num', - 'builtin.split', 'pd_op.full', - 'builtin.combine', 'pd_op.concat', ] diff --git a/test/auto_parallel/pir/pir_reshard_s_to_r_cross_mesh.py b/test/auto_parallel/pir/pir_reshard_s_to_r_cross_mesh.py index 771fbf29491ba..6b2fab19e2dab 100644 --- a/test/auto_parallel/pir/pir_reshard_s_to_r_cross_mesh.py +++ b/test/auto_parallel/pir/pir_reshard_s_to_r_cross_mesh.py @@ -65,12 +65,14 @@ def run_pir_test_case(self): ops = [op.name() for op in main_program.global_block().ops] if self._shard == 0: if paddle.distributed.get_rank() == 0: - np.testing.assert_equal(main_program.num_ops(), 4) + np.testing.assert_equal(main_program.num_ops(), 6) std_ops = [ 'builtin.parameter', 'pd_op.data', 'dist_op.shard_tensor', 'pd_op.send_v2', + 'dist_op.reshard', + 'pd_op.c_allgather', ] np.testing.assert_equal( ops, @@ -91,19 +93,25 @@ def run_pir_test_case(self): ) elif self._shard == 1: if paddle.distributed.get_rank() == 0: - np.testing.assert_equal(main_program.num_ops(), 4) + np.testing.assert_equal(main_program.num_ops(), 10) std_ops = [ 'builtin.parameter', 'pd_op.data', 'dist_op.shard_tensor', 'pd_op.send_v2', + 'dist_op.reshard', + 'pd_op.c_allgather', + 'pd_op.full', + 'pd_op.split_with_num', + 'pd_op.full', + 'pd_op.concat', ] np.testing.assert_equal( ops, std_ops, ) elif paddle.distributed.get_rank() == 1: - np.testing.assert_equal(main_program.num_ops(), 11) + np.testing.assert_equal(main_program.num_ops(), 9) std_ops = [ 'builtin.parameter', 'pd_op.data', @@ -112,9 +120,7 @@ def run_pir_test_case(self): 'pd_op.c_allgather', 'pd_op.full', 'pd_op.split_with_num', - 'builtin.split', 'pd_op.full', - 'builtin.combine', 'pd_op.concat', ] diff --git a/test/auto_parallel/reshard_p_to_r_cross_mesh.py 
b/test/auto_parallel/reshard_p_to_r_cross_mesh.py index 6960530bf3bb3..605a245cd19db 100644 --- a/test/auto_parallel/reshard_p_to_r_cross_mesh.py +++ b/test/auto_parallel/reshard_p_to_r_cross_mesh.py @@ -90,12 +90,14 @@ def run_pir_static_test_case(self): ops = [op.name() for op in main_program.global_block().ops] if paddle.distributed.get_rank() == 0: - np.testing.assert_equal(main_program.num_ops(), 4) + np.testing.assert_equal(main_program.num_ops(), 6) std_ops = [ 'builtin.parameter', 'pd_op.data', 'dist_op.shard_tensor', 'pd_op.send_v2', + 'dist_op.reshard', + 'pd_op.c_allreduce_sum_', ] else: np.testing.assert_equal(main_program.num_ops(), 5) diff --git a/test/auto_parallel/spmd_rules/test_flatten_rule.py b/test/auto_parallel/spmd_rules/test_flatten_rule.py index 599b2ddf4bf95..9a9ae6b921842 100644 --- a/test/auto_parallel/spmd_rules/test_flatten_rule.py +++ b/test/auto_parallel/spmd_rules/test_flatten_rule.py @@ -38,7 +38,7 @@ def setUp(self): def test_flatten_infer_forward(self): # shape: [8, 16, 8, 24] --> [8, 16 * 8, 24] - # dims_mapping: [0, -1, -1, 1] --> [0, -1, -1, 1] [ 0, -1, 1] + # dims_mapping: [0, -1, -1, 1] --> [0, -1, -1, 1], ([0, -1, 1], [-1, 0, -1, -1, 1] // xshape) self.x_dist_tensor_spec.set_dims_mapping([0, -1, -1, 1]) self.attrs['start_axis'] = 1 self.attrs['stop_axis'] = 2 @@ -51,14 +51,17 @@ def test_flatten_infer_forward(self): infered_output_dist_attrs = result_dist_attrs[1] self.assertEqual(len(infered_input_dist_attrs), 1) - self.assertEqual(len(infered_output_dist_attrs), 1) + self.assertEqual(len(infered_output_dist_attrs), 2) self.assertEqual( infered_input_dist_attrs[0].dims_mapping, [0, -1, -1, 1] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [0, -1, 1]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, 0, -1, -1, 1] + ) # shape: [8, 16, 8, 24] --> [8, 16 * 8, 24] - # dims_mapping: [-1, 0, -1, 1] --> [-1, 0, -1, 1] [ -1, 0, 1] + # dims_mapping: [-1, 0, -1, 1] --> [-1, 0, -1, 1] ([ -1, 0, 1], 
[-1, -1, 0, -1, 1] // xshape) self.x_dist_tensor_spec.set_dims_mapping([-1, 0, -1, 1]) self.attrs['start_axis'] = 1 self.attrs['stop_axis'] = 2 @@ -74,9 +77,12 @@ def test_flatten_infer_forward(self): infered_input_dist_attrs[0].dims_mapping, [-1, 0, -1, 1] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [-1, 0, 1]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, -1, 0, -1, 1] + ) # shape: [8, 16, 8, 24] --> [8, 16 * 8, 24] - # dims_mapping: [-1, -1, 1, 0] --> [-1, -1, -1, 0] [ -1, -1, 0] + # dims_mapping: [-1, -1, 1, 0] --> [-1, -1, -1, 0] ([ -1, -1, 0], [-1, -1, -1, -1, 0] // xshape) self.x_dist_tensor_spec.set_dims_mapping([-1, -1, 1, 0]) self.attrs['start_axis'] = 1 self.attrs['stop_axis'] = 2 @@ -92,9 +98,12 @@ def test_flatten_infer_forward(self): infered_input_dist_attrs[0].dims_mapping, [-1, -1, -1, 0] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [-1, -1, 0]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, -1, -1, -1, 0] + ) # shape: [8, 16, 8, 24] --> [8 * 16 * 8 * 24] - # dims_mapping: [-1, 0, 1, -1] --> [-1, -1, -1, -1] [ -1] + # dims_mapping: [-1, 0, 1, -1] --> [-1, -1, -1, -1] ([ -1], [-1, -1, -1, -1, -1] // xshape) self.x_dist_tensor_spec.set_dims_mapping([-1, 0, 1, -1]) self.attrs['start_axis'] = 0 self.attrs['stop_axis'] = -1 @@ -110,9 +119,12 @@ def test_flatten_infer_forward(self): infered_input_dist_attrs[0].dims_mapping, [-1, -1, -1, -1] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [-1]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, -1, -1, -1, -1] + ) # shape: [8, 16, 8, 24] --> [8 * 16 * 8 * 24] - # dims_mapping: [0, -1, -1, 1] --> [0, -1, -1, -1] [ 0] + # dims_mapping: [0, -1, -1, 1] --> [0, -1, -1, -1] ([ 0], [-1, 0, -1, -1, -1] // xshape) self.x_dist_tensor_spec.set_dims_mapping([0, -1, -1, 1]) self.attrs['start_axis'] = 0 self.attrs['stop_axis'] = -1 @@ -128,9 +140,12 @@ def test_flatten_infer_forward(self): 
infered_input_dist_attrs[0].dims_mapping, [0, -1, -1, -1] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [0]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, 0, -1, -1, -1] + ) # shape: [8, 16, 8, 24] --> [8 * 16 * 8 * 24] - # dims_mapping: [1, 0, -1, -1] --> [1, -1, -1, -1] [ 1] + # dims_mapping: [1, 0, -1, -1] --> [1, -1, -1, -1] ([ 1], [-1, 1, -1, -1, -1] // xshape) self.x_dist_tensor_spec.set_dims_mapping([1, 0, -1, -1]) self.attrs['start_axis'] = 0 self.attrs['stop_axis'] = -1 @@ -146,9 +161,12 @@ def test_flatten_infer_forward(self): infered_input_dist_attrs[0].dims_mapping, [1, -1, -1, -1] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [1]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, 1, -1, -1, -1] + ) # shape: [8, 16, 8, 24] --> [8, 16 * 8 * 24] - # dims_mapping: [-1, -1, 0, 1] --> [-1, -1, -1, -1] [-1, -1] + # dims_mapping: [-1, -1, 0, 1] --> [-1, -1, -1, -1] ([-1, -1], [-1, -1, -1, -1, -1] // xshape) self.x_dist_tensor_spec.set_dims_mapping([-1, -1, 0, 1]) self.attrs['start_axis'] = 1 self.attrs['stop_axis'] = -1 @@ -164,9 +182,12 @@ def test_flatten_infer_forward(self): infered_input_dist_attrs[0].dims_mapping, [-1, -1, -1, -1] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [-1, -1]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, -1, -1, -1, -1] + ) # shape: [8, 16, 8, 24] --> [8, 16 * 8 * 24] - # dims_mapping: [-1, 0, -1, 1] --> [-1, 0, -1, -1] [-1, 0] + # dims_mapping: [-1, 0, -1, 1] --> [-1, 0, -1, -1] ([-1, 0], [-1, -1, 0, -1, -1] // xshape) self.x_dist_tensor_spec.set_dims_mapping([-1, 0, -1, 1]) self.attrs['start_axis'] = 1 self.attrs['stop_axis'] = -1 @@ -182,9 +203,12 @@ def test_flatten_infer_forward(self): infered_input_dist_attrs[0].dims_mapping, [-1, 0, -1, -1] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [-1, 0]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, -1, 0, -1, -1] + ) # shape: 
[8, 16, 8, 24] --> [8, 16 * 8 * 24] - # dims_mapping: [0, 1, -1, -1] --> [0, 1, -1, -1] [0, 1] + # dims_mapping: [0, 1, -1, -1] --> [0, 1, -1, -1] ([0, 1], [-1, 0, 1, -1, -1] // xshape) self.x_dist_tensor_spec.set_dims_mapping([0, 1, -1, -1]) self.attrs['start_axis'] = 1 self.attrs['stop_axis'] = -1 @@ -200,6 +224,9 @@ def test_flatten_infer_forward(self): infered_input_dist_attrs[0].dims_mapping, [0, 1, -1, -1] ) self.assertEqual(infered_output_dist_attrs[0].dims_mapping, [0, 1]) + self.assertEqual( + infered_output_dist_attrs[1].dims_mapping, [-1, 0, 1, -1, -1] + ) def test_flatten_infer_backward(self): process_mesh = auto.ProcessMesh(mesh=[[0, 1, 2, 3], [4, 5, 6, 7]]) diff --git a/test/autograd/CMakeLists.txt b/test/autograd/CMakeLists.txt index 14336674c2ce0..82dc290ddbe3f 100644 --- a/test/autograd/CMakeLists.txt +++ b/test/autograd/CMakeLists.txt @@ -20,3 +20,4 @@ set_tests_properties(test_minimize PROPERTIES TIMEOUT 60) if(NOT WIN32) set_tests_properties(test_autograd_functional_prim PROPERTIES TIMEOUT 60) endif() +set_tests_properties(test_autograd_functional_static PROPERTIES TIMEOUT 160) diff --git a/test/deprecated/autograd/test_autograd_functional_static.py b/test/autograd/test_autograd_functional_static.py similarity index 99% rename from test/deprecated/autograd/test_autograd_functional_static.py rename to test/autograd/test_autograd_functional_static.py index 127cb93a4cbc6..c8089ce437d0d 100644 --- a/test/deprecated/autograd/test_autograd_functional_static.py +++ b/test/autograd/test_autograd_functional_static.py @@ -353,6 +353,9 @@ def run_test_by_entries(self, pd_f, np_f, inps, batch=False): np.testing.assert_allclose(pd_entry, np_entry, self.rtol, self.atol) def test_square(self): + if paddle.framework.use_pir_api(): + return + def pd_f(x): return paddle.multiply(x, x) diff --git a/test/cinn/ir/test_llir_schedule_fuse_split.py b/test/cinn/ir/test_llir_schedule_fuse_split.py index 612e3a36c59a1..91930d82a90d6 100644 --- 
a/test/cinn/ir/test_llir_schedule_fuse_split.py +++ b/test/cinn/ir/test_llir_schedule_fuse_split.py @@ -158,7 +158,7 @@ def elementwise_fuse_assign_loop( i_j_k_fused % 128, ], ) - Y[i1, j1, k1] = 2.0 * X[i1, j1, k1] + Y[i1, j1, k1] = X[i1, j1, k1] * 2.0 assert str(origin.elementwise_fuse_assign_loop) == str( expected.elementwise_fuse_assign_loop diff --git a/test/collective/fleet/test_c_comm_init_op.sh b/test/collective/fleet/test_c_comm_init_op.sh index 9b99e553d182b..dbf148856d435 100644 --- a/test/collective/fleet/test_c_comm_init_op.sh +++ b/test/collective/fleet/test_c_comm_init_op.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/collective/fleet/test_fused_attention_pass_with_mp.sh b/test/collective/fleet/test_fused_attention_pass_with_mp.sh index 4b2b48cdc08df..777b6b106ee70 100644 --- a/test/collective/fleet/test_fused_attention_pass_with_mp.sh +++ b/test/collective/fleet/test_fused_attention_pass_with_mp.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/test/collective/fleet/test_new_group.sh b/test/collective/fleet/test_new_group.sh index 4914183fb46f9..4ec46d22cdb48 100755 --- a/test/collective/fleet/test_new_group.sh +++ b/test/collective/fleet/test_new_group.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/collective/multinode/multinode_dist_test.sh b/test/collective/multinode/multinode_dist_test.sh index 8ea1937f8318a..002b9eee612ec 100644 --- a/test/collective/multinode/multinode_dist_test.sh +++ b/test/collective/multinode/multinode_dist_test.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -84,7 +84,7 @@ if [[ $exit_code -ne 0 ]]; then fi #display system context -for i in {1..2}; do +for i in {1..2}; do sleep 3 ps -aux netstat -anlp diff --git a/test/collective/test_mpi_comm.sh b/test/collective/test_mpi_comm.sh index 062d3c1ed8e5e..83ef86fd4713e 100644 --- a/test/collective/test_mpi_comm.sh +++ b/test/collective/test_mpi_comm.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/collective/test_orthogonal_strategy.sh b/test/collective/test_orthogonal_strategy.sh index 6b4df2b124617..f65ac14842bb6 100644 --- a/test/collective/test_orthogonal_strategy.sh +++ b/test/collective/test_orthogonal_strategy.sh @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/collective/test_strategy_group.sh b/test/collective/test_strategy_group.sh index d6c3a0e79fa87..7dc334278d00a 100644 --- a/test/collective/test_strategy_group.sh +++ b/test/collective/test_strategy_group.sh @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/collective/test_world_size_and_rank.sh b/test/collective/test_world_size_and_rank.sh index c559c4bd26cff..e6762009083bc 100644 --- a/test/collective/test_world_size_and_rank.sh +++ b/test/collective/test_world_size_and_rank.sh @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/contrib/CMakeLists.txt b/test/contrib/CMakeLists.txt index e723b8abaf396..02219f03129b8 100644 --- a/test/contrib/CMakeLists.txt +++ b/test/contrib/CMakeLists.txt @@ -20,3 +20,4 @@ py_test_modules( FLAGS_conv_workspace_size_limit=1000) set_tests_properties(test_multi_precision_fp16_train PROPERTIES TIMEOUT 120) +set_tests_properties(test_image_classification_fp16 PROPERTIES TIMEOUT 120) diff --git a/test/contrib/test_bf16_utils.py b/test/contrib/test_bf16_utils.py new file mode 100644 index 0000000000000..ce542e9603dad --- /dev/null +++ b/test/contrib/test_bf16_utils.py @@ -0,0 +1,111 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import unittest + +import paddle +from paddle.static import amp + +paddle.enable_static() + + +class AMPTest(unittest.TestCase): + def setUp(self): + self.bf16_list = copy.copy(amp.bf16.amp_lists.bf16_list) + self.fp32_list = copy.copy(amp.bf16.amp_lists.fp32_list) + self.gray_list = copy.copy(amp.bf16.amp_lists.gray_list) + self.amp_lists_ = None + + def tearDown(self): + self.assertEqual(self.amp_lists_.bf16_list, self.bf16_list) + self.assertEqual(self.amp_lists_.fp32_list, self.fp32_list) + self.assertEqual(self.amp_lists_.gray_list, self.gray_list) + + def test_amp_lists(self): + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16() + + def test_amp_lists_1(self): + # 1. w={'exp}, b=None + self.bf16_list.add('exp') + self.fp32_list.remove('exp') + + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'exp'}) + + def test_amp_lists_2(self): + # 2. w={'tanh'}, b=None + self.fp32_list.remove('tan') + self.bf16_list.add('tan') + + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'tan'}) + + def test_amp_lists_3(self): + # 3. w={'lstm'}, b=None + self.bf16_list.add('lstm') + + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'lstm'}) + + def test_amp_lists_4(self): + # 4. 
w=None, b={'matmul_v2'} + self.bf16_list.remove('matmul_v2') + self.fp32_list.add('matmul_v2') + + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( + custom_fp32_list={'matmul_v2'} + ) + + def test_amp_lists_5(self): + # 5. w=None, b={'matmul_v2'} + self.fp32_list.add('matmul_v2') + self.bf16_list.remove('matmul_v2') + + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( + custom_fp32_list={'matmul_v2'} + ) + + def test_amp_lists_6(self): + # 6. w=None, b={'lstm'} + self.fp32_list.add('lstm') + + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( + custom_fp32_list={'lstm'} + ) + + def test_amp_lists_7(self): + self.fp32_list.add('reshape2') + self.gray_list.remove('reshape2') + + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( + custom_fp32_list={'reshape2'} + ) + + def test_amp_list_8(self): + self.bf16_list.add('reshape2') + self.gray_list.remove('reshape2') + + self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( + custom_bf16_list={'reshape2'} + ) + + +class AMPTest2(unittest.TestCase): + def test_amp_lists_(self): + # 7. 
w={'lstm'} b={'lstm'} + # raise ValueError + self.assertRaises( + ValueError, amp.bf16.AutoMixedPrecisionListsBF16, {'lstm'}, {'lstm'} + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/contrib/test_image_classification_fp16.py b/test/contrib/test_image_classification_fp16.py similarity index 67% rename from test/deprecated/contrib/test_image_classification_fp16.py rename to test/contrib/test_image_classification_fp16.py index c3cfa834a4ed2..d927b68eff9b3 100644 --- a/test/deprecated/contrib/test_image_classification_fp16.py +++ b/test/contrib/test_image_classification_fp16.py @@ -23,30 +23,130 @@ import numpy # TODO: remove sys.path.append -sys.path.append("../../legacy_test") +sys.path.append("../legacy_test") import nets import paddle from paddle import base +from paddle.framework import in_pir_mode +from paddle.nn import Layer +from paddle.pir_utils import test_with_pir_api from paddle.static.amp import decorate paddle.enable_static() +def img_conv_group_pir( + input, + in_channels, + out_channels, + conv_num_filter, + kernel_size, + pool_size, + pool_stride=1, + pool_padding=0, + pool_type='max', + global_pooling=False, + conv_with_batchnorm=False, + conv_batchnorm_drop_rate=0.0, + conv_stride=1, + conv_padding=1, + conv_filter_size=3, + conv_dilation=1, + conv_groups=1, + param_attr=None, + bias_attr=None, + conv_act=None, + use_cudnn=True, +): + tmp = input + assert isinstance(conv_num_filter, (list, tuple)) + + def __extend_list__(obj): + if not hasattr(obj, '__len__'): + return [obj] * len(conv_num_filter) + else: + assert len(obj) == len(conv_num_filter) + return obj + + conv_padding = __extend_list__(conv_padding) + conv_filter_size = __extend_list__(conv_filter_size) + param_attr = __extend_list__(param_attr) + conv_with_batchnorm = __extend_list__(conv_with_batchnorm) + conv_batchnorm_drop_rate = __extend_list__(conv_batchnorm_drop_rate) + + for i in range(len(conv_num_filter)): + local_conv_act = conv_act + if 
conv_with_batchnorm[i]: + local_conv_act = None + + conv = paddle.nn.Conv2D( + in_channels, + out_channels, + kernel_size, + stride=conv_stride, + padding=conv_padding[i], + dilation=conv_dilation, + groups=conv_groups, + bias_attr=bias_attr, + ) + conv_out = conv(input) + + if conv_with_batchnorm[i]: + batch_norm = paddle.nn.BatchNorm(in_channels, act=conv_act) + tmp = batch_norm(tmp) + drop_rate = conv_batchnorm_drop_rate[i] + if abs(drop_rate) > 1e-5: + tmp = paddle.nn.functional.dropout(x=tmp, p=drop_rate) + + if pool_type == 'max': + pool_out = paddle.nn.functional.max_pool2d( + x=tmp, + kernel_size=pool_size, + stride=pool_stride, + ) + else: + pool_out = paddle.nn.functional.avg_pool2d( + x=tmp, + kernel_size=pool_size, + stride=pool_stride, + ) + return pool_out + + def resnet_cifar10(input, depth=32): def conv_bn_layer( - input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False + input, + ch_out, + filter_size, + stride, + padding, + act='relu', + bias_attr=False, ): - tmp = paddle.static.nn.conv2d( - input=input, - filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - act=None, - bias_attr=bias_attr, - ) - return paddle.static.nn.batch_norm(input=tmp, act=act) + if in_pir_mode(): + conv = paddle.nn.Conv2D( + in_channels=input.shape[1], + out_channels=ch_out, + kernel_size=filter_size, + stride=stride, + padding=padding, + bias_attr=bias_attr, + ) + tmp = conv(input) + bn = paddle.nn.BatchNorm(tmp.shape[1], act=act) + return bn(tmp) + else: + tmp = paddle.static.nn.conv2d( + input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr, + ) + return paddle.static.nn.batch_norm(input=tmp, act=act) def shortcut(input, ch_in, ch_out, stride): if ch_in != ch_out: @@ -80,17 +180,32 @@ def layer_warp(block_func, input, ch_in, ch_out, count, stride): def vgg16_bn_drop(input): def conv_block(input, num_filter, groups, dropouts): - return 
nets.img_conv_group( - input=input, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act='relu', - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type='max', - ) + if in_pir_mode(): + return img_conv_group_pir( + input, + in_channels=3, + out_channels=num_filter, + conv_num_filter=[num_filter] * groups, + kernel_size=3, + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='max', + conv_act='relu', + conv_with_batchnorm=True, + ) + else: + return nets.img_conv_group( + input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max', + ) conv1 = conv_block(input, 64, 2, [0.3, 0]) conv2 = conv_block(conv1, 128, 2, [0.4, 0]) @@ -100,7 +215,11 @@ def conv_block(input, num_filter, groups, dropouts): drop = paddle.nn.functional.dropout(x=conv5, p=0.5) fc1 = paddle.static.nn.fc(x=drop, size=4096, activation=None) - bn = paddle.static.nn.batch_norm(input=fc1, act='relu') + if in_pir_mode(): + batch_norm = paddle.nn.BatchNorm(4096) + bn = batch_norm(fc1) + else: + bn = paddle.static.nn.batch_norm(input=fc1, act='relu') drop2 = paddle.nn.functional.dropout(x=bn, p=0.5) fc2 = paddle.static.nn.fc(x=drop2, size=4096, activation=None) return fc2 @@ -110,8 +229,8 @@ def train(net_type, use_cuda, save_dirname, is_local): classdim = 10 data_shape = [3, 32, 32] - train_program = base.Program() - startup_prog = base.Program() + train_program = paddle.static.Program() + startup_prog = paddle.static.Program() paddle.seed(123) with base.program_guard(train_program, startup_prog): images = paddle.static.data( @@ -128,31 +247,85 @@ def train(net_type, use_cuda, save_dirname, is_local): else: raise ValueError("%s network is not supported" % net_type) - logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax") - cost, predict = 
paddle.nn.functional.softmax_with_cross_entropy( - logits, label, return_softmax=True - ) - avg_cost = paddle.mean(cost) - acc = paddle.static.accuracy(input=predict, label=label) + optimizer = paddle.optimizer.Lamb(learning_rate=0.001) - # Test program - test_program = train_program.clone(for_test=True) + if in_pir_mode(): - optimizer = paddle.optimizer.Lamb(learning_rate=0.001) + class layer(Layer): + def __init__(self, classdim, act): + super().__init__() + self.classdim = classdim + self.act = act - amp_lists = paddle.static.amp.AutoMixedPrecisionLists( - custom_black_varnames={"loss", "conv2d_0.w_0"} - ) - mp_optimizer = decorate( - optimizer=optimizer, - amp_lists=amp_lists, - init_loss_scaling=8.0, - use_dynamic_loss_scaling=True, - ) + def forward(self, x): + logits = paddle.static.nn.fc( + x=x, size=self.classdim, activation=self.act + ) + ( + cost, + predict, + ) = paddle.nn.functional.softmax_with_cross_entropy( + logits, label, return_softmax=True + ) + return cost, predict + + model = layer(classdim, "softmax") + model, optimizer = paddle.amp.decorate( + models=model, + optimizers=optimizer, + level="O2", + dtype='float16', + ) + scaler = paddle.amp.GradScaler( + init_loss_scaling=8.0, use_dynamic_loss_scaling=True + ) + + with paddle.amp.auto_cast( + enable=True, + level='O2', + dtype='float16', + custom_black_list={'transpose2', 'concat'}, + use_promote=True, + ): + cost, predict = model(net) + avg_cost = paddle.mean(cost) + acc = paddle.static.accuracy(input=predict, label=label) + # Test program + value_map = paddle.pir.IrMapping() + test_program = train_program.clone(value_map) + fetch_list = [] + fetch_list.append(value_map.look_up(avg_cost)) + fetch_list.append(value_map.look_up(acc)) + + scaled = scaler.scale(avg_cost) + scaler.minimize(optimizer, scaled, startup_program=startup_prog) + loss_scaling = optimizer.get_loss_scaling() + scaled_loss = optimizer.get_scaled_loss() + else: + logits = paddle.static.nn.fc( + x=net, size=classdim, 
activation="softmax" + ) + cost, predict = paddle.nn.functional.softmax_with_cross_entropy( + logits, label, return_softmax=True + ) + avg_cost = paddle.mean(cost) + acc = paddle.static.accuracy(input=predict, label=label) + # Test program + test_program = train_program.clone(for_test=True) + fetch_list = [avg_cost, acc] + amp_lists = paddle.static.amp.AutoMixedPrecisionLists( + custom_black_varnames={"loss", "conv2d_0.w_0"} + ) + mp_optimizer = decorate( + optimizer=optimizer, + amp_lists=amp_lists, + init_loss_scaling=8.0, + use_dynamic_loss_scaling=True, + ) - mp_optimizer.minimize(avg_cost) - loss_scaling = mp_optimizer.get_loss_scaling() - scaled_loss = mp_optimizer.get_scaled_loss() + mp_optimizer.minimize(avg_cost) + loss_scaling = mp_optimizer.get_loss_scaling() + scaled_loss = mp_optimizer.get_scaled_loss() BATCH_SIZE = 128 PASS_NUM = 1 @@ -190,7 +363,7 @@ def train_loop(main_program): loss_t, acc_t = exe.run( program=test_program, feed=feeder.feed(test_data), - fetch_list=[avg_cost, acc], + fetch_list=fetch_list, ) if math.isnan(float(loss_t)): sys.exit("got NaN loss, training failed.") @@ -456,10 +629,12 @@ def test_amp_lists_7(self): {'lstm'}, ) + @test_with_pir_api def test_vgg_cuda(self): with self.scope_prog_guard(): self.main('vgg', use_cuda=True) + @test_with_pir_api def test_resnet_cuda(self): with self.scope_prog_guard(): self.main('resnet', use_cuda=True) @@ -474,44 +649,5 @@ def scope_prog_guard(self): yield -class TestAmpWithNonIterableDataLoader(unittest.TestCase): - def decorate_with_data_loader(self): - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - with paddle.base.unique_name.guard(): - image = paddle.static.data( - name='image', shape=[-1, 3, 224, 224], dtype='float32' - ) - label = paddle.static.data( - name='label', shape=[-1, 1], dtype='int64' - ) - - net = vgg16_bn_drop(image) - logits = paddle.static.nn.fc( - x=net, size=10, activation="softmax" 
- ) - cost, predict = paddle.nn.functional.softmax_with_cross_entropy( - logits, label, return_softmax=True - ) - avg_cost = paddle.mean(cost) - - optimizer = paddle.optimizer.Lamb(learning_rate=0.001) - amp_lists = paddle.static.amp.AutoMixedPrecisionLists( - custom_black_varnames={"loss", "conv2d_0.w_0"} - ) - mp_optimizer = decorate( - optimizer=optimizer, - amp_lists=amp_lists, - init_loss_scaling=8.0, - use_dynamic_loss_scaling=True, - ) - - mp_optimizer.minimize(avg_cost) - - def test_non_iterable_dataloader(self): - self.decorate_with_data_loader() - - if __name__ == '__main__': unittest.main() diff --git a/test/cpp/auto_parallel/spmd_rule_test.cc b/test/cpp/auto_parallel/spmd_rule_test.cc index 0ae0b8ed3eaf1..8395588b5ce58 100644 --- a/test/cpp/auto_parallel/spmd_rule_test.cc +++ b/test/cpp/auto_parallel/spmd_rule_test.cc @@ -1853,6 +1853,71 @@ TEST(CumSumGradInferSpmd, Ctor) { std::vector<int64_t>({-1, -1, -1})); } +TEST(Flatten, Ctor) { + std::vector<int64_t> mesh_shape = {2, 2}; + std::vector<int64_t> process_ids = {0, 1, 2, 3}; + std::vector<std::string> dim_names = {"x", "y"}; + ProcessMesh process_mesh(mesh_shape, process_ids, dim_names); + + auto build_input = [&](const std::vector<int64_t>& shape, + const std::vector<int64_t>& dim_mapping) { + auto t_dist_attr = TensorDistAttr(); + t_dist_attr.set_process_mesh(process_mesh); + t_dist_attr.set_dims_mapping(dim_mapping); + t_dist_attr.set_dynamic_dims(std::vector<bool>(shape.size(), false)); + auto input = + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); + return input; + }; + + // [b, h/ph, w/pw, c, ph, pw]; dp + auto input1 = build_input({4, 16, 16, 4, 2, 2}, {0, -1, -1, -1, -1, -1}); + // [b, h/ph, w/pw, c, ph, pw] => [b, h/ph, w/pw, hidden_size] + auto spmd1 = FlattenInferSpmd(input1, -3, -1); + EXPECT_EQ(spmd1.first.size(), static_cast<size_t>(1)); + EXPECT_EQ(spmd1.second.size(), static_cast<size_t>(2)); + check_dim_mapping(spmd1.first[0], {0, -1, -1, -1, -1, -1}); + 
check_dim_mapping(spmd1.second[0], {0, -1, -1, -1}); + check_dim_mapping(spmd1.second[1], {-1, 0, -1, -1, -1, -1, -1}); // x_shape + + // [b, h/ph, w/pw, c, ph, pw]; dp, mp + auto input2 = build_input({4, 16, 16, 4, 2, 2}, {-1, 0, -1, 1, -1, -1}); + auto spmd2 = FlattenInferSpmd(input2, 1, 4); + EXPECT_EQ(spmd2.first.size(), static_cast<size_t>(1)); + EXPECT_EQ(spmd2.second.size(), static_cast<size_t>(2)); + check_dim_mapping(spmd2.first[0], {-1, 0, -1, -1, -1, -1}); + check_dim_mapping(spmd2.second[0], {-1, 0, -1}); + check_dim_mapping(spmd2.second[1], {-1, -1, 0, -1, -1, -1, -1}); // x_shape + + // [b, s, nh, h/nh]; dp , mp + auto input3 = build_input({2, 1024, 32, 32}, {0, -1, 1, -1}); + // [b, s, nh, h/nh] => [b, s, h] + auto spmd3 = FlattenInferSpmd(input3, 2, 3); + EXPECT_EQ(spmd3.first.size(), static_cast<size_t>(1)); + EXPECT_EQ(spmd3.second.size(), static_cast<size_t>(2)); + check_dim_mapping(spmd3.first[0], {0, -1, 1, -1}); + check_dim_mapping(spmd3.second[0], {0, -1, 1}); + check_dim_mapping(spmd3.second[1], {-1, 0, -1, 1, -1}); // x_shape + + // [b, c, d, h, w]; dp, mp + auto input4 = build_input({4, 16, 16, 4, 16}, {-1, -1, 0, 1, -1}); + auto spmd4 = FlattenInferSpmd(input4, 1, 4); + EXPECT_EQ(spmd4.first.size(), static_cast<size_t>(1)); + EXPECT_EQ(spmd4.second.size(), static_cast<size_t>(2)); + check_dim_mapping(spmd4.first[0], {-1, -1, -1, -1, -1}); + check_dim_mapping(spmd4.second[0], {-1, -1}); + check_dim_mapping(spmd4.second[1], {-1, -1, -1, -1, -1, -1}); // x_shape + + auto out_grad = build_input({2, 1024, 1024}, {0, -1, 1}); + auto xshape = build_input({0, 2, 1024, 4, 1024 / 4}, {-1, 0, -1, 1, -1}); + auto spmd_grad = FlattenGradInferSpmd(xshape, out_grad); + EXPECT_EQ(spmd_grad.first.size(), static_cast<size_t>(2)); + EXPECT_EQ(spmd_grad.second.size(), static_cast<size_t>(1)); + check_dim_mapping(spmd_grad.first[0], {-1, 0, -1, 1, -1}); + check_dim_mapping(spmd_grad.first[1], {0, -1, 1}); + check_dim_mapping(spmd_grad.second[0], {0, -1, 1, 
-1}); +} + } // namespace auto_parallel } // namespace distributed } // namespace paddle diff --git a/test/cpp/eager/task_tests/CMakeLists.txt b/test/cpp/eager/task_tests/CMakeLists.txt index 39a11d9582ae3..393421be711f0 100755 --- a/test/cpp/eager/task_tests/CMakeLists.txt +++ b/test/cpp/eager/task_tests/CMakeLists.txt @@ -4,7 +4,7 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) paddle_test(test_egr_task_hook SRCS hook_test.cc) paddle_test(test_egr_task_backward SRCS backward_test.cc) paddle_test(test_egr_task_grad SRCS grad_test.cc) - paddle_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc) + paddle_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS phi) paddle_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc) paddle_test(test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc) paddle_test(test_egr_task_autocodegen SRCS generated_test.cc) diff --git a/test/cpp/fluid/benchmark/CMakeLists.txt b/test/cpp/fluid/benchmark/CMakeLists.txt index 0b14b812af9f9..cb8e47a0b305f 100644 --- a/test/cpp/fluid/benchmark/CMakeLists.txt +++ b/test/cpp/fluid/benchmark/CMakeLists.txt @@ -1,4 +1,4 @@ -paddle_test(op_tester SRCS op_tester.cc DEPS common) +paddle_test(op_tester SRCS op_tester.cc DEPS common phi) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will diff --git a/test/cpp/fluid/memory/malloc_test.cu b/test/cpp/fluid/memory/malloc_test.cu index 5728363ac8877..a86d3c4988b7b 100644 --- a/test/cpp/fluid/memory/malloc_test.cu +++ b/test/cpp/fluid/memory/malloc_test.cu @@ -37,8 +37,6 @@ const int NUM_STREAMS = 8; const int N = 2; const float DELTA = 1e-1; -using CudaDevCtxVec = std::vector<std::unique_ptr<phi::GPUContext>>; - __global__ void kernel(float *x, int n) { int tid = threadIdx.x + blockIdx.x * blockDim.x; for (int i = tid; i < n; i += blockDim.x * gridDim.x) { @@ -46,51 +44,58 @@ __global__ void kernel(float *x, int n) { } } -void CheckKernelOutput(float *x, int n) { +void 
CheckKernelOutput(const AllocationPtr &x, int n) { auto host_x = std::unique_ptr<float[]>(new float[n]); for (int i = 0; i < n; ++i) { #ifdef PADDLE_WITH_HIP - EXPECT_TRUE( - hipSuccess == - hipMemcpy(host_x.get(), x, n * sizeof(float), hipMemcpyDeviceToHost)); + EXPECT_TRUE(hipSuccess == hipMemcpy(host_x.get(), + (x->ptr()), + n * sizeof(float), + hipMemcpyDeviceToHost)); #else - EXPECT_TRUE( - cudaSuccess == - cudaMemcpy(host_x.get(), x, n * sizeof(float), cudaMemcpyDeviceToHost)); + EXPECT_TRUE(cudaSuccess == cudaMemcpy(host_x.get(), + (x->ptr()), + n * sizeof(float), + cudaMemcpyDeviceToHost)); #endif EXPECT_GE(host_x[i] + DELTA, 3.14159f * i); EXPECT_LE(host_x[i] - DELTA, 3.14159f * i); } } -void MultiStreamCompute(float **data, - float **second_data, - const phi::GPUContext &ctx) { +void MultiStreamCompute(const AllocationPtr &first_data, + const AllocationPtr &second_data, + phi::GPUContext *ctx) { // multi-streams - AllocationPtr allocation_ptr = - Alloc(ctx.GetPlace(), - N * sizeof(float), - phi::Stream(reinterpret_cast<phi::StreamId>(ctx.stream()))); - EXPECT_GE(allocation_ptr->size(), N * sizeof(float)); - *data = reinterpret_cast<float *>(allocation_ptr->ptr()); + EXPECT_GE(first_data->size(), N * sizeof(float)); + #ifdef PADDLE_WITH_HIP - hipLaunchKernelGGL((kernel), dim3(1), dim3(64), 0, ctx.stream(), *data, N); + hipLaunchKernelGGL((kernel), + dim3(1), + dim3(64), + 0, + ctx->stream(), + reinterpret_cast<float *>(first_data->ptr()), + N); #else - kernel<<<1, 64, 0, ctx.stream()>>>(*data, N); + kernel<<<1, 64, 0, ctx->stream()>>>( + reinterpret_cast<float *>(first_data->ptr()), N); #endif + EXPECT_GE(second_data->size(), N * sizeof(float)); // allocate and compute on same stream again - allocation_ptr = - Alloc(ctx.GetPlace(), - N * sizeof(float), - phi::Stream(reinterpret_cast<phi::StreamId>(ctx.stream()))); - EXPECT_GE(allocation_ptr->size(), N * sizeof(float)); - *second_data = reinterpret_cast<float *>(allocation_ptr->ptr()); + #ifdef 
PADDLE_WITH_HIP - hipLaunchKernelGGL( - (kernel), dim3(1), dim3(64), 0, ctx.stream(), *second_data, N); + hipLaunchKernelGGL((kernel), + dim3(1), + dim3(64), + 0, + ctx->stream(), + reinterpret_cast<float *>(second_data->ptr()), + N); #else - kernel<<<1, 64, 0, ctx.stream()>>>(*second_data, N); + kernel<<<1, 64, 0, ctx->stream()>>>( + reinterpret_cast<float *>(second_data->ptr()), N); #endif } @@ -100,23 +105,26 @@ TEST(Malloc, GPUContextMultiStream) { AllocationPtr main_stream_alloc_ptr = Alloc(place, N * sizeof(float)); EXPECT_GE(main_stream_alloc_ptr->size(), N * sizeof(float)); - float *main_stream_data = - reinterpret_cast<float *>(main_stream_alloc_ptr->ptr()); - float *data[NUM_STREAMS]; - float *second_data[NUM_STREAMS]; - CudaDevCtxVec dev_ctx; + AllocationPtr first_data[NUM_STREAMS], second_data[NUM_STREAMS]; + std::vector<phi::GPUContext *> dev_ctx; // default stream #ifdef PADDLE_WITH_HIP - hipLaunchKernelGGL((kernel), dim3(1), dim3(64), 0, 0, main_stream_data, N); + hipLaunchKernelGGL((kernel), + dim3(1), + dim3(64), + 0, + 0, + reinterpret_cast<float *>(main_stream_alloc_ptr->ptr()), + N); #else - kernel<<<1, 64>>>(main_stream_data, N); + kernel<<<1, 64>>>(reinterpret_cast<float *>(main_stream_alloc_ptr->ptr()), N); #endif main_stream_alloc_ptr.reset(); for (int i = 0; i < NUM_STREAMS; ++i) { - auto ctx = std::make_unique<phi::GPUContext>(place); + auto ctx = new phi::GPUContext(place); ctx->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(place, ctx->stream()) .get()); @@ -133,8 +141,16 @@ TEST(Malloc, GPUContextMultiStream) { .GetAllocator(paddle::platform::CUDAPinnedPlace()) .get()); ctx->PartialInitWithAllocator(); - dev_ctx.emplace_back(std::move(ctx)); - MultiStreamCompute(&data[i], &second_data[i], *dev_ctx[i]); + dev_ctx.emplace_back(ctx); + first_data[i] = + Alloc(ctx->GetPlace(), + N * sizeof(float), + phi::Stream(reinterpret_cast<phi::StreamId>(ctx->stream()))); + second_data[i] = + Alloc(ctx->GetPlace(), + 
N * sizeof(float), + phi::Stream(reinterpret_cast<phi::StreamId>(ctx->stream()))); + MultiStreamCompute(first_data[i], second_data[i], ctx); } #ifdef PADDLE_WITH_HIP @@ -142,10 +158,21 @@ TEST(Malloc, GPUContextMultiStream) { #else EXPECT_TRUE(cudaSuccess == cudaDeviceSynchronize()); #endif + for (int i = 0; i < NUM_STREAMS; ++i) { - CheckKernelOutput(data[i], N); + CheckKernelOutput(first_data[i], N); CheckKernelOutput(second_data[i], N); } + + // For cudaMallocAsyncAllocator, cudaFreeAsync is executed on _malloc_stream, + // which is the stream passed at Alloc(). Therefore, the stream must be + // postponed until the the memory is freed. Otherwise, the stream would be + // destroyed before the cudaFreeAsync is called. + for (int i = 0; i < NUM_STREAMS; i++) { + first_data[i].release(); + second_data[i].release(); + delete dev_ctx[i]; + } } TEST(Malloc, GPUContextMultiThreadMultiStream) { @@ -154,24 +181,27 @@ TEST(Malloc, GPUContextMultiThreadMultiStream) { AllocationPtr main_stream_alloc_ptr = Alloc(place, N * sizeof(float)); EXPECT_GE(main_stream_alloc_ptr->size(), N * sizeof(float)); - float *main_stream_data = - reinterpret_cast<float *>(main_stream_alloc_ptr->ptr()); - float *data[NUM_STREAMS]; - float *second_data[NUM_STREAMS]; - CudaDevCtxVec dev_ctx; - std::vector<std::thread> threads; + AllocationPtr first_data[NUM_STREAMS], second_data[NUM_STREAMS]; + std::vector<phi::GPUContext *> dev_ctx; // default stream #ifdef PADDLE_WITH_HIP - hipLaunchKernelGGL((kernel), dim3(1), dim3(64), 0, 0, main_stream_data, N); + hipLaunchKernelGGL((kernel), + dim3(1), + dim3(64), + 0, + 0, + reinterpret_cast<float *>(main_stream_alloc_ptr->ptr()), + N); #else - kernel<<<1, 64>>>(main_stream_data, N); + kernel<<<1, 64>>>(reinterpret_cast<float *>(main_stream_alloc_ptr->ptr()), N); #endif main_stream_alloc_ptr.reset(); + std::vector<std::thread> threads; for (int i = 0; i < NUM_STREAMS; ++i) { - auto ctx = std::make_unique<phi::GPUContext>(place); + auto ctx = new 
phi::GPUContext(place); ctx->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(place, ctx->stream()) .get()); @@ -192,23 +222,43 @@ TEST(Malloc, GPUContextMultiThreadMultiStream) { .GetAllocator(paddle::platform::CUDAPinnedPlace()) .get()); ctx->PartialInitWithAllocator(); - dev_ctx.emplace_back(std::move(ctx)); - threads.emplace_back( - MultiStreamCompute, &data[i], &second_data[i], std::cref(*dev_ctx[i])); + dev_ctx.emplace_back(ctx); + first_data[i] = + Alloc(ctx->GetPlace(), + N * sizeof(float), + phi::Stream(reinterpret_cast<phi::StreamId>(ctx->stream()))); + second_data[i] = + Alloc(ctx->GetPlace(), + N * sizeof(float), + phi::Stream(reinterpret_cast<phi::StreamId>(ctx->stream()))); + threads.emplace_back(MultiStreamCompute, + std::ref(first_data[i]), + std::ref(second_data[i]), + ctx); } for (int i = 0; i < NUM_STREAMS; ++i) { threads[i].join(); } + #ifdef PADDLE_WITH_HIP EXPECT_TRUE(hipSuccess == hipDeviceSynchronize()); #else EXPECT_TRUE(cudaSuccess == cudaDeviceSynchronize()); #endif + for (int i = 0; i < NUM_STREAMS; ++i) { - CheckKernelOutput(data[i], N); + CheckKernelOutput(first_data[i], N); CheckKernelOutput(second_data[i], N); } + + // There are dependencies on the pointer deconstructing. Manually + // release the pointers would resolve the conflict. + for (int i = 0; i < NUM_STREAMS; i++) { + first_data[i].release(); + second_data[i].release(); + delete dev_ctx[i]; + } } TEST(Malloc, AllocZero) { diff --git a/test/cpp/inference/infer_ut/run.sh b/test/cpp/inference/infer_ut/run.sh index 88cdb3bacc1e5..91fd69d5e76f6 100755 --- a/test/cpp/inference/infer_ut/run.sh +++ b/test/cpp/inference/infer_ut/run.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/cpp/pir/cinn/compilation_task_test.cc b/test/cpp/pir/cinn/compilation_task_test.cc index 254ab7c4baf8a..3fbe4ed4ba60b 100644 --- a/test/cpp/pir/cinn/compilation_task_test.cc +++ b/test/cpp/pir/cinn/compilation_task_test.cc @@ -24,6 +24,7 @@ #include "paddle/cinn/hlir/dialect/operator/ir/op_attribute.h" #include "paddle/cinn/hlir/dialect/operator/ir/op_dialect.h" #include "paddle/cinn/hlir/framework/pir/compilation_task.h" +#include "paddle/cinn/hlir/framework/pir/utils.h" #include "paddle/cinn/hlir/framework/pir_compiler.h" #include "paddle/cinn/utils/data_util.h" #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h" @@ -34,6 +35,7 @@ PD_DECLARE_bool(cinn_bucket_compile); +using cinn::hlir::framework::pir::CompatibleInfo; using cinn::hlir::framework::pir::OpLoweringGroup; using cinn::hlir::framework::pir::OpLoweringGroupPtr; @@ -50,8 +52,11 @@ ProgramInfo BuildProgram(std::vector<int64_t> input_shape) { input_shape, value_one, phi::DataType::FLOAT32, phi::GPUPlace()); std::vector<OpLoweringGroupPtr> groups; + const std::string fn_name = CompatibleInfo::GroupOpsName( + std::initializer_list<::pir::Operation*>({full_op_x.operation()})); groups.emplace_back(std::make_shared<OpLoweringGroup>( - std::initializer_list<::pir::Operation*>({full_op_x.operation()}))); + std::initializer_list<::pir::Operation*>({full_op_x.operation()}), + fn_name)); groups.back()->mut_output_ops().insert(full_op_x.operation()); return {program, groups}; diff --git a/test/cpp/pir/cinn/file_tile_config_test.cc b/test/cpp/pir/cinn/file_tile_config_test.cc index 3cdcc7a390bbe..d863baca924f7 100644 --- a/test/cpp/pir/cinn/file_tile_config_test.cc +++ 
b/test/cpp/pir/cinn/file_tile_config_test.cc @@ -39,7 +39,7 @@ TEST(ConfigSearcher, TestReduceDemo) { constexpr int kMaxThreadsPerBlock = 1024; // Step 1: Construct iter space and tile config. - cinn::ir::search::IterSpace iter_space; + cinn::ir::BucketInfo bucket_info; int s_dimension_lower = 32; int s_dimension_upper = 128; auto s_dimension_type = "S"; @@ -49,61 +49,52 @@ TEST(ConfigSearcher, TestReduceDemo) { auto r_dimension_type = "R"; auto r_dimension_is_dynamic = true; - iter_space.space.push_back(cinn::ir::search::IterSpace::Dimension{ - s_dimension_lower, - s_dimension_upper, - s_dimension_type, - s_dimension_is_dynamic, - std::vector<double>(128 - 32, 1.0)}); - iter_space.space.push_back( - cinn::ir::search::IterSpace::Dimension{r_dimension_lower, - r_dimension_upper, - r_dimension_type, - r_dimension_is_dynamic, - std::vector<double>(1, 1.0)}); - cinn::ir::BucketInfo bucket_info; - bucket_info.sp_lower_bound = iter_space.space[0].lower_bound; - bucket_info.sp_upper_bound = iter_space.space[0].upper_bound; - bucket_info.rb_lower_bound = iter_space.space[1].lower_bound; - bucket_info.rb_upper_bound = iter_space.space[1].upper_bound; + bucket_info.space.push_back( + cinn::ir::BucketInfo::Dimension{s_dimension_lower, + s_dimension_upper, + s_dimension_type, + s_dimension_is_dynamic, + std::vector<double>(128 - 32, 1.0)}); + bucket_info.space.push_back( + cinn::ir::BucketInfo::Dimension{r_dimension_lower, + r_dimension_upper, + r_dimension_type, + r_dimension_is_dynamic, + std::vector<double>(1, 1.0)}); + cinn::ir::ScheduleConfig::TileConfig tile_config; tile_config.spatial_inner_num = 32; tile_config.warp_num = 32; tile_config.tree_reduce_num = 128; std::vector<std::pair<std::string, std::string>> iter_space_type = { - std::make_pair("R", "dynamic"), std::make_pair("S", "dynamic")}; + std::make_pair("S", "dynamic"), std::make_pair("R", "dynamic")}; // Step 2: Add to json/Read from json cinn::ir::FileTileConfigDatabase file_database; - 
file_database.AddConfig(cinn::common::DefaultTarget(), - iter_space_type, - bucket_info, - tile_config, - 2); + file_database.AddConfig( + cinn::common::DefaultTarget(), bucket_info, tile_config, 2); cinn::ir::TileConfigMap tile_config_map = file_database.GetConfigs(cinn::common::DefaultTarget(), iter_space_type); for (auto& it : tile_config_map) { - LOG(INFO) << "sp_lower_bound is " << it.first.sp_lower_bound; - LOG(INFO) << "sp_upper_bound is " << it.first.sp_upper_bound; - LOG(INFO) << "rb_lower_bound is " << it.first.rb_lower_bound; - LOG(INFO) << "rb_upper_bound is " << it.first.rb_upper_bound; + LOG(INFO) << "bucket info is: "; + auto dims = it.first.space.size(); + for (int i = 0; i < dims; i++) { + LOG(INFO) << "Dimension " << i + << " 's lower_bound is: " << it.first.space[i].lower_bound; + LOG(INFO) << "Dimension " << i + << " 's upper_bound is: " << it.first.space[i].upper_bound; + auto dimension_lower = i == 0 ? s_dimension_lower : r_dimension_lower; + auto dimension_upper = i == 0 ? 
s_dimension_upper : r_dimension_upper; + PADDLE_ENFORCE_EQ(it.first.space[i].lower_bound, + dimension_lower, + ::common::errors::InvalidArgument( + "GetConfigs function gets wrong dimension_lower")); + PADDLE_ENFORCE_EQ(it.first.space[i].upper_bound, + dimension_upper, + ::common::errors::InvalidArgument( + "GetConfigs function gets wrong dimension_upper")); + } LOG(INFO) << "tile config is " << it.second.spatial_inner_num << " " << it.second.warp_num << " " << it.second.tree_reduce_num; - PADDLE_ENFORCE_EQ(it.first.sp_lower_bound, - s_dimension_lower, - ::common::errors::InvalidArgument( - "GetConfigs function gets wrong s_dimension_lower")); - PADDLE_ENFORCE_EQ(it.first.sp_upper_bound, - s_dimension_upper, - ::common::errors::InvalidArgument( - "GetConfigs function gets wrong s_dimension_upper")); - PADDLE_ENFORCE_EQ(it.first.rb_lower_bound, - r_dimension_lower, - ::common::errors::InvalidArgument( - "GetConfigs function gets wrong r_dimension_lower")); - PADDLE_ENFORCE_EQ(it.first.rb_upper_bound, - r_dimension_upper, - ::common::errors::InvalidArgument( - "GetConfigs function gets wrong r_dimension_upprt")); PADDLE_ENFORCE_EQ(it.second.spatial_inner_num, tile_config.spatial_inner_num, ::common::errors::InvalidArgument( diff --git a/test/cpp/pir/cinn/pir_compiler_test.cc b/test/cpp/pir/cinn/pir_compiler_test.cc index 8e2df8e02ac8c..622a4fec701f1 100644 --- a/test/cpp/pir/cinn/pir_compiler_test.cc +++ b/test/cpp/pir/cinn/pir_compiler_test.cc @@ -25,6 +25,7 @@ #include "paddle/cinn/hlir/dialect/operator/ir/op_dialect.h" #include "paddle/cinn/hlir/dialect/runtime/ir/jit_kernel_op.h" #include "paddle/cinn/hlir/dialect/runtime/ir/runtime_dialect.h" +#include "paddle/cinn/hlir/framework/pir/utils.h" #include "paddle/cinn/hlir/framework/pir_compiler.h" #include "paddle/cinn/utils/data_util.h" #include "paddle/fluid/framework/new_executor/interpretercore.h" @@ -38,6 +39,7 @@ #include "paddle/pir/include/core/program.h" #include 
"paddle/pir/include/dialect/control_flow/ir/cf_op.h" +using cinn::hlir::framework::pir::CompatibleInfo; using cinn::hlir::framework::pir::OpLoweringGroup; using cinn::hlir::framework::pir::OpLoweringGroupPtr; @@ -74,18 +76,26 @@ ProgramInfo BuildProgram() { builder.Build<pir::YieldOp>(std::vector<pir::Value>{relu_op_y.result(0)}); std::vector<OpLoweringGroupPtr> groups; + const auto full_op_x_ops = + std::initializer_list<::pir::Operation*>({full_op_x.operation()}); groups.emplace_back(std::make_shared<OpLoweringGroup>( - std::initializer_list<::pir::Operation*>( - {full_op_x.operation()}))); // For coverage + full_op_x_ops, + CompatibleInfo::GroupOpsName(full_op_x_ops))); // For coverage groups[0]->mut_output_values().push_back(groups[0]->ops().back()->result(0)); + + const auto full_op_y_ops = + std::initializer_list<::pir::Operation*>({full_op_x.operation()}); groups.emplace_back(std::make_shared<OpLoweringGroup>( - std::initializer_list<::pir::Operation*>({full_op_y.operation()}))); + full_op_y_ops, CompatibleInfo::GroupOpsName(full_op_y_ops))); + groups[1]->mut_output_values().push_back(groups[1]->ops().back()->result(0)); - groups.emplace_back(std::make_shared<OpLoweringGroup>( + const auto vector_ops = std::vector<::pir::Operation*>({tan_op_x.operation(), relu_op_x.operation(), tan_op_y.operation(), - relu_op_y.operation()}))); + relu_op_y.operation()}); + groups.emplace_back(std::make_shared<OpLoweringGroup>( + vector_ops, CompatibleInfo::GroupOpsName(vector_ops))); groups[2]->mut_output_values().push_back(groups[2]->ops().back()->result(0)); return {program, groups}; @@ -127,14 +137,16 @@ ProgramInfo BuildSoftmax() { auto yield_op = builder.Build<pir::YieldOp>(std::vector<pir::Value>{divide}); std::vector<OpLoweringGroupPtr> groups; - groups.emplace_back(std::make_shared<OpLoweringGroup>( + const auto vector_ops = std::initializer_list<::pir::Operation*>({max.defining_op(), broadcast_1.defining_op(), sub.defining_op(), exp.defining_op(), sum.defining_op(), 
broadcast_2.defining_op(), - divide.defining_op()}))); + divide.defining_op()}); + groups.emplace_back(std::make_shared<OpLoweringGroup>( + vector_ops, CompatibleInfo::GroupOpsName(vector_ops))); groups[0]->mut_output_values().push_back(groups[0]->ops().back()->result(0)); groups[0]->set_op_pattern_kind(cinn::hlir::framework::kReduction); diff --git a/test/cpp/pir/cinn/symbolic_lower_test.cc b/test/cpp/pir/cinn/symbolic_lower_test.cc index 83de069dd622e..0c748d9b96da8 100644 --- a/test/cpp/pir/cinn/symbolic_lower_test.cc +++ b/test/cpp/pir/cinn/symbolic_lower_test.cc @@ -24,6 +24,7 @@ #include "paddle/cinn/hlir/framework/pir/group.h" #include "paddle/cinn/hlir/framework/pir/op_lowering_group.h" #include "paddle/cinn/hlir/framework/pir/op_lowering_impl.h" +#include "paddle/cinn/hlir/framework/pir/utils.h" #include "paddle/cinn/hlir/framework/pir_compiler.h" #include "paddle/common/ddim.h" #include "paddle/fluid/framework/new_executor/interpretercore.h" @@ -39,6 +40,7 @@ PD_DECLARE_bool(cinn_bucket_compile); +using cinn::hlir::framework::pir::CompatibleInfo; using cinn::hlir::framework::pir::OpLoweringGroup; using cinn::hlir::framework::pir::OpLoweringGroupPtr; @@ -88,9 +90,11 @@ BuildGroupProgramForLowering() { builder.Build<paddle::dialect::FetchOp>(group_op->result(0), "out", 0); std::vector<OpLoweringGroupPtr> groups; - groups.emplace_back( - std::make_shared<OpLoweringGroup>(std::vector<::pir::Operation*>( - {exp.operation(), reshape.operation(), sub.operation()}))); + groups.emplace_back(std::make_shared<OpLoweringGroup>( + std::vector<::pir::Operation*>( + {exp.operation(), reshape.operation(), sub.operation()}), + CompatibleInfo::GroupOpsName(std::vector<::pir::Operation*>( + {exp.operation(), reshape.operation(), sub.operation()})))); groups[0]->mut_output_ops().insert(groups[0]->ops().back()); std::unordered_map<::pir::Value, symbol::ShapeOrDataDimExprs> value_to_shape_data; @@ -176,9 +180,11 @@ BuildBroadcastGroupProgramForLowering() { 
builder.Build<paddle::dialect::FetchOp>(group_op->result(0), "out", 0); std::vector<OpLoweringGroupPtr> groups; - groups.emplace_back( - std::make_shared<OpLoweringGroup>(std::vector<::pir::Operation*>( - {x_broadcast.operation(), sub.operation()}))); + groups.emplace_back(std::make_shared<OpLoweringGroup>( + std::vector<::pir::Operation*>( + {x_broadcast.operation(), sub.operation()}), + CompatibleInfo::GroupOpsName(std::vector<::pir::Operation*>( + {x_broadcast.operation(), sub.operation()})))); groups[0]->mut_output_ops().insert(groups[0]->ops().back()); std::unordered_map<::pir::Value, symbol::ShapeOrDataDimExprs> diff --git a/test/cpp/pir/cinn/tile_config_searcher_test.cc b/test/cpp/pir/cinn/tile_config_searcher_test.cc index f54aa848b655a..289113a96bbab 100644 --- a/test/cpp/pir/cinn/tile_config_searcher_test.cc +++ b/test/cpp/pir/cinn/tile_config_searcher_test.cc @@ -66,22 +66,22 @@ TEST(ConfigSearcher, TestReduceDemo) { schedule_config_manager.SetPolicy("custom"); // Step 3: Construct iter space and objective function. - cinn::ir::search::IterSpace iter_space; - iter_space.space.push_back(cinn::ir::search::IterSpace::Dimension{ - 33, - 128, - "S", - /* is_dynamic = */ true, - std::vector<double>(128 - 32, 1.0)}); - iter_space.space.push_back( - cinn::ir::search::IterSpace::Dimension{1024, - 1024, - "R", - /* is_dynamic = */ false, - std::vector<double>(1, 1.0)}); + cinn::ir::BucketInfo bucket_info; + bucket_info.space.push_back( + cinn::ir::BucketInfo::Dimension{33, + 128, + "S", + /* is_dynamic = */ true, + std::vector<double>(128 - 32, 1.0)}); + bucket_info.space.push_back( + cinn::ir::BucketInfo::Dimension{1024, + 1024, + "R", + /* is_dynamic = */ false, + std::vector<double>(1, 1.0)}); std::unique_ptr<cinn::ir::search::BaseObjectiveFunc> obj_func = std::make_unique<cinn::ir::search::WeightedSamplingTrailObjectiveFunc>( - program.get(), iter_space); + program.get(), bucket_info); // Step 4: Construct config candidate range and constraints. 
std::vector<std::pair<int, int>> candidate_range{ diff --git a/test/cpp/pir/pattern_rewrite/pattern_rewrite_test.cc b/test/cpp/pir/pattern_rewrite/pattern_rewrite_test.cc index 9ec1928ef10ff..a7674d60451cd 100644 --- a/test/cpp/pir/pattern_rewrite/pattern_rewrite_test.cc +++ b/test/cpp/pir/pattern_rewrite/pattern_rewrite_test.cc @@ -426,7 +426,7 @@ TEST(pattern_rewrite, Patterns) { // true)); CHECK_EQ(pm.Run(&program), true); - EXPECT_EQ(program.block()->size(), 19u); + EXPECT_EQ(program.block()->size(), 17u); } void BuildConstantFoldingProgram(pir::Program *program, diff --git a/test/cpp_extension/CMakeLists.txt b/test/cpp_extension/CMakeLists.txt index 284695e9235a1..517ab10749baf 100644 --- a/test/cpp_extension/CMakeLists.txt +++ b/test/cpp_extension/CMakeLists.txt @@ -6,4 +6,6 @@ if(WITH_TESTING) set_tests_properties(test_cpp_extension_setup PROPERTIES TIMEOUT 120) set_tests_properties(test_cpp_extension_jit PROPERTIES TIMEOUT 120) endif() + py_test(test_mixed_extension_setup SRCS test_mixed_extension_setup.py) + set_tests_properties(test_mixed_extension_setup PROPERTIES TIMEOUT 120) endif() diff --git a/test/deprecated/cpp_extension/mix_relu_and_extension.cc b/test/cpp_extension/mix_relu_and_extension.cc similarity index 100% rename from test/deprecated/cpp_extension/mix_relu_and_extension.cc rename to test/cpp_extension/mix_relu_and_extension.cc diff --git a/test/deprecated/cpp_extension/mix_relu_and_extension_setup.py b/test/cpp_extension/mix_relu_and_extension_setup.py similarity index 95% rename from test/deprecated/cpp_extension/mix_relu_and_extension_setup.py rename to test/cpp_extension/mix_relu_and_extension_setup.py index 1576b4f9d23f4..823d0183cfda8 100644 --- a/test/deprecated/cpp_extension/mix_relu_and_extension_setup.py +++ b/test/cpp_extension/mix_relu_and_extension_setup.py @@ -13,9 +13,7 @@ # limitations under the License. 
import os -import sys -sys.path.append("../../cpp_extension") from utils import paddle_includes from paddle.utils.cpp_extension import CppExtension, setup diff --git a/test/deprecated/cpp_extension/test_mixed_extension_setup.py b/test/cpp_extension/test_mixed_extension_setup.py similarity index 100% rename from test/deprecated/cpp_extension/test_mixed_extension_setup.py rename to test/cpp_extension/test_mixed_extension_setup.py diff --git a/test/custom_op/CMakeLists.txt b/test/custom_op/CMakeLists.txt index d59250643b883..950f870261eb8 100644 --- a/test/custom_op/CMakeLists.txt +++ b/test/custom_op/CMakeLists.txt @@ -11,6 +11,8 @@ if(WITH_TESTING) set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180) set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180) set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180) + py_test(test_custom_cast_op_jit SRCS test_custom_cast_op_jit.py) + set_tests_properties(test_custom_cast_op_jit PROPERTIES TIMEOUT 180) endif() if(NOT WIN32) diff --git a/test/deprecated/custom_op/custom_cast_op.cc b/test/custom_op/custom_cast_op.cc similarity index 100% rename from test/deprecated/custom_op/custom_cast_op.cc rename to test/custom_op/custom_cast_op.cc diff --git a/test/deprecated/custom_op/test_custom_cast_op_jit.py b/test/custom_op/test_custom_cast_op_jit.py similarity index 100% rename from test/deprecated/custom_op/test_custom_cast_op_jit.py rename to test/custom_op/test_custom_cast_op_jit.py diff --git a/test/custom_op/test_custom_relu_op_setup.py b/test/custom_op/test_custom_relu_op_setup.py index f5339d8dcce0a..c32abc0df1615 100644 --- a/test/custom_op/test_custom_relu_op_setup.py +++ b/test/custom_op/test_custom_relu_op_setup.py @@ -22,6 +22,7 @@ import paddle from paddle import static +from paddle.pir_utils import test_with_pir_api from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.vision.transforms import Compose, Normalize @@ -230,6 +231,7 @@ def _test_dynamic(self): 
check_output(out, pd_out, "out") check_output(x_grad, pd_x_grad, "x_grad") + @test_with_pir_api def _test_static_save_and_load_inference_model(self): paddle.enable_static() np_data = np.random.random((1, 1, 28, 28)).astype("float32") diff --git a/test/deprecated/CMakeLists.txt b/test/deprecated/CMakeLists.txt index ffaf747a547d0..2254fc3d5f9dd 100644 --- a/test/deprecated/CMakeLists.txt +++ b/test/deprecated/CMakeLists.txt @@ -132,12 +132,9 @@ if(WITH_TESTING) if(WIN32 AND WIN_UNITTEST_LEVEL LESS 2) message(STATUS "Skip tests unrelated to CUDA/TRT") else() - add_subdirectory(amp) add_subdirectory(asp) - add_subdirectory(autograd) add_subdirectory(custom_op) add_subdirectory(custom_runtime) - add_subdirectory(cpp_extension) add_subdirectory(prim) add_subdirectory(standalone_executor) add_subdirectory(tokenizer) @@ -155,11 +152,6 @@ if(WITH_TESTING) if(WITH_DISTRIBUTE) add_subdirectory(collective) - add_subdirectory(distributed_passes) - endif() - - if(NOT WIN32 OR NOT WITH_GPU) - add_subdirectory(fft) endif() endif() diff --git a/test/deprecated/amp/CMakeLists.txt b/test/deprecated/amp/CMakeLists.txt deleted file mode 100755 index 60cf0f5fa43d2..0000000000000 --- a/test/deprecated/amp/CMakeLists.txt +++ /dev/null @@ -1,47 +0,0 @@ -file( - GLOB TEST_OPS - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" - "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -function(py_test_modules TARGET_NAME) - if(WITH_TESTING) - set(options SERIAL) - set(oneValueArgs "") - set(multiValueArgs MODULES DEPS ENVS) - cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) - - if(WITH_COVERAGE AND NOT (WITH_INCREMENTAL_COVERAGE - AND "$ENV{PADDLE_GIT_DIFF_PY_FILE}" STREQUAL "")) - add_test( - NAME ${TARGET_NAME} - COMMAND - ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python - ${py_test_modules_ENVS} - COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data - ${PYTHON_EXECUTABLE} -m coverage run --branch -p - 
${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - else() - add_test( - NAME ${TARGET_NAME} - COMMAND - ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python - ${py_test_modules_ENVS} ${PYTHON_EXECUTABLE} - ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - endif() - - if(py_test_modules_SERIAL) - set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) - endif() - if(WIN32) - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150) - endif() - endif() -endfunction() - -foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) -endforeach() diff --git a/test/deprecated/asp/CMakeLists.txt b/test/deprecated/asp/CMakeLists.txt index 24b7364d5ba68..886d7908d0e0a 100644 --- a/test/deprecated/asp/CMakeLists.txt +++ b/test/deprecated/asp/CMakeLists.txt @@ -9,6 +9,6 @@ foreach(TEST_OP ${TEST_OPS}) endforeach() set_tests_properties(test_asp_pruning_dynamic PROPERTIES TIMEOUT 30) -set_tests_properties(test_asp_pruning_static PROPERTIES TIMEOUT 30) +set_tests_properties(test_asp_pruning_static_deprecated PROPERTIES TIMEOUT 30) set_tests_properties(test_asp_optimize_dynamic PROPERTIES TIMEOUT 30) -set_tests_properties(test_asp_optimize_static PROPERTIES TIMEOUT 30) +set_tests_properties(test_asp_optimize_static_deprecated PROPERTIES TIMEOUT 30) diff --git a/test/deprecated/asp/test_asp_customized_pruning.py b/test/deprecated/asp/test_asp_customized_pruning.py index f17acd61de42a..44bb65682fef0 100644 --- a/test/deprecated/asp/test_asp_customized_pruning.py +++ b/test/deprecated/asp/test_asp_customized_pruning.py @@ -18,8 +18,6 @@ import numpy as np import paddle -from paddle import base -from paddle.base import core from paddle.incubate import asp as sparsity from paddle.incubate.asp.supported_layer_list import ( supported_layers_and_prune_func_map, @@ -184,160 +182,5 @@ def test_training_pruning(self): 
self.assertEqual(supported_layer_count, self.supported_layer_count_ref) -class TestASPStaticCustomizedPruneFunc(unittest.TestCase): - def setUp(self): - paddle.enable_static() - - self.main_program = base.Program() - self.startup_program = base.Program() - - self.customer_prefix = "customer_layer" - - def build_model(): - img = paddle.static.data( - name='img', shape=[None, 3, 32, 32], dtype='float32' - ) - label = paddle.static.data( - name='label', shape=[None, 1], dtype='int64' - ) - hidden = paddle.static.nn.conv2d( - input=img, num_filters=4, filter_size=3, padding=2, act="relu" - ) - hidden = paddle.static.nn.fc( - x=hidden, size=32, activation='relu', name=self.customer_prefix - ) - hidden = paddle.static.nn.fc( - x=hidden, size=32, activation='relu', name=self.customer_prefix - ) - hidden = paddle.static.nn.fc(x=hidden, size=32, activation='relu') - prediction = paddle.static.nn.fc( - x=hidden, size=10, activation='softmax' - ) - return img, label, prediction - - with base.program_guard(self.main_program, self.startup_program): - self.img, self.label, self.predict = build_model() - self.supported_layer_count_ref = 5 - - self.place = paddle.CPUPlace() - if core.is_compiled_with_cuda(): - self.place = paddle.CUDAPlace(0) - self.exe = base.Executor(self.place) - - sparsity.add_supported_layer(self.customer_prefix, my_own_pruning) - - def test_inference_pruning(self): - self.exe.run(self.startup_program) - - sparsity.prune_model( - self.main_program, mask_algo="mask_1d", with_mask=False - ) - - supported_layer_count = 0 - for param in self.main_program.global_block().all_parameters(): - mat = np.array( - base.global_scope().find_var(param.name).get_tensor() - ) - if sparsity.asp.ASPHelper._is_supported_layer( - self.main_program, param.name - ): - supported_layer_count += 1 - if self.customer_prefix in param.name: - self.assertLessEqual( - np.sum(mat.flatten() - static_tensor.flatten()), 1e-4 - ) - else: - if (len(param.shape) == 4 and param.shape[1] < 4) or ( 
- len(param.shape) == 2 and param.shape[0] < 4 - ): - self.assertFalse( - paddle.incubate.asp.check_sparsity(mat.T, n=2, m=4) - ) - else: - self.assertTrue( - sparsity.check_sparsity( - mat.T, - func_name=sparsity.CheckMethod.CHECK_1D, - n=2, - m=4, - ) - ) - self.assertEqual(supported_layer_count, self.supported_layer_count_ref) - - def test_training_pruning(self): - with base.program_guard(self.main_program, self.startup_program): - loss = paddle.mean( - paddle.nn.functional.cross_entropy( - input=self.predict, - label=self.label, - reduction='none', - use_softmax=False, - ) - ) - optimizer = sparsity.decorate( - paddle.optimizer.SGD(learning_rate=0.01) - ) - optimizer.minimize(loss, self.startup_program) - - self.exe.run(self.startup_program) - - sparsity.prune_model( - self.main_program, mask_algo="mask_1d", with_mask=True - ) - - supported_layer_count = 0 - for param in self.main_program.global_block().all_parameters(): - mat = np.array( - base.global_scope().find_var(param.name).get_tensor() - ) - if sparsity.asp.ASPHelper._is_supported_layer( - self.main_program, param.name - ): - mat_mask = np.array( - base.global_scope() - .find_var(sparsity.asp.ASPHelper._get_mask_name(param.name)) - .get_tensor() - ) - supported_layer_count += 1 - if self.customer_prefix in param.name: - self.assertLessEqual( - np.sum(mat.flatten() - static_tensor.flatten()), 1e-4 - ) - self.assertLessEqual( - np.sum( - mat_mask.flatten() - static_tensor_mask.flatten() - ), - 1e-4, - ) - else: - if (len(param.shape) == 4 and param.shape[1] < 4) or ( - len(param.shape) == 2 and param.shape[0] < 4 - ): - self.assertFalse( - sparsity.check_sparsity(mat.T, n=2, m=4) - ) - self.assertFalse( - sparsity.check_sparsity(mat_mask.T, n=2, m=4) - ) - else: - self.assertTrue( - sparsity.check_sparsity( - mat.T, - func_name=sparsity.CheckMethod.CHECK_1D, - n=2, - m=4, - ) - ) - self.assertTrue( - sparsity.check_sparsity( - mat_mask.T, - func_name=sparsity.CheckMethod.CHECK_1D, - n=2, - m=4, - ) - ) - 
self.assertEqual(supported_layer_count, self.supported_layer_count_ref) - - if __name__ == '__main__': unittest.main() diff --git a/test/deprecated/asp/test_asp_customized_pruning_deprecated.py b/test/deprecated/asp/test_asp_customized_pruning_deprecated.py new file mode 100644 index 0000000000000..c088c1c827f5c --- /dev/null +++ b/test/deprecated/asp/test_asp_customized_pruning_deprecated.py @@ -0,0 +1,205 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 NVIDIA Corporation. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import core +from paddle.incubate import asp as sparsity +from paddle.nn.layer.layers import Layer + + +class MyOwnLayer(Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +static_tensor = None +static_tensor_mask = None + + +def my_own_pruning(tensor, m, n, mask_algo, param_name): + global static_tensor + global static_tensor_mask + if static_tensor is None: + static_tensor = np.random.rand(*tensor.shape).astype(np.float32) + if static_tensor_mask is None: + static_tensor_mask = np.random.rand(*tensor.shape).astype(np.float32) + return static_tensor, static_tensor_mask + + +class TestASPStaticCustomizedPruneFunc(unittest.TestCase): + def setUp(self): + paddle.enable_static() + + self.main_program = base.Program() + self.startup_program = base.Program() + + self.customer_prefix = "customer_layer" + + def build_model(): + img = paddle.static.data( + name='img', shape=[None, 3, 32, 32], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[None, 1], dtype='int64' + ) + hidden = paddle.static.nn.conv2d( + input=img, num_filters=4, filter_size=3, padding=2, act="relu" + ) + hidden = paddle.static.nn.fc( + x=hidden, size=32, activation='relu', name=self.customer_prefix + ) + hidden = paddle.static.nn.fc( + x=hidden, size=32, activation='relu', name=self.customer_prefix + ) + hidden = paddle.static.nn.fc(x=hidden, size=32, activation='relu') + prediction = paddle.static.nn.fc( + x=hidden, size=10, activation='softmax' + ) + return img, label, prediction + + with base.program_guard(self.main_program, self.startup_program): + self.img, self.label, self.predict = build_model() + self.supported_layer_count_ref = 5 + + self.place = paddle.CPUPlace() + if core.is_compiled_with_cuda(): + self.place = paddle.CUDAPlace(0) + self.exe = base.Executor(self.place) + + sparsity.add_supported_layer(self.customer_prefix, 
my_own_pruning) + + def test_inference_pruning(self): + self.exe.run(self.startup_program) + + sparsity.prune_model( + self.main_program, mask_algo="mask_1d", with_mask=False + ) + + supported_layer_count = 0 + for param in self.main_program.global_block().all_parameters(): + mat = np.array( + base.global_scope().find_var(param.name).get_tensor() + ) + if sparsity.asp.ASPHelper._is_supported_layer( + self.main_program, param.name + ): + supported_layer_count += 1 + if self.customer_prefix in param.name: + self.assertLessEqual( + np.sum(mat.flatten() - static_tensor.flatten()), 1e-4 + ) + else: + if (len(param.shape) == 4 and param.shape[1] < 4) or ( + len(param.shape) == 2 and param.shape[0] < 4 + ): + self.assertFalse( + paddle.incubate.asp.check_sparsity(mat.T, n=2, m=4) + ) + else: + self.assertTrue( + sparsity.check_sparsity( + mat.T, + func_name=sparsity.CheckMethod.CHECK_1D, + n=2, + m=4, + ) + ) + self.assertEqual(supported_layer_count, self.supported_layer_count_ref) + + def test_training_pruning(self): + with base.program_guard(self.main_program, self.startup_program): + loss = paddle.mean( + paddle.nn.functional.cross_entropy( + input=self.predict, + label=self.label, + reduction='none', + use_softmax=False, + ) + ) + optimizer = sparsity.decorate( + paddle.optimizer.SGD(learning_rate=0.01) + ) + optimizer.minimize(loss, self.startup_program) + + self.exe.run(self.startup_program) + + sparsity.prune_model( + self.main_program, mask_algo="mask_1d", with_mask=True + ) + + supported_layer_count = 0 + for param in self.main_program.global_block().all_parameters(): + mat = np.array( + base.global_scope().find_var(param.name).get_tensor() + ) + if sparsity.asp.ASPHelper._is_supported_layer( + self.main_program, param.name + ): + mat_mask = np.array( + base.global_scope() + .find_var(sparsity.asp.ASPHelper._get_mask_name(param.name)) + .get_tensor() + ) + supported_layer_count += 1 + if self.customer_prefix in param.name: + self.assertLessEqual( + 
np.sum(mat.flatten() - static_tensor.flatten()), 1e-4 + ) + self.assertLessEqual( + np.sum( + mat_mask.flatten() - static_tensor_mask.flatten() + ), + 1e-4, + ) + else: + if (len(param.shape) == 4 and param.shape[1] < 4) or ( + len(param.shape) == 2 and param.shape[0] < 4 + ): + self.assertFalse( + sparsity.check_sparsity(mat.T, n=2, m=4) + ) + self.assertFalse( + sparsity.check_sparsity(mat_mask.T, n=2, m=4) + ) + else: + self.assertTrue( + sparsity.check_sparsity( + mat.T, + func_name=sparsity.CheckMethod.CHECK_1D, + n=2, + m=4, + ) + ) + self.assertTrue( + sparsity.check_sparsity( + mat_mask.T, + func_name=sparsity.CheckMethod.CHECK_1D, + n=2, + m=4, + ) + ) + self.assertEqual(supported_layer_count, self.supported_layer_count_ref) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/asp/test_asp_optimize_static.py b/test/deprecated/asp/test_asp_optimize_static_deprecated.py similarity index 100% rename from test/deprecated/asp/test_asp_optimize_static.py rename to test/deprecated/asp/test_asp_optimize_static_deprecated.py diff --git a/test/deprecated/asp/test_asp_pruning_static.py b/test/deprecated/asp/test_asp_pruning_static_deprecated.py similarity index 100% rename from test/deprecated/asp/test_asp_pruning_static.py rename to test/deprecated/asp/test_asp_pruning_static_deprecated.py diff --git a/test/deprecated/asp/test_asp_save_load.py b/test/deprecated/asp/test_asp_save_load.py index f9966c321b37e..83379f2f5ae3c 100644 --- a/test/deprecated/asp/test_asp_save_load.py +++ b/test/deprecated/asp/test_asp_save_load.py @@ -18,7 +18,6 @@ import numpy as np import paddle -from paddle import base from paddle.base import core from paddle.incubate.asp import ASPHelper @@ -120,90 +119,5 @@ def test_save_and_load(self): ) -class TestASPStaticOptimize(unittest.TestCase): - def setUp(self): - paddle.enable_static() - - self.main_program = base.Program() - self.startup_program = base.Program() - - def build_model(): - img = paddle.static.data( - 
name='img', shape=[None, 3, 32, 32], dtype='float32' - ) - label = paddle.static.data( - name='label', shape=[None, 1], dtype='int64' - ) - hidden = paddle.static.nn.conv2d( - input=img, num_filters=4, filter_size=3, padding=2, act="relu" - ) - hidden = paddle.static.nn.fc(x=hidden, size=32, activation='relu') - prediction = paddle.static.nn.fc( - x=hidden, size=10, activation='softmax' - ) - return img, label, prediction - - with base.program_guard(self.main_program, self.startup_program): - self.img, self.label, predict = build_model() - self.loss = paddle.mean( - paddle.nn.functional.cross_entropy( - input=predict, - label=self.label, - reduction='none', - use_softmax=False, - ) - ) - self.optimizer = paddle.optimizer.SGD(learning_rate=0.01) - self.optimizer = paddle.incubate.asp.decorate(self.optimizer) - self.optimizer.minimize(self.loss, self.startup_program) - - self.place = paddle.CPUPlace() - if core.is_compiled_with_cuda(): - self.place = paddle.CUDAPlace(0) - self.exe = base.Executor(self.place) - self.exe.run(self.startup_program) - - paddle.incubate.asp.prune_model(self.main_program) - - def test_save_and_load(self): - path = "/tmp/paddle_asp_save_st/" - param_path = path + "asp.pdparams" - model_path = path + "asp.pdmodel" - - paddle.save(self.main_program.state_dict(), param_path) - paddle.save(self.main_program, model_path) - - prog = paddle.load(model_path) - - state_dict = paddle.load(param_path) - prog.set_state_dict(state_dict) - - feeder = base.DataFeeder( - feed_list=[self.img, self.label], place=self.place - ) - - data = ( - np.random.randn(64, 3, 32, 32), - np.random.randint(10, size=(64, 1)), - ) - self.exe.run(prog, feed=feeder.feed([data])) - - for param in prog.global_block().all_parameters(): - if ASPHelper._is_supported_layer(prog, param.name): - mat = np.array( - base.global_scope().find_var(param.name).get_tensor() - ) - if (len(param.shape) == 4 and param.shape[1] < 4) or ( - len(param.shape) == 2 and param.shape[0] < 4 - ): - 
self.assertFalse( - paddle.incubate.asp.check_sparsity(mat.T, n=2, m=4) - ) - else: - self.assertTrue( - paddle.incubate.asp.check_sparsity(mat.T, n=2, m=4) - ) - - if __name__ == '__main__': unittest.main() diff --git a/test/deprecated/asp/test_asp_save_load_deprecated.py b/test/deprecated/asp/test_asp_save_load_deprecated.py new file mode 100644 index 0000000000000..28386b1d2df54 --- /dev/null +++ b/test/deprecated/asp/test_asp_save_load_deprecated.py @@ -0,0 +1,131 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 NVIDIA Corporation. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import core +from paddle.incubate.asp import ASPHelper + + +class MyLayer(paddle.nn.Layer): + def __init__(self): + super().__init__() + self.conv1 = paddle.nn.Conv2D( + in_channels=3, out_channels=4, kernel_size=3, padding=2 + ) + self.linear1 = paddle.nn.Linear(4624, 32) + self.linear2 = paddle.nn.Linear(32, 32) + self.linear3 = paddle.nn.Linear(32, 10) + + def forward(self, img): + hidden = self.conv1(img) + hidden = paddle.flatten(hidden, start_axis=1) + hidden = self.linear1(hidden) + hidden = self.linear2(hidden) + prediction = self.linear3(hidden) + return prediction + + +class TestASPStaticOptimize(unittest.TestCase): + def setUp(self): + paddle.enable_static() + + self.main_program = base.Program() + self.startup_program = base.Program() + + def build_model(): + img = paddle.static.data( + name='img', shape=[None, 3, 32, 32], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[None, 1], dtype='int64' + ) + hidden = paddle.static.nn.conv2d( + input=img, num_filters=4, filter_size=3, padding=2, act="relu" + ) + hidden = paddle.static.nn.fc(x=hidden, size=32, activation='relu') + prediction = paddle.static.nn.fc( + x=hidden, size=10, activation='softmax' + ) + return img, label, prediction + + with base.program_guard(self.main_program, self.startup_program): + self.img, self.label, predict = build_model() + self.loss = paddle.mean( + paddle.nn.functional.cross_entropy( + input=predict, + label=self.label, + reduction='none', + use_softmax=False, + ) + ) + self.optimizer = paddle.optimizer.SGD(learning_rate=0.01) + self.optimizer = paddle.incubate.asp.decorate(self.optimizer) + self.optimizer.minimize(self.loss, self.startup_program) + + self.place = paddle.CPUPlace() + if core.is_compiled_with_cuda(): + self.place = paddle.CUDAPlace(0) + self.exe = base.Executor(self.place) + self.exe.run(self.startup_program) + + 
paddle.incubate.asp.prune_model(self.main_program) + + def test_save_and_load(self): + path = "/tmp/paddle_asp_save_st/" + param_path = path + "asp.pdparams" + model_path = path + "asp.pdmodel" + + paddle.save(self.main_program.state_dict(), param_path) + paddle.save(self.main_program, model_path) + + prog = paddle.load(model_path) + + state_dict = paddle.load(param_path) + prog.set_state_dict(state_dict) + + feeder = base.DataFeeder( + feed_list=[self.img, self.label], place=self.place + ) + + data = ( + np.random.randn(64, 3, 32, 32), + np.random.randint(10, size=(64, 1)), + ) + self.exe.run(prog, feed=feeder.feed([data])) + + for param in prog.global_block().all_parameters(): + if ASPHelper._is_supported_layer(prog, param.name): + mat = np.array( + base.global_scope().find_var(param.name).get_tensor() + ) + if (len(param.shape) == 4 and param.shape[1] < 4) or ( + len(param.shape) == 2 and param.shape[0] < 4 + ): + self.assertFalse( + paddle.incubate.asp.check_sparsity(mat.T, n=2, m=4) + ) + else: + self.assertTrue( + paddle.incubate.asp.check_sparsity(mat.T, n=2, m=4) + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/autograd/CMakeLists.txt b/test/deprecated/autograd/CMakeLists.txt deleted file mode 100644 index 35e12e591aea8..0000000000000 --- a/test/deprecated/autograd/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -file( - GLOB TEST_OPS - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" - "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0) - -foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS}) -endforeach() - -set_tests_properties(test_autograd_functional_static PROPERTIES TIMEOUT 160) diff --git a/test/deprecated/autograd/config.py b/test/deprecated/autograd/config.py deleted file mode 100644 index ff2d64a43bbc9..0000000000000 --- a/test/deprecated/autograd/config.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle 
Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle - -DEVICES = [paddle.CPUPlace()] -if paddle.is_compiled_with_cuda(): - DEVICES.append(paddle.CUDAPlace(0)) - -DEFAULT_DTYPE = 'float64' - -# The numerical tolerance of different dtype of different order different -# derivative. It's a empirical value provided by Paddle Science team. -TOLERANCE = { - "float32": { - "first_order_grad": {"rtol": 1e-3, "atol": 1e-3, "eps": 1e-4}, - "second_order_grad": {"rtol": 1e-2, "atol": 1e-2, "eps": 1e-2}, - }, - "float64": { - "first_order_grad": {"rtol": 1e-7, "atol": 1e-7, "eps": 1e-7}, - "second_order_grad": {"rtol": 1e-5, "atol": 1e-5, "eps": 1e-5}, - }, -} diff --git a/test/deprecated/autograd/utils.py b/test/deprecated/autograd/utils.py deleted file mode 100644 index 64a16897d9b25..0000000000000 --- a/test/deprecated/autograd/utils.py +++ /dev/null @@ -1,454 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import enum -import sys -import typing - -import numpy as np - -import paddle -from paddle.incubate.autograd.utils import as_tensors - - -########################################################## -# Finite Difference Utils -########################################################## -def _product(t): - return int(np.prod(t)) - - -def _get_item(t, idx): - assert isinstance( - t, paddle.base.framework.Variable - ), "The first argument t must be Tensor." - assert isinstance( - idx, int - ), "The second argument idx must be an int number." - flat_t = paddle.reshape(t, [-1]) - return flat_t.__getitem__(idx) - - -def _set_item(t, idx, value): - assert isinstance( - t, paddle.base.framework.Variable - ), "The first argument t must be Tensor." - assert isinstance( - idx, int - ), "The second argument idx must be an int number." - flat_t = paddle.reshape(t, [-1]) - flat_t.__setitem__(idx, value) - return paddle.reshape(flat_t, t.shape) - - -def _compute_numerical_jacobian(func, xs, delta, np_dtype): - xs = list(as_tensors(xs)) - ys = list(as_tensors(func(*xs))) - fin_size = len(xs) - fout_size = len(ys) - jacobian = [[] for _ in range(fout_size)] - for i in range(fout_size): - jac_i = [[] for _ in range(fin_size)] - for j in range(fin_size): - jac_i[j] = np.zeros( - (_product(ys[i].shape), _product(xs[j].shape)), dtype=np_dtype - ) - jacobian[i] = jac_i - - for j in range(fin_size): - for q in range(_product(xs[j].shape)): - orig = _get_item(xs[j], q) - orig = paddle.assign(orig) - x_pos = orig + delta - xs[j] = _set_item(xs[j], q, x_pos) - ys_pos = as_tensors(func(*xs)) - - x_neg = orig - delta - xs[j] = _set_item(xs[j], q, x_neg) - ys_neg = as_tensors(func(*xs)) - - xs[j] = _set_item(xs[j], q, orig) - - for i in range(fout_size): - for p in range(_product(ys[i].shape)): - y_pos = _get_item(ys_pos[i], p) - y_neg = _get_item(ys_neg[i], p) - jacobian[i][j][p][q] = 
(y_pos - y_neg) / delta / 2.0 - return jacobian - - -def _compute_numerical_hessian(func, xs, delta, np_dtype): - xs = list(as_tensors(xs)) - ys = list(as_tensors(func(*xs))) - fin_size = len(xs) - hessian = [[] for _ in range(fin_size)] - for i in range(fin_size): - hessian_i = [[] for _ in range(fin_size)] - for j in range(fin_size): - hessian_i[j] = np.zeros( - (_product(xs[i].shape), _product(xs[j].shape)), dtype=np_dtype - ) - hessian[i] = hessian_i - - for i in range(fin_size): - for p in range(_product(xs[i].shape)): - for j in range(fin_size): - for q in range(_product(xs[j].shape)): - orig = _get_item(xs[j], q) - orig = paddle.assign(orig) - x_pos = orig + delta - xs[j] = _set_item(xs[j], q, x_pos) - jacobian_pos = _compute_numerical_jacobian( - func, xs, delta, np_dtype - ) - x_neg = orig - delta - xs[j] = _set_item(xs[j], q, x_neg) - jacobian_neg = _compute_numerical_jacobian( - func, xs, delta, np_dtype - ) - xs[j] = _set_item(xs[j], q, orig) - hessian[i][j][p][q] = ( - (jacobian_pos[0][i][0][p] - jacobian_neg[0][i][0][p]) - / delta - / 2.0 - ) - return hessian - - -def concat_to_matrix(xs, is_batched=False): - """Concats a tuple of tuple of Jacobian/Hessian matrix into one matrix""" - rows = [] - for i in range(len(xs)): - rows.append(np.concatenate(list(xs[i]), -1)) - return np.concatenate(rows, 1) if is_batched else np.concatenate(rows, 0) - - -def _compute_numerical_batch_jacobian( - func, xs, delta, np_dtype, merge_batch=True -): - no_batch_jacobian = _compute_numerical_jacobian(func, xs, delta, np_dtype) - xs = list(as_tensors(xs)) - ys = list(as_tensors(func(*xs))) - fin_size = len(xs) - fout_size = len(ys) - bs = xs[0].shape[0] - bat_jac = [] - for i in range(fout_size): - batch_jac_i = [] - for j in range(fin_size): - jac = no_batch_jacobian[i][j] - jac_shape = jac.shape - out_size = jac_shape[0] // bs - in_size = jac_shape[1] // bs - jac = np.reshape(jac, (bs, out_size, bs, in_size)) - batch_jac_i_j = np.zeros(shape=(out_size, bs, in_size)) - 
for p in range(out_size): - for b in range(bs): - for q in range(in_size): - batch_jac_i_j[p][b][q] = jac[b][p][b][q] - if merge_batch: - batch_jac_i_j = np.reshape(batch_jac_i_j, (out_size, -1)) - batch_jac_i.append(batch_jac_i_j) - bat_jac.append(batch_jac_i) - - return bat_jac - - -def _compute_numerical_batch_hessian(func, xs, delta, np_dtype): - xs = list(as_tensors(xs)) - batch_size = xs[0].shape[0] - fin_size = len(xs) - hessian = [] - for b in range(batch_size): - x_l = [] - for j in range(fin_size): - x_l.append(paddle.reshape(xs[j][b], shape=[1, -1])) - hes_b = _compute_numerical_hessian(func, x_l, delta, np_dtype) - if fin_size == 1: - hessian.append(hes_b[0][0]) - else: - hessian.append(hes_b) - - hessian_res = [] - for index in range(fin_size): - x_reshape = paddle.reshape(xs[index], shape=[batch_size, -1]) - for index_ in range(fin_size): - for i in range(x_reshape.shape[1]): - tmp = [] - for j in range(batch_size): - if fin_size == 1: - tmp.extend(hessian[j][i]) - else: - tmp.extend(hessian[j][i][index_][index]) - hessian_res.append(tmp) - if fin_size == 1: - return hessian_res - - hessian_result = [] - mid = len(hessian_res) // 2 - for i in range(mid): - hessian_result.append( - np.stack((hessian_res[i], hessian_res[mid + i]), axis=0) - ) - return hessian_result - - -def _compute_numerical_vjp(func, xs, v, delta, np_dtype): - xs = as_tensors(xs) - jacobian = np.array(_compute_numerical_jacobian(func, xs, delta, np_dtype)) - if v is None: - v = [paddle.ones_like(x) for x in xs] - flat_v = np.array([v_el.numpy().reshape(-1) for v_el in v]) - vjp = [np.zeros((_product(x.shape)), dtype=np_dtype) for x in xs] - for j in range(len(xs)): - for q in range(_product(xs[j].shape)): - vjp[j][q] = np.sum( - jacobian[:, j, :, q].reshape(flat_v.shape) * flat_v - ) - vjp = [vjp[j].reshape(xs[j].shape) for j in range(len(xs))] - return vjp - - -def _compute_numerical_vhp(func, xs, v, delta, np_dtype): - xs = list(as_tensors(xs)) - hessian = 
np.array(_compute_numerical_hessian(func, xs, delta, np_dtype)) - flat_v = np.array([v_el.numpy().reshape(-1) for v_el in v]) - vhp = [np.zeros((_product(x.shape)), dtype=np_dtype) for x in xs] - for j in range(len(xs)): - for q in range(_product(xs[j].shape)): - vhp[j][q] = np.sum( - hessian[:, j, :, q].reshape(flat_v.shape) * flat_v - ) - vhp = [vhp[j].reshape(xs[j].shape) for j in range(len(xs))] - return vhp - - -########################################################## -# TestCases of different function. -########################################################## -def reduce(x): - return paddle.sum(x) - - -def reduce_dim(x): - return paddle.sum(x, axis=0) - - -def matmul(x, y): - return paddle.matmul(x, y) - - -def mul(x, y): - return x * y - - -def pow(x, y): - return paddle.pow(x, y) - - -def o2(x, y): - return paddle.multiply(x, y), paddle.matmul(x, y.t()) - - -def unuse(x, y): - return paddle.sum(x) - - -def nested(x): - def inner(y): - return x * y - - return inner - - -def square(x): - return x * x - - -########################################################## -# Parameterized Test Utils. -########################################################## - -TEST_CASE_NAME = 'suffix' - - -def place(devices, key='place'): - """A Decorator for a class which will make the class running on different - devices . - - Args: - devices (Sequence[Paddle.CUDAPlace|Paddle.CPUPlace]): Device list. - key (str, optional): Defaults to 'place'. - """ - - def decorate(cls): - module = sys.modules[cls.__module__].__dict__ - raw_classes = { - k: v for k, v in module.items() if k.startswith(cls.__name__) - } - - for raw_name, raw_cls in raw_classes.items(): - for d in devices: - test_cls = dict(raw_cls.__dict__) - test_cls.update({key: d}) - new_name = raw_name + '.' 
+ d.__class__.__name__ - module[new_name] = type(new_name, (raw_cls,), test_cls) - del module[raw_name] - return cls - - return decorate - - -def parameterize(fields, values=None): - """Decorator for a unittest class which make the class running on different - test cases. - - Args: - fields (Sequence): The field name sequence of test cases. - values (Sequence, optional): The test cases sequence. Defaults to None. - - """ - fields = [fields] if isinstance(fields, str) else fields - params = [dict(zip(fields, vals)) for vals in values] - - def decorate(cls): - test_cls_module = sys.modules[cls.__module__].__dict__ - for i, values in enumerate(params): - test_cls = dict(cls.__dict__) - values = { - k: staticmethod(v) if callable(v) else v - for k, v in values.items() - } - test_cls.update(values) - name = cls.__name__ + str(i) - name = ( - name + '.' + values.get('suffix') - if values.get('suffix') - else name - ) - - test_cls_module[name] = type(name, (cls,), test_cls) - - for m in list(cls.__dict__): - if m.startswith("test"): - delattr(cls, m) - return cls - - return decorate - - -########################################################## -# Utils for transpose different Jacobian/Hessian matrix format. -########################################################## - -# B is batch size, N is row size, M is column size. 
-MatrixFormat = enum.Enum('MatrixFormat', ('NBM', 'BNM', 'NMB', 'NM')) - - -def _np_transpose_matrix_format(src, src_format, des_format): - """Transpose Jacobian/Hessian matrix format.""" - supported_format = (MatrixFormat.NBM, MatrixFormat.BNM, MatrixFormat.NMB) - if src_format not in supported_format or des_format not in supported_format: - raise ValueError( - f"Supported Jacobian format is {supported_format}, but got src: {src_format}, des: {des_format}" - ) - - src_axis = {c: i for i, c in enumerate(src_format.name)} - dst_axis = tuple(src_axis[c] for c in des_format.name) - - return np.transpose(src, dst_axis) - - -def _np_concat_matrix_sequence(src, src_format=MatrixFormat.NM): - """Convert a sequence of sequence of Jacobian/Hessian matrix into one huge - matrix.""" - - def concat_col(xs): - if src_format in (MatrixFormat.NBM, MatrixFormat.BNM, MatrixFormat.NM): - return np.concatenate(xs, axis=-1) - else: - return np.concatenate(xs, axis=1) - - def concat_row(xs): - if src_format in (MatrixFormat.NBM, MatrixFormat.NM, MatrixFormat.NMB): - return np.concatenate(xs, axis=0) - else: - return np.concatenate(xs, axis=1) - - supported_format = ( - MatrixFormat.NBM, - MatrixFormat.BNM, - MatrixFormat.NMB, - MatrixFormat.NM, - ) - if src_format not in supported_format: - raise ValueError( - f"Supported Jacobian format is {supported_format}, but got {src_format}" - ) - if not isinstance(src, typing.Sequence): - return src - if not isinstance(src[0], typing.Sequence): - src = [src] - - return concat_row(tuple(concat_col(xs) for xs in src)) - - -########################################################## -# Utils for generating test data. 
-########################################################## -def gen_static_data_and_feed(xs, v, stop_gradient=True): - feed = {} - if isinstance(xs, typing.Sequence): - static_xs = [] - for i, x in enumerate(xs): - x = paddle.static.data(f"x{i}", x.shape, x.dtype) - x.stop_gradient = stop_gradient - static_xs.append(x) - feed.update({f'x{idx}': value for idx, value in enumerate(xs)}) - else: - static_xs = paddle.static.data('x', xs.shape, xs.dtype) - static_xs.stop_gradient = stop_gradient - feed.update({'x': xs}) - - if isinstance(v, typing.Sequence): - static_v = [] - for i, e in enumerate(v): - e = paddle.static.data(f'v{i}', e.shape, e.dtype) - e.stop_gradient = stop_gradient - static_v.append(e) - feed.update({f'v{i}': value for i, value in enumerate(v)}) - elif v is not None: - static_v = paddle.static.data('v', v.shape, v.dtype) - static_v.stop_gradient = stop_gradient - feed.update({'v': v}) - else: - static_v = v - - return feed, static_xs, static_v - - -def gen_static_inputs_and_feed(xs, stop_gradient=True): - feed = {} - if isinstance(xs, typing.Sequence): - static_xs = [] - for i, x in enumerate(xs): - x = paddle.static.data(f"x{i}", x.shape, x.dtype) - x.stop_gradient = stop_gradient - static_xs.append(x) - feed.update({f'x{idx}': value for idx, value in enumerate(xs)}) - else: - static_xs = paddle.static.data('x', xs.shape, xs.dtype) - static_xs.stop_gradient = stop_gradient - feed.update({'x': xs}) - return feed, static_xs diff --git a/test/deprecated/collective/fleet/CMakeLists.txt b/test/deprecated/collective/fleet/CMakeLists.txt index c3d8d0e48e9dc..304f185847917 100644 --- a/test/deprecated/collective/fleet/CMakeLists.txt +++ b/test/deprecated/collective/fleet/CMakeLists.txt @@ -13,8 +13,8 @@ endif() if(LOCAL_ALL_ARCH AND (LINUX OR WIN32)) py_test_modules( - test_fleet_fp16_allreduce_meta_optimizer MODULES - test_fleet_fp16_allreduce_meta_optimizer ENVS + test_fleet_fp16_allreduce_meta_optimizer_deprecated MODULES + 
test_fleet_fp16_allreduce_meta_optimizer_deprecated ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") endif() @@ -38,6 +38,7 @@ endif() if(LOCAL_ALL_ARCH AND (LINUX OR WIN32)) py_test_modules( - test_fleet_meta_optimizer_base MODULES test_fleet_meta_optimizer_base ENVS + test_fleet_meta_optimizer_base_deprecated MODULES + test_fleet_meta_optimizer_base_deprecated ENVS "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python") endif() diff --git a/test/deprecated/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py b/test/deprecated/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer_deprecated.py similarity index 100% rename from test/deprecated/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py rename to test/deprecated/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer_deprecated.py diff --git a/test/deprecated/collective/fleet/test_fleet_meta_optimizer_base.py b/test/deprecated/collective/fleet/test_fleet_meta_optimizer_base_deprecated.py similarity index 99% rename from test/deprecated/collective/fleet/test_fleet_meta_optimizer_base.py rename to test/deprecated/collective/fleet/test_fleet_meta_optimizer_base_deprecated.py index 8878fdc172e2f..2da076437aede 100755 --- a/test/deprecated/collective/fleet/test_fleet_meta_optimizer_base.py +++ b/test/deprecated/collective/fleet/test_fleet_meta_optimizer_base_deprecated.py @@ -22,6 +22,8 @@ MetaOptimizerBase, ) +paddle.enable_static() + class TestFleetMetaOptimizerBase(unittest.TestCase): def net(main_prog, startup_prog): diff --git a/test/deprecated/contrib/CMakeLists.txt b/test/deprecated/contrib/CMakeLists.txt index a8ed413e6ce9e..fb82eaa2b6817 100644 --- a/test/deprecated/contrib/CMakeLists.txt +++ b/test/deprecated/contrib/CMakeLists.txt @@ -8,4 +8,5 @@ foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() -set_tests_properties(test_image_classification_fp16 PROPERTIES TIMEOUT 120) 
+set_tests_properties(test_image_classification_fp16_deprecated + PROPERTIES TIMEOUT 120) diff --git a/test/deprecated/contrib/test_bf16_utils.py b/test/deprecated/contrib/test_bf16_utils_deprecated.py similarity index 57% rename from test/deprecated/contrib/test_bf16_utils.py rename to test/deprecated/contrib/test_bf16_utils_deprecated.py index 8bc3cf43b8748..54f3ff73e0099 100644 --- a/test/deprecated/contrib/test_bf16_utils.py +++ b/test/deprecated/contrib/test_bf16_utils_deprecated.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import copy import unittest import paddle @@ -22,92 +21,7 @@ paddle.enable_static() -class AMPTest(unittest.TestCase): - def setUp(self): - self.bf16_list = copy.copy(amp.bf16.amp_lists.bf16_list) - self.fp32_list = copy.copy(amp.bf16.amp_lists.fp32_list) - self.gray_list = copy.copy(amp.bf16.amp_lists.gray_list) - self.amp_lists_ = None - - def tearDown(self): - self.assertEqual(self.amp_lists_.bf16_list, self.bf16_list) - self.assertEqual(self.amp_lists_.fp32_list, self.fp32_list) - self.assertEqual(self.amp_lists_.gray_list, self.gray_list) - - def test_amp_lists(self): - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16() - - def test_amp_lists_1(self): - # 1. w={'exp}, b=None - self.bf16_list.add('exp') - self.fp32_list.remove('exp') - - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'exp'}) - - def test_amp_lists_2(self): - # 2. w={'tanh'}, b=None - self.fp32_list.remove('tan') - self.bf16_list.add('tan') - - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'tan'}) - - def test_amp_lists_3(self): - # 3. w={'lstm'}, b=None - self.bf16_list.add('lstm') - - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16({'lstm'}) - - def test_amp_lists_4(self): - # 4. 
w=None, b={'matmul_v2'} - self.bf16_list.remove('matmul_v2') - self.fp32_list.add('matmul_v2') - - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( - custom_fp32_list={'matmul_v2'} - ) - - def test_amp_lists_5(self): - # 5. w=None, b={'matmul_v2'} - self.fp32_list.add('matmul_v2') - self.bf16_list.remove('matmul_v2') - - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( - custom_fp32_list={'matmul_v2'} - ) - - def test_amp_lists_6(self): - # 6. w=None, b={'lstm'} - self.fp32_list.add('lstm') - - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( - custom_fp32_list={'lstm'} - ) - - def test_amp_lists_7(self): - self.fp32_list.add('reshape2') - self.gray_list.remove('reshape2') - - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( - custom_fp32_list={'reshape2'} - ) - - def test_amp_list_8(self): - self.bf16_list.add('reshape2') - self.gray_list.remove('reshape2') - - self.amp_lists_ = amp.bf16.AutoMixedPrecisionListsBF16( - custom_bf16_list={'reshape2'} - ) - - class AMPTest2(unittest.TestCase): - def test_amp_lists_(self): - # 7. w={'lstm'} b={'lstm'} - # raise ValueError - self.assertRaises( - ValueError, amp.bf16.AutoMixedPrecisionListsBF16, {'lstm'}, {'lstm'} - ) - def test_find_op_index(self): block = base.default_main_program().global_block() op_desc = core.OpDesc() diff --git a/test/deprecated/contrib/test_image_classification_fp16_deprecated.py b/test/deprecated/contrib/test_image_classification_fp16_deprecated.py new file mode 100644 index 0000000000000..6c60ad0d8e415 --- /dev/null +++ b/test/deprecated/contrib/test_image_classification_fp16_deprecated.py @@ -0,0 +1,101 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import unittest + +# TODO: remove sys.path.append +sys.path.append("../../legacy_test") +import nets + +import paddle +from paddle.framework import in_pir_mode +from paddle.static.amp import decorate + +paddle.enable_static() + + +def vgg16_bn_drop(input): + def conv_block(input, num_filter, groups, dropouts): + return nets.img_conv_group( + input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max', + ) + + conv1 = conv_block(input, 64, 2, [0.3, 0]) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = paddle.nn.functional.dropout(x=conv5, p=0.5) + fc1 = paddle.static.nn.fc(x=drop, size=4096, activation=None) + if in_pir_mode(): + batch_norm = paddle.nn.BatchNorm(4096) + bn = batch_norm(fc1) + else: + bn = paddle.static.nn.batch_norm(input=fc1, act='relu') + drop2 = paddle.nn.functional.dropout(x=bn, p=0.5) + fc2 = paddle.static.nn.fc(x=drop2, size=4096, activation=None) + return fc2 + + +class TestAmpWithNonIterableDataLoader(unittest.TestCase): + def decorate_with_data_loader(self): + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + with paddle.base.unique_name.guard(): + image = paddle.static.data( + name='image', shape=[-1, 3, 224, 224], 
dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' + ) + + net = vgg16_bn_drop(image) + logits = paddle.static.nn.fc( + x=net, size=10, activation="softmax" + ) + cost, predict = paddle.nn.functional.softmax_with_cross_entropy( + logits, label, return_softmax=True + ) + avg_cost = paddle.mean(cost) + + optimizer = paddle.optimizer.Lamb(learning_rate=0.001) + amp_lists = paddle.static.amp.AutoMixedPrecisionLists( + custom_black_varnames={"loss", "conv2d_0.w_0"} + ) + mp_optimizer = decorate( + optimizer=optimizer, + amp_lists=amp_lists, + init_loss_scaling=8.0, + use_dynamic_loss_scaling=True, + ) + + mp_optimizer.minimize(avg_cost) + + def test_non_iterable_dataloader(self): + self.decorate_with_data_loader() + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/cpp/prim/CMakeLists.txt b/test/deprecated/cpp/prim/CMakeLists.txt index 8f7270397a382..4b62d1ef9b8cb 100644 --- a/test/deprecated/cpp/prim/CMakeLists.txt +++ b/test/deprecated/cpp/prim/CMakeLists.txt @@ -1,4 +1,4 @@ -paddle_test(test_comp_static SRCS test_static_prim.cc) +paddle_test(test_comp_static SRCS test_static_prim_deprecated.cc) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will diff --git a/test/deprecated/cpp/prim/test_static_prim.cc b/test/deprecated/cpp/prim/test_static_prim_deprecated.cc similarity index 99% rename from test/deprecated/cpp/prim/test_static_prim.cc rename to test/deprecated/cpp/prim/test_static_prim_deprecated.cc index dfda6cecbb411..a7bb6cbea7720 100644 --- a/test/deprecated/cpp/prim/test_static_prim.cc +++ b/test/deprecated/cpp/prim/test_static_prim_deprecated.cc @@ -31,8 +31,7 @@ PD_DECLARE_bool(prim_enabled); COMMON_DECLARE_string(tensor_operants_mode); -namespace paddle { -namespace prim { +namespace paddle::prim { using Tensor = paddle::Tensor; struct TestBaseProgram { @@ -527,5 +526,4 @@ TEST(StaticPrim, TestFlags) { 
ASSERT_FALSE(PrimCommonUtils::IsBwdPrimEnabled()); } -} // namespace prim -} // namespace paddle +} // namespace paddle::prim diff --git a/test/deprecated/cpp_extension/CMakeLists.txt b/test/deprecated/cpp_extension/CMakeLists.txt deleted file mode 100644 index 9f4efa9893574..0000000000000 --- a/test/deprecated/cpp_extension/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -py_test(test_mixed_extension_setup SRCS test_mixed_extension_setup.py) -set_tests_properties(test_mixed_extension_setup PROPERTIES TIMEOUT 120) diff --git a/test/deprecated/custom_op/CMakeLists.txt b/test/deprecated/custom_op/CMakeLists.txt index 346de7ea3c708..dd4fb6d8713ec 100644 --- a/test/deprecated/custom_op/CMakeLists.txt +++ b/test/deprecated/custom_op/CMakeLists.txt @@ -1,7 +1,6 @@ if(WITH_TESTING) - py_test(test_custom_raw_op_kernel_op SRCS test_custom_raw_op_kernel_op.py) - set_tests_properties(test_custom_raw_op_kernel_op PROPERTIES TIMEOUT 180) - - py_test(test_custom_cast_op_jit SRCS test_custom_cast_op_jit.py) - set_tests_properties(test_custom_cast_op_jit PROPERTIES TIMEOUT 180) + py_test(test_custom_raw_op_kernel_op_deprecated + SRCS test_custom_raw_op_kernel_op_deprecated.py) + set_tests_properties(test_custom_raw_op_kernel_op_deprecated + PROPERTIES TIMEOUT 180) endif() diff --git a/test/deprecated/custom_op/test_custom_raw_op_kernel_op.py b/test/deprecated/custom_op/test_custom_raw_op_kernel_op_deprecated.py similarity index 100% rename from test/deprecated/custom_op/test_custom_raw_op_kernel_op.py rename to test/deprecated/custom_op/test_custom_raw_op_kernel_op_deprecated.py diff --git a/test/deprecated/distributed_passes/CMakeLists.txt b/test/deprecated/distributed_passes/CMakeLists.txt deleted file mode 100644 index d9ee247cae2ba..0000000000000 --- a/test/deprecated/distributed_passes/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -file( - GLOB TEST_OPS - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" - "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -if((NOT WITH_GPU) 
AND (NOT WITH_XPU)) - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_adam_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_all_reduce_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_bn_act_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_bn_add_act_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_momentum_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_relu_depthwise_conv_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_sgd_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_inplace_addto_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_amp_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_recompute_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_sharding_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_fp16_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_gradient_merge_pass") - list(REMOVE_ITEM TEST_OPS - "test_auto_parallel_data_parallel_optimization_pass") -endif() - -if(NOT ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6))) - list(REMOVE_ITEM TEST_OPS test_dist_fuse_gemm_epilogue_pass) - list(REMOVE_ITEM TEST_OPS test_auto_parallel_fused_linear_promotion_pass) -endif() - -foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0") - list(APPEND DIST_TEST_OPS ${TEST_OP}) - set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 250) - set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST") -endforeach() diff --git a/test/deprecated/fft/CMakeLists.txt b/test/deprecated/fft/CMakeLists.txt deleted file mode 100644 index a31ec8e1f2137..0000000000000 --- a/test/deprecated/fft/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -file( - GLOB TEST_OPS - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" - "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) -endforeach() - -set_pir_tests_properties() diff --git a/test/deprecated/ir/pir/CMakeLists.txt b/test/deprecated/ir/pir/CMakeLists.txt index 
bcb550df74c03..df4ff900910b3 100644 --- a/test/deprecated/ir/pir/CMakeLists.txt +++ b/test/deprecated/ir/pir/CMakeLists.txt @@ -8,5 +8,3 @@ foreach(target ${TEST_INTERP_CASES}) py_test_modules(${target} MODULES ${target} ENVS GLOG_v=1 FLAGS_enable_pir_in_executor=true) endforeach() - -add_subdirectory(translator) diff --git a/test/deprecated/ir/pir/test_pass_manager.py b/test/deprecated/ir/pir/test_pass_manager.py deleted file mode 100644 index 852b3595ba492..0000000000000 --- a/test/deprecated/ir/pir/test_pass_manager.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import paddle -from paddle import pir -from paddle.base import core -from paddle.framework import LayerHelper - -paddle.enable_static() - - -class TestShadowOutputSlice(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.ones([3, 9, 5], dtype='float32') - y = paddle.static.data( - name="y", shape=[3, 9, 5], dtype="float32" - ) - z = x * y # will be eliminated - - _, out, _ = paddle.split(x, num_or_sections=3, axis=1) - helper = LayerHelper('shadow_output') - helper.append_op( - type="shadow_output", - inputs={"x": [out.name]}, - outputs={"out": [y.name]}, - attrs={"name": out.name}, - ) - - new_program = pir.translate_to_pir(main_program.desc) - op_names = [op.name() for op in new_program.global_block().ops] - self.assertTrue('pd_op.multiply' in op_names) - pm = pir.PassManager() - pm.add_pass( - 'dead_code_elimination_pass', {} - ) # apply pass to eliminate dead code - pm.run(new_program) - op_names = [op.name() for op in new_program.global_block().ops] - self.assertEqual(pm.passes(), ['dead_code_elimination_pass']) - self.assertFalse(pm.empty()) - self.assertTrue( - 'pd_op.multiply' not in op_names - ) # multiply is eliminated because its output is not used - - -if __name__ == "__main__": - unittest.main() diff --git a/test/deprecated/ir/pir/test_special_op_translator.py b/test/deprecated/ir/pir/test_special_op_translator.py deleted file mode 100644 index 687f0248535ed..0000000000000 --- a/test/deprecated/ir/pir/test_special_op_translator.py +++ /dev/null @@ -1,555 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np - -import paddle -from paddle import pir -from paddle.base import core -from paddle.framework import LayerHelper - -paddle.enable_static() - - -class TestCastOpTranscriber(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.to_tensor([2, 3, 4], 'float64') - y = paddle.cast(x, 'uint8') - - _, mappings = pir.translate_to_pir_with_param_map(main_program.desc) - assert len(str(mappings)) > 0, "no mapping found" - - -class TestCondWithInplace(unittest.TestCase): - def test_op(self): - def cond_with_inplace(): - x = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") - y = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") - running_mean = paddle.to_tensor([0], dtype="float32") - running_variance = paddle.to_tensor([1], dtype="float32") - weight = paddle.to_tensor([2], dtype="float32") - bias = paddle.to_tensor([1], dtype="float32") - if x > y: - y = paddle.nn.functional.batch_norm( - x, running_mean, running_variance, weight, bias - ) - else: - y = paddle.nn.functional.batch_norm( - x, running_mean, running_variance, weight, bias - ) - - legacy_program = paddle.jit.to_static( - cond_with_inplace, - input_spec=[], - full_graph=True, - ) - - l = pir.translate_to_pir(legacy_program.main_program.desc) - assert l is not None - - def test_nested_op(self): - def cond_with_inplace(): - x = 
paddle.ones(shape=[2, 1, 2, 3], dtype="float32") - y = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") - z = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") - running_mean = paddle.to_tensor([0], dtype="float32") - running_variance = paddle.to_tensor([1], dtype="float32") - weight = paddle.to_tensor([2], dtype="float32") - bias = paddle.to_tensor([1], dtype="float32") - if x > y: - if y > z: - z = paddle.nn.functional.batch_norm( - z, running_mean, running_variance, weight, bias - ) - else: - y = paddle.nn.functional.batch_norm( - x, running_mean, running_variance, weight, bias - ) - else: - if y > z: - z = paddle.nn.functional.batch_norm( - z, running_mean, running_variance, weight, bias - ) - else: - y = paddle.nn.functional.batch_norm( - x, running_mean, running_variance, weight, bias - ) - - legacy_program = paddle.jit.to_static( - cond_with_inplace, - input_spec=[], - full_graph=True, - ) - - l = pir.translate_to_pir(legacy_program.main_program.desc) - assert l is not None - - -class TestElementwiseOpTranscriber(unittest.TestCase): - def test_elementwise_without_y_grad(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - exe = paddle.static.Executor(place) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x_data = np.random.rand(100, 2, 3) - y_data = np.random.rand(100) - x = paddle.to_tensor(x_data, dtype='float32') - x.stop_gradient = False - y = paddle.to_tensor(y_data, dtype='float32') - - out1 = paddle.tensor.math._elementwise_op( - LayerHelper('elementwise_add', x=x, y=y, axis=0) - ) - out1.stop_gradient = False - mean = paddle.mean(out1) - paddle.static.append_backward(mean) - - out = exe.run(main_program, {}, fetch_list=[out1]) - np.testing.assert_allclose( - out[0], - x_data + y_data.reshape(100, 1, 1), - rtol=1e-6, - atol=1e-6, - ) - - def test_elementwise_with_y_grad(self): - place = core.Place() - 
place.set_place(paddle.CPUPlace()) - exe = paddle.static.Executor(place) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x_data = np.random.rand(100, 2, 3) - y_data = np.random.rand(100) - x = paddle.to_tensor(x_data, dtype='float32') - x.stop_gradient = False - y = paddle.to_tensor(y_data, dtype='float32') - y.stop_gradient = False - - out1 = paddle.tensor.math._elementwise_op( - LayerHelper('elementwise_add', x=x, y=y, axis=0) - ) - out1.stop_gradient = False - mean = paddle.mean(out1) - paddle.static.append_backward(mean) - - out = exe.run(main_program, {}, fetch_list=[out1]) - np.testing.assert_allclose( - out[0], - x_data + y_data.reshape(100, 1, 1), - rtol=1e-6, - atol=1e-6, - ) - - def test_add_inplace(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - exe = paddle.static.Executor(place) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.ones(shape=(100, 2, 3), dtype='float32') - y = paddle.ones(shape=(100, 2, 3), dtype='float32') - - helper = LayerHelper('elementwise_add') - helper.append_op( - type="elementwise_add", - inputs={"X": x, "Y": y}, - outputs={"Out": y}, - attrs={"axis": -1}, - ) - _ = pir.translate_to_pir(main_program.desc) - - -class TestEmbeddingOpTranscriber(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.static.data(name="x", shape=[2, 4], dtype=np.int64) - embedding = paddle.nn.Embedding( - 10, 3, weight_attr=paddle.nn.initializer.Constant(value=1.0) - ) - output = embedding(x) - - _ = pir.translate_to_pir(main_program.desc) - - 
-class TestIncrementOpTranscriber(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - data = paddle.zeros(shape=[1], dtype='float32') - counter = paddle.increment(data) - - _ = pir.translate_to_pir(main_program.desc) - - -class TestAssignValueOpTranscriber(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.to_tensor( - [[0.1, 0.2], [0.3, 0.4]], - place=paddle.CPUPlace(), - stop_gradient=False, - ) - - _ = pir.translate_to_pir(main_program.desc) - - -class TestRnnOpTranscriber(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.randn((4, 16)) - prev_h = paddle.randn((4, 32)) - - cell = paddle.nn.SimpleRNNCell(16, 32) - y, h = cell(x, prev_h) - - _ = pir.translate_to_pir(main_program.desc) - - -class TestEmptyVarTranslate(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x1 = paddle.rand(shape=[3, 3], dtype="float32") - x1.stop_gradient = False - weight = paddle.full( - shape=[3, 3], fill_value="0.5", dtype="float32" - ) - y = paddle.nn.functional.linear(x1, weight) - y.stop_gradient = True - out1 = paddle.concat(x=[x1, y], axis=1) - out2 = paddle.mean(out1) - 
sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.1) - sgd_optimizer.minimize(out2) - _ = pir.translate_to_pir(main_program.desc) - - -class TestOneHotOpTranscriber(unittest.TestCase): - def test_mutable_attribute(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - depth = paddle.assign(np.array([10], dtype=np.int32)) - label = paddle.static.data( - name="label", shape=[-1, 1], dtype="int64" - ) - one_hot_label = paddle.nn.functional.one_hot( - x=label, num_classes=depth - ) - - _ = pir.translate_to_pir(main_program.desc) - - def test_normal_attribute(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - depth = 10 - label = paddle.static.data( - name="label", shape=[-1, 1], dtype="int64" - ) - one_hot_label = paddle.nn.functional.one_hot( - x=label, num_classes=depth - ) - - _ = pir.translate_to_pir(main_program.desc) - - -class TestReduceOpTranscriber(unittest.TestCase): - def test_reduce_all(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - exe = paddle.static.Executor(place) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - arr = np.ones([2, 2], dtype="float32") - x = paddle.to_tensor(arr, dtype='int32') - out1 = paddle.all(x) - - out = exe.run(main_program, {}, fetch_list=[out1]) - np.testing.assert_array_equal(out[0], np.all(arr)) - - def test_with_axis(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - exe = paddle.static.Executor(place) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with 
paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - arr = np.ones([2, 2], dtype="float32") - x = paddle.to_tensor(arr, dtype='int32') - out1 = paddle.all(x, axis=0) - - out = exe.run(main_program, {}, fetch_list=[out1]) - np.testing.assert_array_equal(out[0], np.all(arr, axis=0)) - - -class TestIndexPutOpTranscriber(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.randn([2, 3]) - indices = [paddle.randint(0, 2, [2]), paddle.randint(0, 1, [2])] - value = paddle.randn([2]) - y = paddle.index_put(x, indices, value, False) - - _ = pir.translate_to_pir(main_program.desc) - - -class TestGradAddOpTranscriber(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x_data = np.random.rand(100, 2, 3) - y_data = np.random.rand(100, 1, 1) - x = paddle.to_tensor(x_data, dtype='float32') - x.stop_gradient = False - y = paddle.to_tensor(y_data, dtype='float32') - - helper = LayerHelper('grad_add') - out = helper.create_variable_for_type_inference("float") - helper.append_op( - type="grad_add", - inputs={"X": x, "Y": y}, - outputs={"Out": out}, - attrs={"axis": -1}, - ) - - _ = pir.translate_to_pir(main_program.desc) - - -class TestShadowOutputSlice(unittest.TestCase): - def test_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.rand([3, 9, 5]) - y = paddle.static.data( - name="y", shape=[3, 9, 
5], dtype="float32" - ) - - _, out, _ = paddle.split(x, num_or_sections=3, axis=1) - helper = LayerHelper('shadow_output') - helper.append_op( - type="shadow_output", - inputs={"x": [out.name]}, - outputs={"out": [y.name]}, - attrs={"name": out.name}, - ) - - l = pir.translate_to_pir(main_program.desc) - - -class TestSetValueOp(unittest.TestCase): - def test_no_mutable_attribute(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - exe = paddle.static.Executor(place) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.ones(shape=[2, 3, 4], dtype="float32") - x = paddle.static.setitem(x, (0, 0), 6) - ret = exe.run(main_program, fetch_list=[x]) - - x_data = np.ones([2, 3, 4]).astype("float32") - x_data[0, 0] = 6 - np.testing.assert_array_equal(ret[0], x_data) - - def test_with_mutable_attribute(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - exe = paddle.static.Executor(place) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.ones(shape=[2, 3, 4], dtype="float32") - zero = paddle.full([], 0, dtype="int32") - x = paddle.static.setitem(x, zero, 6) - ret = exe.run(main_program, fetch_list=[x]) - - x_data = np.ones([2, 3, 4]).astype("float32") - x_data[0] = 6 - np.testing.assert_array_equal(ret[0], x_data) - - def test_grad(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - exe = paddle.static.Executor(place) - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - input_shape = [7, 6, 5, 4, 3, 2] - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.ones(shape=input_shape, dtype="float32") - value = paddle.tensor.fill_constant([1, 3, 2], "float32", 1) - # test stop_gradient - 
value.stop_gradient = False - x.stop_gradient = False - attrs = { - 'axes': [0], - 'starts': [6], - 'ends': [0], - 'steps': [-4], - 'decrease_axes': [], - 'none_axes': [], - 'dtype': paddle.float32, - } - inputs = {'Input': x, 'ValueTensor': value} - - helper = LayerHelper("set_value") - y = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op( - type="set_value", - inputs=inputs, - outputs={'Out': y}, - attrs=attrs, - ) - y2 = y + 1 - loss = paddle.sum(y2) - opt = paddle.optimizer.Adam() - opt.minimize(loss) - - x_data = np.arange( - 0, np.prod(input_shape), dtype="float32" - ).reshape(input_shape) - fetch_list = [x.grad_name, value.grad_name] - ret = exe.run(main_program, fetch_list=fetch_list) - self.assertTrue((ret[0][6:0:-4] == 0).all()) - - -class TestShareBufferOpTranscriber(unittest.TestCase): - def test_program(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - x = paddle.ones(shape=(100, 2, 3), dtype='float32') - y = paddle.ones(shape=(100, 2, 3), dtype='float32') - - helper = LayerHelper('share_buffer') - helper.append_op( - type="share_buffer", - inputs={"X": x}, - outputs={"Out": y, "XOut": x}, - ) - l = pir.translate_to_pir(main_program.desc) - assert ( - l.global_block().ops[2].name() == "pd_op.share_data_" - ), "share_buffer should be translated to share_data_" - - -class TestDataOp(unittest.TestCase): - def test_data_op(self): - place = core.Place() - place.set_place(paddle.CPUPlace()) - - new_scope = paddle.static.Scope() - main_program = paddle.static.Program() - with paddle.static.scope_guard(new_scope): - with paddle.static.program_guard(main_program): - _ = paddle.static.data(name="y", shape=[3, 9, 5], dtype="int64") - l = pir.translate_to_pir(main_program.desc) - self.assertTrue(len(l.global_block().ops) > 0) - 
self.assertTrue(l.global_block().ops[0].name() == "pd_op.data") - data_op = l.global_block().ops[0] - self.assertIn("dtype", data_op.attrs()) - self.assertEqual(str(data_op.attrs()["dtype"]), "paddle.int64") - - -class TestCheckUnregisteredOp(unittest.TestCase): - def test_program(self): - main_program = paddle.static.Program() - with paddle.static.program_guard(main_program): - x = paddle.randn((4, 16)) - prev_h = paddle.randn((4, 32)) - - cell = paddle.nn.SimpleRNNCell(16, 32) - y, h = cell(x, prev_h) - - ops = pir.check_unregistered_ops(main_program.desc) - assert len(ops) == 0 - - -if __name__ == "__main__": - unittest.main() diff --git a/test/deprecated/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py b/test/deprecated/ir/test_ir_embedding_eltwise_layernorm_fuse_pass_deprecated.py similarity index 99% rename from test/deprecated/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py rename to test/deprecated/ir/test_ir_embedding_eltwise_layernorm_fuse_pass_deprecated.py index 6b2af9ace72bf..68c109120511e 100644 --- a/test/deprecated/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py +++ b/test/deprecated/ir/test_ir_embedding_eltwise_layernorm_fuse_pass_deprecated.py @@ -24,6 +24,8 @@ from paddle import base from paddle.base import core +paddle.enable_static() + class EmbEltwiseLayerNormFusePassTest(PassTest): def setUp(self): diff --git a/test/deprecated/ir/test_ir_fc_fuse_pass.py b/test/deprecated/ir/test_ir_fc_fuse_pass_deprecated.py similarity index 98% rename from test/deprecated/ir/test_ir_fc_fuse_pass.py rename to test/deprecated/ir/test_ir_fc_fuse_pass_deprecated.py index e7dde8b9f6c4c..79aa2f6efc9eb 100644 --- a/test/deprecated/ir/test_ir_fc_fuse_pass.py +++ b/test/deprecated/ir/test_ir_fc_fuse_pass_deprecated.py @@ -24,6 +24,8 @@ from paddle import base from paddle.base import core +paddle.enable_static() + class FCFusePassTest(PassTest): def setUp(self): diff --git a/test/deprecated/ir/test_ir_generate_pass.py 
b/test/deprecated/ir/test_ir_generate_pass_deprecated.py similarity index 100% rename from test/deprecated/ir/test_ir_generate_pass.py rename to test/deprecated/ir/test_ir_generate_pass_deprecated.py diff --git a/test/deprecated/ir/test_ir_graph_to_program_pass.py b/test/deprecated/ir/test_ir_graph_to_program_pass_deprecated.py similarity index 100% rename from test/deprecated/ir/test_ir_graph_to_program_pass.py rename to test/deprecated/ir/test_ir_graph_to_program_pass_deprecated.py diff --git a/test/deprecated/ir/test_ir_preln_residual_bias_fuse_pass.py b/test/deprecated/ir/test_ir_preln_residual_bias_fuse_pass_deprecated.py similarity index 100% rename from test/deprecated/ir/test_ir_preln_residual_bias_fuse_pass.py rename to test/deprecated/ir/test_ir_preln_residual_bias_fuse_pass_deprecated.py diff --git a/test/deprecated/ir/test_ir_skip_layernorm_pass.py b/test/deprecated/ir/test_ir_skip_layernorm_pass_deprecated.py similarity index 100% rename from test/deprecated/ir/test_ir_skip_layernorm_pass.py rename to test/deprecated/ir/test_ir_skip_layernorm_pass_deprecated.py diff --git a/test/deprecated/ir/test_ir_yolo_box_pass.py b/test/deprecated/ir/test_ir_yolo_box_pass_deprecated.py similarity index 100% rename from test/deprecated/ir/test_ir_yolo_box_pass.py rename to test/deprecated/ir/test_ir_yolo_box_pass_deprecated.py diff --git a/test/deprecated/ir/test_op_input_grad_semantic.py b/test/deprecated/ir/test_op_input_grad_semantic_deprecated.py similarity index 100% rename from test/deprecated/ir/test_op_input_grad_semantic.py rename to test/deprecated/ir/test_op_input_grad_semantic_deprecated.py diff --git a/test/deprecated/legacy_test/CMakeLists.txt b/test/deprecated/legacy_test/CMakeLists.txt index 18891bc1cb65e..61046057f7c7a 100644 --- a/test/deprecated/legacy_test/CMakeLists.txt +++ b/test/deprecated/legacy_test/CMakeLists.txt @@ -33,17 +33,18 @@ set(MIXED_DIST_TEST_OPS ${DIST_TEST_OPS}) list(APPEND MIXED_DIST_TEST_OPS test_simple_dist_transpiler) 
list(APPEND MIXED_DIST_TEST_OPS test_communicator_async) list(APPEND MIXED_DIST_TEST_OPS test_communicator_ps_gpu) -list(APPEND MIXED_DIST_TEST_OPS test_communicator_geo) +list(APPEND MIXED_DIST_TEST_OPS test_communicator_geo_deprecated) list(APPEND MIXED_DIST_TEST_OPS test_fleet_launch_ascend) list(APPEND MIXED_DIST_TEST_OPS test_ascend_group) list(APPEND MIXED_DIST_TEST_OPS test_fleet_api_input) list(APPEND MIXED_DIST_TEST_OPS test_fleet_base) -list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_2) -list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_3) +list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_deprecated) +list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_2_deprecated) +list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_3_deprecated) list(APPEND MIXED_DIST_TEST_OPS test_fleet_auto) -list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_partitioner) -list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_partitioner_gpt) +list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_partitioner_deprecated) +list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_partitioner_gpt_deprecated) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_searcher) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_reshard) list(APPEND MIXED_DIST_TEST_OPS test_auto_parallel_dist_tensor) @@ -100,11 +101,11 @@ if(WIN32) list(REMOVE_ITEM TEST_OPS test_trainer_desc) list(REMOVE_ITEM TEST_OPS test_checkpoint_notify_op) list(REMOVE_ITEM TEST_OPS test_downpoursgd_deprecated) - list(REMOVE_ITEM TEST_OPS test_fleet) - list(REMOVE_ITEM TEST_OPS test_fleet_nocvm_1) + list(REMOVE_ITEM TEST_OPS test_fleet_deprecated) + list(REMOVE_ITEM TEST_OPS test_fleet_nocvm_1_deprecated) list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker) list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_3) - list(REMOVE_ITEM TEST_OPS test_fleet_unitaccessor) + list(REMOVE_ITEM TEST_OPS test_fleet_unitaccessor_deprecated) list(REMOVE_ITEM TEST_OPS test_ps_dispatcher) list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_nlp) list(REMOVE_ITEM TEST_OPS 
test_nvprof) @@ -115,7 +116,7 @@ endif() if(NOT WITH_DISTRIBUTE OR WIN32) # DISTRIBUTE related - list(REMOVE_ITEM TEST_OPS test_avoid_twice_initialization) + list(REMOVE_ITEM TEST_OPS test_avoid_twice_initialization_deprecated) list(REMOVE_ITEM TEST_OPS test_fleet_metric) list(REMOVE_ITEM TEST_OPS test_fleet_ps) list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_2) @@ -130,7 +131,6 @@ endif() if(WIN32) list(REMOVE_ITEM TEST_OPS test_complex_matmul) - list(REMOVE_ITEM TEST_OPS test_ops_nms) list(REMOVE_ITEM TEST_OPS test_trt_convert_preln_residual_bias) list(REMOVE_ITEM TEST_OPS test_masked_multihead_attention_op) list(REMOVE_ITEM TEST_OPS test_fused_ec_moe_op) @@ -162,8 +162,8 @@ if((NOT WITH_GPU) AND (NOT WITH_ROCM)) # TODO(shenliang03): batch_fc_op support CPU device in future # TODO(Yancey1989): parallel dygraph support CPU device in future list(REMOVE_ITEM TEST_OPS test_fleet_base_single) - list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner) - list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner_gpt) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner_deprecated) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner_gpt_deprecated) list(REMOVE_ITEM TEST_OPS test_auto_parallel_searcher) list(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard) list(REMOVE_ITEM TEST_OPS test_auto_parallel_dist_tensor) @@ -200,8 +200,8 @@ list(REMOVE_ITEM TEST_OPS decorator_helper) if(APPLE) if(NOT WITH_DISTRIBUTE) - list(REMOVE_ITEM TEST_OPS test_desc_clone) - list(REMOVE_ITEM TEST_OPS test_program_code) + list(REMOVE_ITEM TEST_OPS test_desc_clone_deprecated) + list(REMOVE_ITEM TEST_OPS test_program_code_deprecated) endif() message( WARNING @@ -398,11 +398,9 @@ function(parallel_bash_test_modules TARGET_NAME) endif() endfunction() -list(REMOVE_ITEM TEST_OPS test_feed_data_check_shape_type) +list(REMOVE_ITEM TEST_OPS test_feed_data_check_shape_type_deprecated) list(REMOVE_ITEM TEST_OPS test_fetch_lod_tensor_array) -list(REMOVE_ITEM TEST_OPS test_data_norm_op) 
-list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) -list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) +list(REMOVE_ITEM TEST_OPS test_data_norm_op_deprecated) list(REMOVE_ITEM TEST_OPS test_imperative_mnist_sorted_gradient) list(REMOVE_ITEM TEST_OPS test_imperative_mnist) list(REMOVE_ITEM TEST_OPS test_layers_deprecated) @@ -413,7 +411,7 @@ list(REMOVE_ITEM TEST_OPS test_basic_gru_api) list(REMOVE_ITEM TEST_OPS test_basic_gru_unit_op) list(REMOVE_ITEM TEST_OPS test_basic_lstm_api) list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op) -list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass) +list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass_deprecated) # disable this unittest temporarily list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) @@ -428,6 +426,7 @@ endif() if(APPLE OR WIN32) list(REMOVE_ITEM TEST_OPS test_dataset) + list(REMOVE_ITEM TEST_OPS test_dataset_deprecated) list(REMOVE_ITEM TEST_OPS test_dataset_dataloader) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_process) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exit_func) @@ -452,8 +451,8 @@ endif() # Some ops need to check results when gc is enabled # Currently, only ops that register NoNeedBufferVarsInference need to do this test -set(TEST_OPS_WITH_GC test_affine_channel_op test_gather_nd_op test_scatter_op - test_slice_op) +set(TEST_OPS_WITH_GC test_gather_nd_op test_slice_op) +set(TEST_OPS_WITH_GC test_gather_nd_op test_slice_op_deprecated) foreach(TEST_OP ${TEST_OPS_WITH_GC}) list(REMOVE_ITEM TEST_OPS ${TEST_OP}) @@ -485,10 +484,6 @@ set_tests_properties(test_logcumsumexp_op PROPERTIES TIMEOUT 30) py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_op_parallelism=4) -py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS - ${GC_ENVS}) -py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS - ${GC_ENVS}) py_test_modules(test_imperative_mnist MODULES test_imperative_mnist ENVS FLAGS_cudnn_deterministic=1) 
py_test_modules( @@ -511,11 +506,8 @@ if((WITH_GPU) AND (WITH_CUDNN_FRONTEND)) test_fused_dot_product_attention_op) endif() -set_tests_properties(test_conv2d_op_depthwise_conv - PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv2d_api_deprecated PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") -set_tests_properties(test_conv_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") if(WITH_DISTRIBUTE) # FIXME(typhoonzero): add these tests back list(REMOVE_ITEM DIST_TEST_OPS "test_dist_transformer") @@ -543,19 +535,23 @@ if(WITH_DISTRIBUTE) py_test_modules(test_communicator_async MODULES test_communicator_async ENVS ${dist_ENVS}) - py_test_modules(test_communicator_geo MODULES test_communicator_geo ENVS - ${dist_ENVS}) + py_test_modules(test_communicator_geo_deprecated MODULES + test_communicator_geo_deprecated ENVS ${dist_ENVS}) if(NOT APPLE) py_test_modules(test_fleet_base MODULES test_fleet_base ENVS ${dist_ENVS}) - py_test_modules(test_fleet_base_2 MODULES test_fleet_base_2 ENVS - ${dist_ENVS}) - py_test_modules(test_fleet_base_3 MODULES test_fleet_base_3 ENVS - ${dist_ENVS}) + py_test_modules(test_fleet_base_deprecated MODULES + test_fleet_base_deprecated ENVS ${dist_ENVS}) + py_test_modules(test_fleet_base_2_deprecated MODULES + test_fleet_base_2_deprecated ENVS ${dist_ENVS}) + py_test_modules(test_fleet_base_3_deprecated MODULES + test_fleet_base_3_deprecated ENVS ${dist_ENVS}) if(NOT WIN32) - py_test_modules(test_auto_parallel_partitioner MODULES - test_auto_parallel_partitioner ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_partitioner_gpt MODULES - test_auto_parallel_partitioner_gpt ENVS ${dist_ENVS}) + py_test_modules( + test_auto_parallel_partitioner_deprecated MODULES + test_auto_parallel_partitioner_deprecated ENVS ${dist_ENVS}) + py_test_modules( + test_auto_parallel_partitioner_gpt_deprecated MODULES + test_auto_parallel_partitioner_gpt_deprecated ENVS ${dist_ENVS}) py_test_modules(test_auto_parallel_searcher MODULES 
test_auto_parallel_searcher ENVS ${dist_ENVS}) py_test_modules(test_auto_parallel_reshard MODULES @@ -603,22 +599,24 @@ if(WITH_DISTRIBUTE) endif() if(WIN32) - py_test_modules(test_feed_data_check_shape_type MODULES - test_feed_data_check_shape_type ENVS CUDA_VISIBLE_DEVICES=0) + py_test_modules( + test_feed_data_check_shape_type_deprecated MODULES + test_feed_data_check_shape_type_deprecated ENVS CUDA_VISIBLE_DEVICES=0) py_test_modules(test_fetch_lod_tensor_array MODULES test_fetch_lod_tensor_array ENVS CUDA_VISIBLE_DEVICES=0) else() - py_test_modules(test_feed_data_check_shape_type MODULES - test_feed_data_check_shape_type) + py_test_modules(test_feed_data_check_shape_type_deprecated MODULES + test_feed_data_check_shape_type_deprecated) py_test_modules(test_fetch_lod_tensor_array MODULES test_fetch_lod_tensor_array) endif() -py_test_modules(test_data_norm_op MODULES test_data_norm_op) +py_test_modules(test_data_norm_op_deprecated MODULES + test_data_norm_op_deprecated) py_test_modules( - test_fuse_bn_act_pass + test_fuse_bn_act_pass_deprecated MODULES - test_fuse_bn_act_pass + test_fuse_bn_act_pass_deprecated ENVS FLAGS_cudnn_deterministic=1 FLAGS_cudnn_batchnorm_spatial_persistent=1 @@ -631,8 +629,8 @@ if(NOT WIN32) endif() set_tests_properties( - test_data_norm_op test_dataloader_keep_order test_dataloader_unkeep_order - PROPERTIES LABELS "RUN_TYPE=DIST") + test_data_norm_op_deprecated test_dataloader_keep_order_deprecated + test_dataloader_unkeep_order_deprecated PROPERTIES LABELS "RUN_TYPE=DIST") if(NOT WIN32) set_tests_properties(test_multiprocess_reader_exception @@ -642,99 +640,68 @@ endif() # setting timeout value as 15S set_tests_properties(test_cross_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_lod_tensor_to_selected_rows +set_tests_properties(test_imperative_lod_tensor_to_selected_rows_deprecated PROPERTIES TIMEOUT 200) -set_tests_properties(test_lstm_op PROPERTIES TIMEOUT 120) 
-set_tests_properties(test_imperative_star_gan_with_gradient_penalty - PROPERTIES TIMEOUT 120) -set_tests_properties(test_bicubic_interp_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_deformable_conv_op PROPERTIES TIMEOUT 200) -set_tests_properties(test_nearest_interp_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_deformable_conv_op_deprecated PROPERTIES TIMEOUT 200) +set_tests_properties(test_regularizer_api_deprecated PROPERTIES TIMEOUT 150) set_tests_properties(test_inplace_softmax_with_cross_entropy PROPERTIES TIMEOUT 120) -set_tests_properties(test_cross_entropy2_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_gru_unit_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_regularizer_api PROPERTIES TIMEOUT 150) if(NOT WIN32) if(WITH_NV_JETSON) set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 1200) endif() endif() -set_tests_properties(test_add_reader_dependency PROPERTIES TIMEOUT 120) +set_tests_properties(test_add_reader_dependency_deprecated PROPERTIES TIMEOUT + 120) set_tests_properties(test_bilateral_slice_op PROPERTIES TIMEOUT 120) set_tests_properties(test_fleet_util PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_transformer_sorted_gradient PROPERTIES TIMEOUT 120) -set_tests_properties(test_matmul_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_nearest_interp_v2_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_trilinear_interp_op PROPERTIES TIMEOUT 120) set_tests_properties(test_static_save_load PROPERTIES TIMEOUT 250) -set_tests_properties(test_paddle_save_load_binary PROPERTIES TIMEOUT 120) -if(WIN32) - set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 900) -else() - set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 600) -endif() if(WITH_NV_JETSON) - set_tests_properties(test_conv3d_transpose_part2_op PROPERTIES TIMEOUT 1200) - set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 1500) - set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 1500) 
+ set_tests_properties(test_conv3d_transpose_part2_op_deprecated + PROPERTIES TIMEOUT 1200) + set_tests_properties(test_layer_norm_op_deprecated PROPERTIES TIMEOUT 1500) else() - set_tests_properties(test_conv3d_transpose_part2_op PROPERTIES TIMEOUT 120) - set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 250) - set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 150) + set_tests_properties(test_conv3d_transpose_part2_op_deprecated + PROPERTIES TIMEOUT 120) + set_tests_properties(test_layer_norm_op_deprecated PROPERTIES TIMEOUT 250) endif() set_tests_properties(test_imperative_selected_rows_to_lod_tensor PROPERTIES TIMEOUT 200) -set_tests_properties(test_index_select_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_index_add_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_argsort_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_argsort_op_deprecated PROPERTIES TIMEOUT 120) set_tests_properties(test_gather_nd_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_row_conv_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_deformable_conv_v1_op PROPERTIES TIMEOUT 300) set_tests_properties(test_imperative_ptb_rnn_sorted_gradient PROPERTIES TIMEOUT 120) -set_tests_properties(test_crop_tensor_op PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_ptb_rnn PROPERTIES TIMEOUT 120) -set_tests_properties(test_svd_op PROPERTIES TIMEOUT 80) -set_tests_properties(test_qr_op PROPERTIES TIMEOUT 60) -set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_masked_select_op PROPERTIES TIMEOUT 120) set_tests_properties(test_sigmoid_cross_entropy_with_logits_op PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 150) -set_tests_properties(test_partial_sum_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_sgd_op PROPERTIES TIMEOUT 250) +set_tests_properties(test_sgd_op_deprecated PROPERTIES TIMEOUT 250) set_tests_properties(test_generator_dataloader_deprecated 
PROPERTIES TIMEOUT 120) -set_tests_properties(test_partial_concat_op PROPERTIES TIMEOUT 120) set_tests_properties(test_reduce_op PROPERTIES TIMEOUT 500) -set_tests_properties(test_conv_nn_grad PROPERTIES TIMEOUT 220) -set_tests_properties(test_program_prune_backward PROPERTIES TIMEOUT 120) +set_tests_properties(test_program_prune_backward_deprecated PROPERTIES TIMEOUT + 120) set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 250) -set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_decoupled_py_reader PROPERTIES TIMEOUT 120) -set_tests_properties(test_fuse_bn_act_pass PROPERTIES TIMEOUT 120) -set_tests_properties(test_conv2d_op_depthwise_conv PROPERTIES TIMEOUT 120) +set_tests_properties(test_decoupled_py_reader_deprecated PROPERTIES TIMEOUT 120) +set_tests_properties(test_fuse_bn_act_pass_deprecated PROPERTIES TIMEOUT 120) set_tests_properties(test_conv2d_api_deprecated PROPERTIES TIMEOUT 120) -set_tests_properties(test_elementwise_mul_op PROPERTIES TIMEOUT 120) set_tests_properties(test_dygraph_multi_forward PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_ocr_attention_model PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_mnist PROPERTIES TIMEOUT 120) -set_tests_properties(test_gru_op PROPERTIES TIMEOUT 200) -set_tests_properties(test_regularizer PROPERTIES TIMEOUT 150) -set_tests_properties(test_matmul_v2_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_regularizer_deprecated PROPERTIES TIMEOUT 150) set_tests_properties(test_slice_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_pad3d_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_dataloader_keep_order PROPERTIES TIMEOUT 120) -set_tests_properties(test_dataloader_unkeep_order PROPERTIES TIMEOUT 120) +set_tests_properties(test_slice_op_deprecated PROPERTIES TIMEOUT 120) +set_tests_properties(test_dataloader_keep_order_deprecated PROPERTIES TIMEOUT + 120) 
+set_tests_properties(test_dataloader_unkeep_order_deprecated PROPERTIES TIMEOUT + 120) set_tests_properties(test_reader_reset PROPERTIES TIMEOUT 120) -set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 300) set_tests_properties(test_split_program PROPERTIES TIMEOUT 120) -set_tests_properties(test_graph_send_ue_recv_op PROPERTIES TIMEOUT 60) -set_tests_properties(test_graph_send_uv_op PROPERTIES TIMEOUT 60) set_tests_properties(test_uniform_random_op_deprecated PROPERTIES TIMEOUT 60) set_tests_properties(test_pretrained_model PROPERTIES TIMEOUT 120) @@ -756,36 +723,19 @@ set_tests_properties(test_inplace_addto_strategy_deprecated PROPERTIES TIMEOUT set(TEST_CINN_OPS test_softmax_op - test_expand_v2_op test_reduce_op test_slice_op - test_full_like_op - test_index_select_op - test_top_k_v2_op - test_elementwise_mul_op + test_slice_op_deprecated test_gather_nd_op - test_elementwise_pow_op - test_reshape_op - test_meshgrid_op test_scale_op - test_scatter_op - test_layer_norm_op - test_cast_op - test_roll_op - test_atan2_op - test_top_k_op + test_layer_norm_op_deprecated test_where_op test_arg_min_max_op - test_reverse_op - test_flip - test_triangular_solve_op test_scatter_nd_op test_instance_norm_op + test_instance_norm_op_deprecated test_cumsum_op - test_split_op - test_erf_op - test_assign_op - test_flatten_contiguous_range_op) + test_erf_op) foreach(TEST_CINN_OP ${TEST_CINN_OPS}) if(WITH_CINN) @@ -798,32 +748,31 @@ foreach(TEST_CINN_OP ${TEST_CINN_OPS}) endif() endforeach() -# In test_conditional_block, the sub block changes the dtype and place of the output variable. +# In test_conditional_block_deprecated, the sub block changes the dtype and place of the output variable. # The changed variable is used in the following op. Static build is not supported for this case. 
-set_tests_properties(test_conditional_block +set_tests_properties(test_conditional_block_deprecated PROPERTIES ENVIRONMENT "FLAGS_new_executor_static_build=0") # These UTs are to temporarily test static build for standalone_executor, will be removed after static build is enabled by default. set(STATIC_BUILD_TESTS test_adamw_op + test_adamw_op_deprecated test_arg_min_max_op - test_batch_norm_op + test_batch_norm_op_deprecated test_bincount_op - test_decoupled_py_reader - test_eigh_op + test_decoupled_py_reader_deprecated test_fetch_lod_tensor_array - test_fuse_bn_act_pass - test_layer_norm_op + test_fuse_bn_act_pass_deprecated + test_layer_norm_op_deprecated test_lookup_table_v2_op_deprecated - test_matmul_op - test_matmul_v2_op test_momentum_op - test_nce - test_paddle_save_load_binary + test_momentum_op_deprecated + test_nce_deprecated test_reduce_op test_sparse_conv_op test_sparse_norm_op test_tensor_array_to_tensor + test_tensor_array_to_tensor_deprecated test_unique test_one_hot_v2_op) @@ -854,20 +803,18 @@ foreach(PIR_COVERAGE_TEST ${PIR_COVERAGE_TESTS}) message(STATUS "PIR Copied OpTest: ${PIR_COVERAGE_TEST}_pir in legacy_test") endforeach() -set_tests_properties(test_decoupled_py_reader_static_build PROPERTIES TIMEOUT - 120) -set_tests_properties(test_fuse_bn_act_pass_static_build PROPERTIES TIMEOUT 120) +set_tests_properties(test_decoupled_py_reader_deprecated_static_build + PROPERTIES TIMEOUT 120) +set_tests_properties(test_fuse_bn_act_pass_deprecated_static_build + PROPERTIES TIMEOUT 120) set_tests_properties( - test_fuse_bn_act_pass_static_build + test_fuse_bn_act_pass_deprecated_static_build PROPERTIES ENVIRONMENT "FLAGS_cudnn_deterministic=1;FLAGS_cudnn_batchnorm_spatial_persistent=1;FLAGS_conv_workspace_size_limit=1000" ) -set_tests_properties(test_matmul_op_static_build PROPERTIES TIMEOUT 120) -set_tests_properties(test_matmul_v2_op_static_build PROPERTIES TIMEOUT 120) -set_tests_properties(test_layer_norm_op_static_build PROPERTIES TIMEOUT 1500) 
-set_tests_properties(test_paddle_save_load_binary_static_build - PROPERTIES TIMEOUT 120) +set_tests_properties(test_layer_norm_op_deprecated_static_build + PROPERTIES TIMEOUT 1500) set_tests_properties(test_reduce_op_static_build PROPERTIES TIMEOUT 500) py_test_modules(test_stride MODULES test_stride ENVS FLAGS_use_stride_kernel=true) @@ -875,6 +822,6 @@ py_test_modules(test_stride MODULES test_stride ENVS set_tests_properties(test_linalg_matrix_exp PROPERTIES TIMEOUT 120) set_pir_tests_properties() -set_tests_properties(test_fractional_max_pool2d_op PROPERTIES TIMEOUT 120) - set_tests_properties(test_reduce_as_op PROPERTIES TIMEOUT 30) +set_tests_properties(test_attribute_var_deprecated PROPERTIES TIMEOUT 100) +set_tests_properties(test_inference_api_deprecated PROPERTIES TIMEOUT 100) diff --git a/test/deprecated/legacy_test/dist_test.sh b/test/deprecated/legacy_test/dist_test.sh index 69a893a7ddc13..3ae7b209f4a00 100644 --- a/test/deprecated/legacy_test/dist_test.sh +++ b/test/deprecated/legacy_test/dist_test.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -82,7 +82,7 @@ if [[ $exit_code -ne 0 ]]; then fi #display system context -for i in {1..2}; do +for i in {1..2}; do sleep 3 ps -aux netstat -anlp diff --git a/test/deprecated/legacy_test/run_server_for_communicator_geo.py b/test/deprecated/legacy_test/run_server_for_communicator_geo.py index c384459a0ffbc..c8a7ed8f8373e 100644 --- a/test/deprecated/legacy_test/run_server_for_communicator_geo.py +++ b/test/deprecated/legacy_test/run_server_for_communicator_geo.py @@ -13,8 +13,12 @@ # limitations under the License. import os +import sys -from test_communicator_geo import TestCommunicatorGeoEnd2End +sys.path.append(".") +from test_communicator_geo_deprecated import ( + TestCommunicatorGeoEnd2End, +) import paddle diff --git a/test/deprecated/legacy_test/test_adam_op.py b/test/deprecated/legacy_test/test_adam_op.py index 0693d4f664356..50caa25457671 100644 --- a/test/deprecated/legacy_test/test_adam_op.py +++ b/test/deprecated/legacy_test/test_adam_op.py @@ -648,39 +648,6 @@ def test_check_output(self): class TestAdamOpV2(unittest.TestCase): - def test_adam_op(self): - place = base.CPUPlace() - shape = [2, 3, 8, 8] - exe = base.Executor(place) - train_prog = base.Program() - startup = base.Program() - with base.program_guard(train_prog, startup): - with base.unique_name.guard(): - data = paddle.static.data(name="data", shape=shape) - conv = paddle.static.nn.conv2d(data, 8, 3) - loss = paddle.mean(conv) - - beta1 = paddle.static.create_global_var( - shape=[1], value=0.85, dtype='float32', persistable=True - ) - beta2 = paddle.static.create_global_var( - shape=[1], value=0.95, dtype='float32', persistable=True - ) - betas = [beta1, beta2] - opt = paddle.optimizer.Adam( - learning_rate=1e-5, - beta1=beta1, - beta2=beta2, - weight_decay=0.01, - epsilon=1e-8, - ) - opt.minimize(loss) - - exe.run(startup) - data_np = np.random.random(shape).astype('float32') - rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) - assert rets[0] is not None - def 
test_pir_adam_op(self): with paddle.pir_utils.IrGuard(): place = base.CPUPlace() diff --git a/test/deprecated/legacy_test/test_adam_op_deprecated.py b/test/deprecated/legacy_test/test_adam_op_deprecated.py new file mode 100644 index 0000000000000..357d2f9b438a4 --- /dev/null +++ b/test/deprecated/legacy_test/test_adam_op_deprecated.py @@ -0,0 +1,62 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle import base + +paddle.enable_static() + + +class TestAdamOpV2(unittest.TestCase): + def test_adam_op(self): + place = base.CPUPlace() + shape = [2, 3, 8, 8] + exe = base.Executor(place) + train_prog = base.Program() + startup = base.Program() + with base.program_guard(train_prog, startup): + with base.unique_name.guard(): + data = paddle.static.data(name="data", shape=shape) + conv = paddle.static.nn.conv2d(data, 8, 3) + loss = paddle.mean(conv) + + beta1 = paddle.static.create_global_var( + shape=[1], value=0.85, dtype='float32', persistable=True + ) + beta2 = paddle.static.create_global_var( + shape=[1], value=0.95, dtype='float32', persistable=True + ) + betas = [beta1, beta2] + opt = paddle.optimizer.Adam( + learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8, + ) + opt.minimize(loss) + + exe.run(startup) + data_np = np.random.random(shape).astype('float32') + rets = exe.run(train_prog, feed={"data": 
data_np}, fetch_list=[loss]) + assert rets[0] is not None + + +if __name__ == "__main__": + paddle.enable_static() + unittest.main() diff --git a/test/deprecated/legacy_test/test_adamax_api_deprecated.py b/test/deprecated/legacy_test/test_adamax_api_deprecated.py new file mode 100644 index 0000000000000..6f1d806be7eea --- /dev/null +++ b/test/deprecated/legacy_test/test_adamax_api_deprecated.py @@ -0,0 +1,54 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import base + + +class TestAdamaxAPI(unittest.TestCase): + def test_adamax_api(self): + paddle.enable_static() + place = base.CPUPlace() + shape = [2, 3, 8, 8] + exe = base.Executor(place) + train_prog = base.Program() + startup = base.Program() + with base.program_guard(train_prog, startup): + with base.unique_name.guard(): + data = paddle.static.data(name="data", shape=shape) + conv = paddle.static.nn.conv2d(data, 8, 3) + loss = paddle.mean(conv) + beta1 = 0.85 + beta2 = 0.95 + opt = paddle.optimizer.Adamax( + learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8, + ) + opt.minimize(loss) + + exe.run(startup) + data_np = np.random.random(shape).astype('float32') + rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) + assert rets[0] is not None + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_adamw_op.py b/test/deprecated/legacy_test/test_adamw_op.py index 1c901e8d4baf5..53465c1dff73b 100644 --- a/test/deprecated/legacy_test/test_adamw_op.py +++ b/test/deprecated/legacy_test/test_adamw_op.py @@ -262,41 +262,6 @@ def test_adamw_op_coverage(self): ) assert adam.__str__() is not None - def test_adamw_op(self): - paddle.enable_static() - place = base.CPUPlace() - shape = [2, 3, 8, 8] - exe = base.Executor(place) - train_prog = base.Program() - startup = base.Program() - with base.program_guard(train_prog, startup): - with base.unique_name.guard(): - data = paddle.static.data(name="data", shape=shape) - conv = paddle.static.nn.conv2d(data, 8, 3) - loss = paddle.mean(conv) - - beta1 = paddle.static.create_global_var( - shape=[1], value=0.85, dtype='float32', persistable=True - ) - beta2 = paddle.static.create_global_var( - shape=[1], value=0.95, dtype='float32', persistable=True - ) - betas = [beta1, beta2] - opt = paddle.optimizer.AdamW( - learning_rate=1e-5, - beta1=beta1, - beta2=beta2, - 
weight_decay=0.01, - epsilon=1e-8, - ) - opt.minimize(loss) - - exe.run(startup) - data_np = np.random.random(shape).astype('float32') - rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) - assert rets[0] is not None - paddle.disable_static() - def test_pir_adam_op(self): with paddle.pir_utils.IrGuard(): place = base.CPUPlace() diff --git a/test/deprecated/legacy_test/test_adamw_op_deprecated.py b/test/deprecated/legacy_test/test_adamw_op_deprecated.py new file mode 100644 index 0000000000000..c5f5aa5453bbf --- /dev/null +++ b/test/deprecated/legacy_test/test_adamw_op_deprecated.py @@ -0,0 +1,61 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import base + + +class TestAdamWOp(unittest.TestCase): + def test_adamw_op(self): + paddle.enable_static() + place = base.CPUPlace() + shape = [2, 3, 8, 8] + exe = base.Executor(place) + train_prog = base.Program() + startup = base.Program() + with base.program_guard(train_prog, startup): + with base.unique_name.guard(): + data = paddle.static.data(name="data", shape=shape) + conv = paddle.static.nn.conv2d(data, 8, 3) + loss = paddle.mean(conv) + + beta1 = paddle.static.create_global_var( + shape=[1], value=0.85, dtype='float32', persistable=True + ) + beta2 = paddle.static.create_global_var( + shape=[1], value=0.95, dtype='float32', persistable=True + ) + betas = [beta1, beta2] + opt = paddle.optimizer.AdamW( + learning_rate=1e-5, + beta1=beta1, + beta2=beta2, + weight_decay=0.01, + epsilon=1e-8, + ) + opt.minimize(loss) + + exe.run(startup) + data_np = np.random.random(shape).astype('float32') + rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) + assert rets[0] is not None + paddle.disable_static() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_adaptive_avg_pool2d.py b/test/deprecated/legacy_test/test_adaptive_avg_pool2d.py index 880a7cf949a62..5ed16ca8675b1 100644 --- a/test/deprecated/legacy_test/test_adaptive_avg_pool2d.py +++ b/test/deprecated/legacy_test/test_adaptive_avg_pool2d.py @@ -16,7 +16,7 @@ import unittest import numpy as np -from test_attribute_var import UnittestBase +from test_attribute_var_deprecated import UnittestBase import paddle from paddle.base import Program, core, program_guard diff --git a/test/deprecated/legacy_test/test_add_reader_dependency.py b/test/deprecated/legacy_test/test_add_reader_dependency_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_add_reader_dependency.py rename to test/deprecated/legacy_test/test_add_reader_dependency_deprecated.py index 
9dfd4a500d1f7..e844d26679dc2 100644 --- a/test/deprecated/legacy_test/test_add_reader_dependency.py +++ b/test/deprecated/legacy_test/test_add_reader_dependency_deprecated.py @@ -21,6 +21,8 @@ from paddle import base from paddle.base.layer_helper import LayerHelper +paddle.enable_static() + def inplace_add(x, bias): helper = LayerHelper('scale', **locals()) diff --git a/test/deprecated/legacy_test/test_arg_min_max_op.py b/test/deprecated/legacy_test/test_arg_min_max_op.py index c35fa9f8f7d39..69b98997aeed5 100644 --- a/test/deprecated/legacy_test/test_arg_min_max_op.py +++ b/test/deprecated/legacy_test/test_arg_min_max_op.py @@ -17,7 +17,7 @@ import numpy as np from op_test import OpTest, convert_float_to_uint16 -from test_attribute_var import UnittestBase +from test_attribute_var_deprecated import UnittestBase import paddle from paddle.base import Program, program_guard diff --git a/test/deprecated/legacy_test/test_argsort_op_deprecated.py b/test/deprecated/legacy_test/test_argsort_op_deprecated.py new file mode 100644 index 0000000000000..96cd761267082 --- /dev/null +++ b/test/deprecated/legacy_test/test_argsort_op_deprecated.py @@ -0,0 +1,355 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import core +from paddle.base.backward import append_backward +from paddle.base.executor import Executor +from paddle.base.framework import Program, grad_var_name + +np.random.seed(123) +paddle.enable_static() + + +class PyArgsort: + def __init__(self, input_shape, axis, descending, dtype): + self.x = np.random.random(input_shape).astype(dtype) + self.label = np.random.random(input_shape).astype(dtype) + if axis < 0: + self.axis = axis + len(self.x.shape) + else: + self.axis = axis + self.descending = descending + + def forward(self): + if self.descending: + self.indices = np.flip( + np.argsort(self.x, kind='quicksort', axis=self.axis), self.axis + ) + self.sorted_x = np.flip( + np.sort(self.x, kind='quicksort', axis=self.axis), self.axis + ) + else: + self.indices = np.argsort(self.x, kind='quicksort', axis=self.axis) + self.sorted_x = np.sort(self.x, kind='quicksort', axis=self.axis) + self.loss = self.sorted_x * self.label + self.loss = np.sum(self.loss) + out = ( + np.array(self.indices, dtype=self.indices.dtype), + np.array(self.sorted_x, dtype=self.sorted_x.dtype), + np.array(self.loss, dtype=self.loss.dtype), + ) + return out + + +def create_tensor(np_data, place): + tensor = core.LoDTensor() + tensor.set(np_data, place) + return tensor + + +class TestArgsortOpCPU(unittest.TestCase): + def setup_program(self): + self.main_program = Program() + self.startup_program = Program() + self.init_place() + + def setUp(self): + paddle.enable_static() + self.init_axis() + self.init_datatype() + self.init_direction() + self.init_inputshape() + + self.setup_program() + self.feed_data_field = {"x", "label"} + self.grad_data_field = {"x"} + + self.py_argsort = PyArgsort( + self.input_shape, self.axis, self.descending, self.dtype + ) + + with base.program_guard(self.main_program, self.startup_program): + x = paddle.static.data( + name="x", shape=[-1] + 
list(self.input_shape), dtype=self.dtype + ) + x.stop_gradient = False + x.desc.set_need_check_feed(False) + label = paddle.static.data( + name="label", + shape=[-1] + list(self.input_shape), + dtype=self.dtype, + ) + label.desc.set_need_check_feed(False) + self.index = paddle.argsort( + x=x, axis=self.axis, descending=self.descending + ) + self.sorted_x = paddle.sort( + x=x, axis=self.axis, descending=self.descending + ) + self.sorted_x.stop_gradient = False + loss = paddle.multiply(self.sorted_x, label) + self.loss = paddle.sum(loss) + + def forward(self): + self.feed_map = { + x: create_tensor(getattr(self.py_argsort, x), self.place) + for x in self.feed_data_field + } + exe = Executor(self.place) + out = exe.run( + self.main_program, + feed=self.feed_map, + fetch_list=[self.index, self.sorted_x, self.loss], + ) + return out + + def backward(self): + self.feed_map = { + x: create_tensor(getattr(self.py_argsort, x), self.place) + for x in self.feed_data_field + } + fetch_list = [ + self.main_program.global_block().var(grad_var_name(x)) + for x in self.grad_data_field + ] + exe = Executor(self.place) + out = exe.run( + self.main_program, + feed=self.feed_map, + fetch_list=fetch_list, + return_numpy=False, + ) + return out + + def test_backward(self, numeric_grad_delta=1e-5, max_relative_error=1e-7): + self.check_forward() + + with base.program_guard(self.main_program, self.startup_program): + append_backward(self.loss) + + ana_grad = [np.array(x) for x in self.backward()] + + num_grad = self.get_numerical_gradient(delta=numeric_grad_delta) + self.assert_is_close( + num_grad, + ana_grad, + 'x', + max_relative_error=max_relative_error, + msg_prefix="Gradient Check On %s" % str(self.place), + ) + + def check_forward(self): + pd_outputs = self.forward() + py_outputs = self.py_argsort.forward() + for pd_output, py_output in zip(pd_outputs, py_outputs): + self.assertEqual(pd_output.shape, py_output.shape) + np.testing.assert_allclose( + pd_output, py_output, rtol=1e-05, 
atol=0, equal_nan=False + ) + + def get_numerical_gradient(self, delta=1e-7): + if self.dtype == 'float16': + delta = np.array(delta).astype(np.float16) + feed_list = [getattr(self.py_argsort, x) for x in self.grad_data_field] + grad_list = [np.zeros_like(x) for x in feed_list] + for feed, grad in zip(feed_list, grad_list): + for f, g in np.nditer([feed, grad], op_flags=['readwrite']): + o = float(f) + f[...] = o + delta + y_pos = self.forward()[2] + + f[...] = o - delta + y_neg = self.forward()[2] + + f[...] = o + dout_dfeed = (y_pos - y_neg) / (delta * 2) + g[...] = dout_dfeed + + return grad_list + + def assert_is_close( + self, + numeric_grads, + analytic_grads, + names, + max_relative_error, + msg_prefix, + ): + for a, b, name in zip(numeric_grads, analytic_grads, names): + abs_a = np.abs(a) + abs_a[abs_a < 1e-3] = 1 + + diff_mat = np.abs(a - b) / abs_a + max_diff = np.max(diff_mat) + + def err_msg(): + offset = np.argmax(diff_mat > max_relative_error) + return ( + "%s error, %s variable %s max gradient diff %f over limit %f, " + "the first error element is %d, expected %f, but got %f." 
+ ) % ( + 'argsort', + msg_prefix, + name, + max_diff, + max_relative_error, + offset, + a.flatten()[offset], + b.flatten()[offset], + ) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) + + def init_axis(self): + self.axis = -1 + + def init_datatype(self): + self.dtype = "float64" + + def init_direction(self): + self.descending = False + + def init_inputshape(self): + self.input_shape = (2, 2, 2, 2, 3) + + def init_place(self): + self.place = core.CPUPlace() + + +class TestArgsortOpGPU(TestArgsortOpCPU): + def init_place(self): + if core.is_compiled_with_cuda(): + self.place = core.CUDAPlace(0) + else: + self.place = core.CPUPlace() + + +class TestArgsortOpAxis0CPU(TestArgsortOpCPU): + def init_axis(self): + self.axis = 0 + + +class TestArgsortOpAxis0GPU(TestArgsortOpGPU): + def init_axis(self): + self.axis = 0 + + +class TestArgsortOpAxis1CPU(TestArgsortOpCPU): + def init_axis(self): + self.axis = 1 + + +class TestArgsortOpAxis1GPU(TestArgsortOpGPU): + def init_axis(self): + self.axis = 1 + + +class TestArgsortOpAxis2CPU(TestArgsortOpCPU): + def init_axis(self): + self.axis = 2 + + +class TestArgsortOpAxis2GPU(TestArgsortOpGPU): + def init_axis(self): + self.axis = 2 + + +class TestArgsortOpAxisNeg1CPU(TestArgsortOpCPU): + def init_axis(self): + self.axis = -1 + + +class TestArgsortOpAxisNeg1GPU(TestArgsortOpGPU): + def init_axis(self): + self.axis = -1 + + +class TestArgsortOpAxisNeg2CPU(TestArgsortOpCPU): + def init_axis(self): + self.axis = -2 + + +class TestArgsortOpAxisNeg2GPU(TestArgsortOpGPU): + def init_axis(self): + self.axis = -2 + + +class TestArgsortOpDescendingAxisCPU(TestArgsortOpCPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxisGPU(TestArgsortOpGPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxis0CPU(TestArgsortOpAxis0CPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxis0GPU(TestArgsortOpAxis0GPU): 
+ def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxis1CPU(TestArgsortOpAxis1CPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxis1GPU(TestArgsortOpAxis1GPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxis2CPU(TestArgsortOpAxis2CPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxis2GPU(TestArgsortOpAxis2GPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxisNeg1CPU(TestArgsortOpAxisNeg1CPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxisNeg1GPU(TestArgsortOpAxisNeg1GPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxisNeg2CPU(TestArgsortOpAxisNeg2CPU): + def init_direction(self): + self.descending = True + + +class TestArgsortOpDescendingAxisNeg2GPU(TestArgsortOpAxisNeg2GPU): + def init_direction(self): + self.descending = True + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_attribute_var_deprecated.py b/test/deprecated/legacy_test/test_attribute_var_deprecated.py new file mode 100644 index 0000000000000..5f09dff909395 --- /dev/null +++ b/test/deprecated/legacy_test/test_attribute_var_deprecated.py @@ -0,0 +1,107 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import tempfile +import unittest + +import numpy as np + +import paddle +import paddle.inference as paddle_infer +from paddle.base.framework import Program, program_guard + +paddle.enable_static() + + +class UnittestBase(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.init_info() + + def tearDwon(self): + self.temp_dir.cleanup() + + def init_info(self): + self.shapes = None + self.save_path = None + + def path_prefix(self): + return type(self).__name__ + + def infer_prog(self): + config = paddle_infer.Config( + self.save_path + '.pdmodel', self.save_path + '.pdiparams' + ) + config.disable_mkldnn() + predictor = paddle_infer.create_predictor(config) + input_names = predictor.get_input_names() + for i, shape in enumerate(self.shapes): + input_handle = predictor.get_input_handle(input_names[i]) + self.fake_input = np.random.randn(*shape).astype("float32") + input_handle.reshape(shape) + input_handle.copy_from_cpu(self.fake_input) + predictor.run() + output_names = predictor.get_output_names() + res = [] + for out_name in output_names: + output_handle = predictor.get_output_handle(out_name) + output_data = output_handle.copy_to_cpu() + res.append(output_data) + + if len(output_names) == 1: + res = res[0] + + return res + + +class TestDropout(UnittestBase): + def init_info(self): + self.shapes = [[10, 10]] + self.save_path = os.path.join(self.temp_dir.name, 'dropout') + + def test_static(self): + main_prog = Program() + startup_prog = Program() + with program_guard(main_prog, startup_prog): + fc = paddle.nn.Linear(10, 10) + x = paddle.randn(self.shapes[0]) + x.stop_gradient = False + feat = fc(x) + # p is a Variable + p = paddle.randn([1]) + out = paddle.nn.functional.dropout(feat, p=p) + sgd = paddle.optimizer.SGD() + sgd.minimize(paddle.mean(out)) + # test _to_string + self.assertTrue("Var[" in str(main_prog)) + + exe = paddle.static.Executor() + exe.run(startup_prog) + res = exe.run(fetch_list=[x, out]) + # 
export model + paddle.static.save_inference_model(self.save_path, [x], [out], exe) + + # Test for Inference Predictor + infer_out = self.infer_prog() + self.assertEqual(infer_out.shape, (10, 10)) + + self.assertEqual( + main_prog.block(0).ops[4].all_attrs()['dropout_prob'].name, + p.name, + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_auto_parallel_partitioner.py b/test/deprecated/legacy_test/test_auto_parallel_partitioner_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_auto_parallel_partitioner.py rename to test/deprecated/legacy_test/test_auto_parallel_partitioner_deprecated.py diff --git a/test/deprecated/legacy_test/test_auto_parallel_partitioner_gpt.py b/test/deprecated/legacy_test/test_auto_parallel_partitioner_gpt_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_auto_parallel_partitioner_gpt.py rename to test/deprecated/legacy_test/test_auto_parallel_partitioner_gpt_deprecated.py diff --git a/test/deprecated/legacy_test/test_avoid_twice_initialization.py b/test/deprecated/legacy_test/test_avoid_twice_initialization_deprecated.py similarity index 98% rename from test/deprecated/legacy_test/test_avoid_twice_initialization.py rename to test/deprecated/legacy_test/test_avoid_twice_initialization_deprecated.py index 5afb4cd20ccc3..fdca1d76954f3 100644 --- a/test/deprecated/legacy_test/test_avoid_twice_initialization.py +++ b/test/deprecated/legacy_test/test_avoid_twice_initialization_deprecated.py @@ -17,6 +17,8 @@ import paddle from paddle import base +paddle.enable_static() + class TestAvoidTwiceInitialization(unittest.TestCase): def test_avoid_twice_initialization(self): diff --git a/test/deprecated/legacy_test/test_backward.py b/test/deprecated/legacy_test/test_backward_deprecated.py similarity index 88% rename from test/deprecated/legacy_test/test_backward.py rename to test/deprecated/legacy_test/test_backward_deprecated.py index 
04aeadc038213..64a3dfe7e778d 100644 --- a/test/deprecated/legacy_test/test_backward.py +++ b/test/deprecated/legacy_test/test_backward_deprecated.py @@ -19,7 +19,8 @@ import paddle import paddle.nn.functional as F from paddle import base, static -from paddle.base import backward + +paddle.enable_static() class BackwardNet: @@ -411,58 +412,6 @@ def test_gradient_with_optimizer(self): self._check_grad_op_name(forward_list, optimized_list) -# TODO(Aurelius84): add conditional network test -class ConditionalNet(BackwardNet): - def __init__(self): - super().__init__() - - -class TestBackwardUninitializedVariable(unittest.TestCase): - """this case is found in yolov5 while to_static. - gradient aggregation may cause sum a invalid variable. - """ - - def test(self): - paddle.enable_static() - main_prg, startup_prg = paddle.static.Program(), paddle.static.Program() - with paddle.static.program_guard(main_prg, startup_prg): - gt = paddle.static.data(name='gt', shape=[4], dtype='float32') - x = paddle.static.data(name='x', shape=[2], dtype='float32') - gt.stop_gradient = True - x.stop_gradient = False - gt = gt.reshape([4, 1]).reshape([4]) - loss = ( - paddle.nn.functional.binary_cross_entropy(x, gt[:2]) - + (gt[2:4] * x).sum() - ) - exe = paddle.static.Executor() - paddle.base.backward.gradients(loss, []) - exe.run(startup_prg) - # Optimizer - out = exe.run( - main_prg, - feed={ - 'gt': np.array([1.0, 1.0, 0.0, 0.0], dtype='float32'), - 'x': np.array([0.5, 0.5], dtype='float32'), - }, - fetch_list=[loss], - ) - print(out) - - -class TestStripGradSuffix(unittest.TestCase): - def test_strip_grad_suffix(self): - cases = ( - ('x@GRAD', 'x'), - ('x@GRAD@GRAD', 'x'), - ('x@GRAD@RENAME@1', 'x'), - ('x@GRAD_slice_0@GRAD', 'x@GRAD_slice_0'), - ('grad/grad/x@GRAD@RENAME@block0@1@GRAD', 'x'), - ) - for input_, desired in cases: - self.assertEqual(backward._strip_grad_suffix_(input_), desired) - - if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git 
a/test/deprecated/legacy_test/test_batch_norm_op_deprecated.py b/test/deprecated/legacy_test/test_batch_norm_op_deprecated.py new file mode 100644 index 0000000000000..9c63d513e09d2 --- /dev/null +++ b/test/deprecated/legacy_test/test_batch_norm_op_deprecated.py @@ -0,0 +1,528 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest + +import numpy as np +from op_test import ( + _set_use_system_allocator, +) + +import paddle +from paddle import base +from paddle.base import core + +paddle.enable_static() + +_set_use_system_allocator(True) + + +def _cal_mean_variance(x, epsilon, data_format): + assert data_format in ['NCHW', 'NHWC'] + x_shape = x.shape + if len(x_shape) == 3: + if data_format == "NCHW": # NCL -> NCL1 + x = np.reshape(x, (x_shape[0], x_shape[1], x_shape[2], 1)) + else: # NLC -> NL1C + x = np.reshape(x, (x_shape[0], x_shape[1], 1, x_shape[2])) + x_square = x * x + axis = (0, 2, 3) if data_format == 'NCHW' else (0, 1, 2) + C = x.shape[1] if data_format == 'NCHW' else x.shape[-1] + x_square_sum = np.sum(x_square, axis) + x_sum = np.sum(x, axis=axis) + element_count = np.size(x) / C + mean = x_sum / element_count + var = x_square_sum / element_count - mean * mean + return mean, var + + +def _reference_training(x, scale, offset, epsilon, data_format): + x_shape = x.shape + + if len(x_shape) == 3: + if data_format == "NCHW": # NCL -> NCL1 + x = np.reshape(x, (x_shape[0], 
x_shape[1], x_shape[2], 1)) + else: # NLC -> NL1C + x = np.reshape(x, (x_shape[0], x_shape[1], 1, x_shape[2])) + + if data_format == "NCHW": + n, c, h, w = x.shape + x_square = x * x + x_square_sum = np.sum(x_square, (0, 2, 3)) + x_sum = np.sum(x, axis=(0, 2, 3)) + element_count = np.size(x) / int(np.shape(x)[1]) + mean = x_sum / element_count + var = x_square_sum / element_count - mean * mean + mean_tile = np.reshape(mean, (1, c, 1, 1)) + mean_tile = np.tile(mean_tile, (n, 1, h, w)) + var_tile = np.reshape(var, (1, c, 1, 1)) + var_tile = np.tile(var_tile, (n, 1, h, w)) + normalized = (x - mean_tile) / np.sqrt(var_tile + epsilon) + scale_tile = np.reshape(scale, (1, c, 1, 1)) + scale_tile = np.tile(scale_tile, (n, 1, h, w)) + offset_tile = np.reshape(offset, (1, c, 1, 1)) + offset_tile = np.reshape(offset_tile, (1, c, 1, 1)) + y = normalized * scale_tile + offset_tile + elif data_format == "NHWC": + x_square = x * x + x_square_sum = np.sum(x_square, (0, 1, 2)) + x_sum = np.sum(x, axis=(0, 1, 2)) + element_count = np.size(x) / int(np.shape(x)[-1]) + mean = x_sum / element_count + var = x_square_sum / element_count - mean * mean + normalized = (x - mean) / np.sqrt(var + epsilon) + y = normalized * scale + offset + else: + raise ValueError("Unknown data order.") + + if len(x_shape) == 3: + y = np.reshape(y, x_shape) + return y, mean, var + + +def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format): + # Use the following formulas to calculate gradients: + # grad_scale = + # sum(grad_y * (x - mean)) * rsqrt(var + epsilon) + # + # grad_offset = sum(output_y) + # + # x_grad = + # 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) - + # (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon)) + + # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation + if data_format != "NCHW" and data_format != "NHWC": + raise ValueError("Unknown data order.") + + x_shape = x.shape + if len(x_shape) == 3: + if data_format == "NCHW": # NCL -> NCL1 + 
x = np.reshape(x, (x_shape[0], x_shape[1], x_shape[2], 1)) + y_grad = np.reshape(y_grad, (x_shape[0], x_shape[1], x_shape[2], 1)) + else: # NLC -> NL1C + x = np.reshape(x, (x_shape[0], x_shape[1], 1, x_shape[2])) + y_grad = np.reshape(y_grad, (x_shape[0], x_shape[1], 1, x_shape[2])) + + if data_format == "NCHW": + x = np.transpose(x, (0, 2, 3, 1)) + y_grad = np.transpose(y_grad, (0, 2, 3, 1)) + + x_grad = ( + scale + * ( + y_grad + - np.mean(y_grad, axis=(0, 1, 2)) + - (x - mean) + * np.mean(y_grad * (x - mean), axis=(0, 1, 2)) + / (var + epsilon) + ) + / np.sqrt(var + epsilon) + ) + grad_scale = np.sum( + y_grad * (x - mean) / np.sqrt(var + epsilon), axis=(0, 1, 2) + ) + grad_offset = np.sum(y_grad, axis=(0, 1, 2)) + + # transfer back to N, C, H, W + if data_format == "NCHW": + x_grad = np.transpose(x_grad, (0, 3, 1, 2)) + x = np.transpose(x, (0, 3, 1, 2)) + y_grad = np.transpose(y_grad, (0, 3, 1, 2)) + + if len(x_shape) == 3: + x_grad = np.reshape(x_grad, x_shape) + + return x_grad, grad_scale, grad_offset + + +class TestBatchNormOpTraining(unittest.TestCase): + def setUp(self): + self.use_mkldnn = False + self.fuse_with_relu = False + self.data_formats = ["NCHW", "NHWC"] + self.momentum = 0.9 + self.use_momentum_variable = False + self.epsilon = 0.00001 + self.init_kernel_type() + self.init_test_case() + + def init_test_case(self): + self.use_global_stats = False + self.no_grad_set = set() + self.fetch_list = [ + 'y', + 'mean', + 'variance', + 'saved_mean', + 'saved_variance', + 'x@GRAD', + 'scale@GRAD', + 'bias@GRAD', + ] + + def __assert_close(self, tensor, np_array, msg, atol=1e-4): + np.allclose(np.array(tensor), np_array, atol=atol) + + def ref_forward_backward( + self, + x, + y_grad, + scale, + bias, + mean, + variance, + epsilon, + momentum, + shape, + data_layout, + ): + # run forward + y, saved_mean, var_ref = _reference_training( + x, scale, bias, epsilon, data_layout + ) + mean_out = saved_mean * (1.0 - momentum) + momentum * mean + variance_out = 
var_ref * (1.0 - momentum) + momentum * variance + saved_variance = 1.0 / np.sqrt(var_ref + epsilon) + # run backward + x_grad, scale_grad, bias_grad = _reference_grad( + x, y_grad, scale, saved_mean, var_ref, epsilon, data_layout + ) + + return ( + y, + mean_out, + variance_out, + saved_mean, + saved_variance, + x_grad, + scale_grad, + bias_grad, + ) + + def set_mean_variance(self, scale_shape, x, data_layout): + mean, variance = _cal_mean_variance(x, self.epsilon, data_layout) + mean_pre = np.zeros(scale_shape).astype(np.float32) + variance_pre = np.ones(scale_shape).astype(np.float32) + # computing global mean/variance for one step + if self.use_global_stats: + mom = self.momentum + mean = mean * (1.0 - mom) + mom * mean_pre + variance = variance * (1.0 - mom) + mom * variance_pre + return mean, variance + + def test_forward_backward(self): + def test_with_place(place, data_layout, shape): + # attr + epsilon = self.epsilon + momentum = self.momentum + if data_layout == "NCHW": + n, c, h, w = shape[0], shape[1], shape[2], shape[3] + else: + n, h, w, c = shape[0], shape[1], shape[2], shape[3] + scale_shape = [c] + + np.random.seed(123) + x = np.random.random_sample(shape).astype(np.float32) + scale = np.random.random_sample(scale_shape).astype(np.float32) + bias = np.random.random_sample(scale_shape).astype(np.float32) + mean, variance = self.set_mean_variance(scale_shape, x, data_layout) + y_grad = np.random.random_sample(shape).astype(np.float32) + momentum_var = np.array([momentum]).astype(np.float32) + + ( + y, + mean_out, + variance_out, + saved_mean, + saved_variance, + x_grad, + scale_grad, + bias_grad, + ) = self.ref_forward_backward( + x, + y_grad, + scale, + bias, + mean, + variance, + epsilon, + momentum, + shape, + data_layout, + ) + + var_dict = locals() + var_dict['y@GRAD'] = y_grad + var_dict['x@GRAD'] = x_grad + var_dict['scale@GRAD'] = scale_grad + var_dict['bias@GRAD'] = bias_grad + + var_names = [ + 'x', + 'scale', + 'bias', + 'mean', + 
'variance', + 'y', + 'saved_mean', + 'saved_variance', + 'momentum_var', + ] + ground_truth = {name: var_dict[name] for name in var_names} + + program = base.Program() + with base.program_guard(program): + block = program.global_block() + for name in ground_truth: + block.create_var( + name=name, + dtype='float32', + shape=ground_truth[name].shape, + ) + inputs = { + "X": block.var('x'), + "Scale": block.var('scale'), + "Bias": block.var('bias'), + "Mean": block.var('mean'), + "Variance": block.var('variance'), + } + attrs = { + "epsilon": epsilon, + "is_test": False, + "data_layout": data_layout, + "use_mkldnn": self.use_mkldnn, + "fuse_with_relu": self.fuse_with_relu, + "use_global_stats": self.use_global_stats, + } + if self.use_momentum_variable: + inputs['MomentumTensor'] = block.var('momentum_var') + else: + attrs['momentum'] = momentum + + outputs = { + "Y": block.var('y'), + "MeanOut": block.var('mean'), # share memory + "VarianceOut": block.var('variance'), # share memory + "SavedMean": block.var('saved_mean'), + "SavedVariance": block.var('saved_variance'), + } + block.create_var(name="reserve_space", dtype='float32') + outputs["ReserveSpace"] = block.var('reserve_space') + bn_op = block.append_op( + type="batch_norm", + inputs=inputs, + outputs=outputs, + attrs=attrs, + ) + block.create_var(name='y@GRAD', dtype='float32', shape=y.shape) + + # generate backward op_desc + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + bn_op.desc, self.no_grad_set, [] + ) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + program._sync_with_cpp() + + exe = base.Executor(place) 
+ out = exe.run( + program, + feed={ + name: var_dict[name] + for name in [ + 'x', + 'scale', + 'bias', + 'mean', + 'variance', + 'y@GRAD', + 'momentum_var', + ] + }, + fetch_list=self.fetch_list, + ) + + for id, name in enumerate(self.fetch_list): + if name == 'variance': + self.__assert_close( + var_dict[name], out[id], name, atol=1e-3 + ) + continue + self.__assert_close(var_dict[name], out[id], name) + print("op test forward passed: ", str(place), data_layout) + + places = [core.CPUPlace()] + + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + + for place in places: + for data_format in self.data_formats: + test_with_place(place, data_format, [2, 3, 4, 5]) + + def init_kernel_type(self): + pass + + +class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining): + def init_test_case(self): + self.use_global_stats = False + self.no_grad_set = {'scale@GRAD', 'bias@GRAD'} + self.fetch_list = ['y', 'mean', 'variance', 'x@GRAD'] + + +class TestBatchNormOpTrainingCase2(TestBatchNormOpTraining): + def init_test_case(self): + self.use_global_stats = False + self.no_grad_set = set() + self.fetch_list = [ + 'y', + 'mean', + 'variance', + 'saved_mean', + 'saved_variance', + 'x@GRAD', + 'scale@GRAD', + 'bias@GRAD', + ] + os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = "1" + + +class TestBatchNormOpTrainingCase3(TestBatchNormOpTraining): + def init_test_case(self): + self.use_global_stats = False + self.no_grad_set = {'x@GRAD'} + self.fetch_list = ['y', 'mean', 'variance', 'scale@GRAD', 'bias@GRAD'] + + +class TestBatchNormOpTrainingMomentumVariable(TestBatchNormOpTraining): + def init_test_case(self): + self.use_momentum_variable = True + self.use_global_stats = False + self.no_grad_set = set() + self.fetch_list = [ + 'y', + 'mean', + 'variance', + 'saved_mean', + 'saved_variance', + 'x@GRAD', + 'scale@GRAD', + 'bias@GRAD', + ] + + +class TestBatchNormOpFreezeStatsTraining(TestBatchNormOpTraining): + def init_test_case(self): + 
self.use_global_stats = True + self.no_grad_set = set() + self.fetch_list = [ + 'y', + 'mean', + 'variance', + 'x@GRAD', + 'scale@GRAD', + 'bias@GRAD', + ] + + def reference_grad(self, x, y_grad, scale, mean, var, epsilon, data_format): + if data_format == "NCHW": + x = np.transpose(x, (0, 2, 3, 1)) + y_grad = np.transpose(y_grad, (0, 2, 3, 1)) + + x_grad = scale * y_grad / np.sqrt(var + epsilon) + grad_scale = np.sum( + y_grad * (x - mean) / np.sqrt(var + epsilon), axis=(0, 1, 2) + ) + grad_offset = np.sum(y_grad, axis=(0, 1, 2)) + + # transfer back to N, C, H, W + if data_format == "NCHW": + x_grad = np.transpose(x_grad, (0, 3, 1, 2)) + x = np.transpose(x, (0, 3, 1, 2)) + y_grad = np.transpose(y_grad, (0, 3, 1, 2)) + + return x_grad, grad_scale, grad_offset + + def ref_forward_backward( + self, + x, + y_grad, + scale, + bias, + mean, + variance, + epsilon, + momentum, + shape, + data_layout, + ): + if data_layout != "NCHW" and data_layout != "NHWC": + raise ValueError("Unknown data order.") + + if data_layout == "NCHW": + x = np.transpose(x, (0, 2, 3, 1)) + + # run normalizaton + normalized = (x - mean) / np.sqrt(variance + epsilon) + y = normalized * scale + bias + + # transfer back to N, C, H, W + if data_layout == "NCHW": + x = np.transpose(x, (0, 3, 1, 2)) + y = np.transpose(y, (0, 3, 1, 2)) + + mean_out = mean + variance_out = variance + saved_variance = 1.0 / np.sqrt(variance + epsilon) + # run backward + x_grad, scale_grad, bias_grad = self.reference_grad( + x, y_grad, scale, mean, variance, epsilon, data_layout + ) + + return ( + y, + mean_out, + variance_out, + mean, + saved_variance, + x_grad, + scale_grad, + bias_grad, + ) + + +class TestBatchNormOpFreezeStatsAndScaleBiasTraining( + TestBatchNormOpFreezeStatsTraining +): + def init_test_case(self): + self.use_global_stats = True + self.no_grad_set = {'scale@GRAD', 'bias@GRAD'} + self.fetch_list = ['y', 'mean', 'variance', 'x@GRAD'] + + +if __name__ == '__main__': + paddle.enable_static() + 
unittest.main() diff --git a/test/deprecated/legacy_test/test_bilinear_tensor_product_op.py b/test/deprecated/legacy_test/test_bilinear_tensor_product_op_deprecated.py similarity index 64% rename from test/deprecated/legacy_test/test_bilinear_tensor_product_op.py rename to test/deprecated/legacy_test/test_bilinear_tensor_product_op_deprecated.py index 20dcc132e80d6..dee0412af3bc5 100644 --- a/test/deprecated/legacy_test/test_bilinear_tensor_product_op.py +++ b/test/deprecated/legacy_test/test_bilinear_tensor_product_op_deprecated.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, paddle_static_guard +from op_test import paddle_static_guard import paddle from paddle import base @@ -52,37 +52,5 @@ def test_errors(self): ) -class TestBilinearTensorProductOp(OpTest): - def setUp(self): - self.op_type = "bilinear_tensor_product" - self.python_api = paddle.nn.functional.bilinear - batch_size = 6 - size0 = 5 - size1 = 4 - size2 = 5 - dtype = "float32" if base.core.is_compiled_with_rocm() else "float64" - a = np.random.random((batch_size, size0)).astype(dtype) - b = np.random.random((batch_size, size1)).astype(dtype) - w = np.random.random((size2, size0, size1)).astype(dtype) - bias = np.random.random((1, size2)).astype(dtype) - output = np.zeros((batch_size, size2)).astype(dtype) - for i in range(size2): - w_i = w[i, :, :] - output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) - self.inputs = { - 'X': a, - 'Y': b, - 'Weight': w, - 'Bias': bias, - } - self.outputs = {'Out': output + bias} - - def test_check_output(self): - self.check_output() - - def test_check_grad_normal(self): - self.check_grad(['X', 'Y', 'Weight', 'Bias'], 'Out') - - if __name__ == "__main__": unittest.main() diff --git a/test/deprecated/legacy_test/test_communicator_geo.py b/test/deprecated/legacy_test/test_communicator_geo_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_communicator_geo.py rename to 
test/deprecated/legacy_test/test_communicator_geo_deprecated.py diff --git a/test/deprecated/legacy_test/test_compiled_program.py b/test/deprecated/legacy_test/test_compiled_program.py index 1b6f3698afa4d..66b9039212ff1 100644 --- a/test/deprecated/legacy_test/test_compiled_program.py +++ b/test/deprecated/legacy_test/test_compiled_program.py @@ -76,47 +76,5 @@ def test_compiled_program_base(self): np.testing.assert_array_equal(float(loss_data), self.loss) -class TestCompiledProgramError(unittest.TestCase): - def test_program_or_graph_error(self): - self.assertRaises(TypeError, base.CompiledProgram, "program") - - def build_simple_model(self): - img = paddle.static.data( - name='image', shape=[-1, 1, 28, 28], dtype='float32' - ) - label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') - prediction = paddle.static.nn.fc(x=img, size=10, activation='softmax') - loss = paddle.nn.functional.cross_entropy( - input=prediction, label=label, reduction='none', use_softmax=False - ) - avg_loss = paddle.mean(loss) - - def compile_program(self): - with base.program_guard(base.Program()): - # build model - self.build_simple_model() - # compile program - program = base.default_main_program() - compiled_program = base.CompiledProgram(program) - scope = base.global_scope() - place = base.CPUPlace() - compiled_program._compile(scope, place) - return compiled_program, scope, place - - def test_compile_scope_error(self): - compiled_program, _, place = self.compile_program() - new_scope = core.Scope() - with self.assertRaises(ValueError): - compiled_program._compile(new_scope, place) - - def test_compile_place_error(self): - # need create different place - if core.is_compiled_with_cuda(): - compiled_program, scope, _ = self.compile_program() - new_place = base.CUDAPlace(0) - with self.assertRaises(ValueError): - compiled_program._compile(scope, new_place) - - if __name__ == '__main__': unittest.main() diff --git 
a/test/deprecated/legacy_test/test_compiled_program_deprecated.py b/test/deprecated/legacy_test/test_compiled_program_deprecated.py new file mode 100644 index 0000000000000..597cf6af8c7fd --- /dev/null +++ b/test/deprecated/legacy_test/test_compiled_program_deprecated.py @@ -0,0 +1,70 @@ +# copyright (c) 2020 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +import sys +import unittest + +sys.path.append("../../legacy_test") + +import paddle +from paddle import base +from paddle.base import core + +paddle.enable_static() + + +class TestCompiledProgramError(unittest.TestCase): + def test_program_or_graph_error(self): + self.assertRaises(TypeError, base.CompiledProgram, "program") + + def build_simple_model(self): + img = paddle.static.data( + name='image', shape=[-1, 1, 28, 28], dtype='float32' + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') + prediction = paddle.static.nn.fc(x=img, size=10, activation='softmax') + loss = paddle.nn.functional.cross_entropy( + input=prediction, label=label, reduction='none', use_softmax=False + ) + avg_loss = paddle.mean(loss) + + def compile_program(self): + with base.program_guard(base.Program()): + # build model + self.build_simple_model() + # compile program + program = base.default_main_program() + compiled_program = base.CompiledProgram(program) + scope = base.global_scope() + place = base.CPUPlace() + compiled_program._compile(scope, place) + return 
compiled_program, scope, place + + def test_compile_scope_error(self): + compiled_program, _, place = self.compile_program() + new_scope = core.Scope() + with self.assertRaises(ValueError): + compiled_program._compile(new_scope, place) + + def test_compile_place_error(self): + # need create different place + if core.is_compiled_with_cuda(): + compiled_program, scope, _ = self.compile_program() + new_place = base.CUDAPlace(0) + with self.assertRaises(ValueError): + compiled_program._compile(scope, new_place) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_conditional_block.py b/test/deprecated/legacy_test/test_conditional_block_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_conditional_block.py rename to test/deprecated/legacy_test/test_conditional_block_deprecated.py diff --git a/test/deprecated/legacy_test/test_conv3d_transpose_part2_op.py b/test/deprecated/legacy_test/test_conv3d_transpose_part2_op_deprecated.py similarity index 65% rename from test/deprecated/legacy_test/test_conv3d_transpose_part2_op.py rename to test/deprecated/legacy_test/test_conv3d_transpose_part2_op_deprecated.py index 571c961ff4190..02e37f48cda2e 100644 --- a/test/deprecated/legacy_test/test_conv3d_transpose_part2_op.py +++ b/test/deprecated/legacy_test/test_conv3d_transpose_part2_op_deprecated.py @@ -18,93 +18,12 @@ import numpy as np sys.path.append("../../legacy_test") -from test_conv3d_transpose_op import ( - TestConv3DTransposeOp, - create_test_cudnn_bf16_class, - create_test_cudnn_fp16_class, -) import paddle from paddle import base from paddle.base import core - -class TestWithSymmetricPad_NHWC(TestConv3DTransposeOp): - def init_test_case(self): - self.pad = [1, 1, 1] - self.stride = [1, 1, 1] - self.dilations = [1, 1, 1] - self.groups = 1 - self.input_size = [2, 5, 5, 5, 3] # NDHWC - f_c = self.input_size[-1] - self.filter_size = [f_c, 6, 3, 3, 3] - self.data_format = 'NHWC' - - -class 
TestWithAsymmetricPad_NHWC(TestConv3DTransposeOp): - def init_test_case(self): - self.pad = [1, 0, 1, 0, 1, 2] - self.stride = [1, 1, 1] - self.dilations = [1, 1, 1] - self.groups = 1 - self.input_size = [2, 5, 5, 5, 3] # NDHWC - f_c = self.input_size[-1] - self.filter_size = [f_c, 6, 3, 3, 3] - self.data_format = 'NHWC' - - -class TestWithGroups_NHWC(TestConv3DTransposeOp): - def init_test_case(self): - self.check_no_filter = True - self.pad = [1, 1, 1] - self.stride = [1, 1, 1] - self.dilations = [1, 1, 1] - self.groups = 2 - self.input_size = [2, 5, 5, 5, 4] # NDHWC - f_c = self.input_size[-1] - self.filter_size = [f_c, 3, 3, 3, 3] - self.data_format = 'NHWC' - - -class TestWithStride_NHWC(TestConv3DTransposeOp): - def init_test_case(self): - self.pad = [1, 1, 1] - self.stride = [2, 2, 2] - self.dilations = [1, 1, 1] - self.groups = 1 - self.input_size = [2, 5, 5, 5, 3] # NCDHW - f_c = self.input_size[-1] - self.filter_size = [f_c, 6, 3, 3, 3] - self.data_format = 'NHWC' - - -class TestWithDilation_NHWC(TestConv3DTransposeOp): - def init_test_case(self): - self.check_no_input = True - self.pad = [1, 1, 1] - self.stride = [1, 1, 1] - self.dilations = [2, 2, 2] - self.groups = 1 - self.input_size = [2, 5, 5, 5, 3] # NCDHW - f_c = self.input_size[-1] - self.filter_size = [f_c, 6, 3, 3, 3] - self.data_format = 'NHWC' - - -# ----------------Conv3DTransposeCUDNN fp16---------------- -create_test_cudnn_fp16_class(TestWithSymmetricPad_NHWC) -create_test_cudnn_fp16_class(TestWithAsymmetricPad_NHWC) -create_test_cudnn_fp16_class(TestWithGroups_NHWC) -create_test_cudnn_fp16_class(TestWithStride_NHWC) -create_test_cudnn_fp16_class(TestWithDilation_NHWC) - - -# ----------------Conv3DTransposeCUDNN bf16---------------- -create_test_cudnn_bf16_class(TestWithSymmetricPad_NHWC) -create_test_cudnn_bf16_class(TestWithAsymmetricPad_NHWC) -create_test_cudnn_bf16_class(TestWithGroups_NHWC) -create_test_cudnn_bf16_class(TestWithStride_NHWC) 
-create_test_cudnn_bf16_class(TestWithDilation_NHWC) +paddle.enable_static() class TestConv3DTransposeAPI(unittest.TestCase): diff --git a/test/deprecated/legacy_test/test_cost_model.py b/test/deprecated/legacy_test/test_cost_model.py index 997a5c0c6c47b..77220b5a0cfba 100644 --- a/test/deprecated/legacy_test/test_cost_model.py +++ b/test/deprecated/legacy_test/test_cost_model.py @@ -33,26 +33,6 @@ def test_profiler_measure_empty_program(self): ) self.assertEqual(cost_data.get_whole_time_ms(), 0) - def test_profiler_measure_program(self): - main_program = paddle.static.Program() - startup_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - # TODO(zhhsplendid): support paddle.static.data, which is uninitialized data - data = paddle.ones(name='X', shape=[16, 100], dtype='float32') - hidden = paddle.static.nn.fc(data, 10) - loss = paddle.mean(hidden) - cost_model = core.CostModel() - cost_data = cost_model.profile_measure( - main_program, startup_program, device, ["time"] - ) - fc_op_time = cost_data.get_op_time_ms(0) - mean_op_time = cost_data.get_op_time_ms(1) - self.assertGreater(fc_op_time, 0) - self.assertGreater(mean_op_time, 0) - self.assertGreaterEqual( - cost_data.get_whole_time_ms(), fc_op_time + mean_op_time - ) - def test_static_op_benchmark_cost_model(self): op_name = "abs" cost_model = CostModel() diff --git a/test/deprecated/legacy_test/test_cost_model_deprecated.py b/test/deprecated/legacy_test/test_cost_model_deprecated.py new file mode 100644 index 0000000000000..b86b286ad47db --- /dev/null +++ b/test/deprecated/legacy_test/test_cost_model_deprecated.py @@ -0,0 +1,48 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import paddle +from paddle.base import core + +paddle.enable_static() + +device = "gpu" if core.is_compiled_with_cuda() else "cpu" + + +class TestCostModel(unittest.TestCase): + def test_profiler_measure_program(self): + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard(main_program, startup_program): + # TODO(zhhsplendid): support paddle.static.data, which is uninitialized data + data = paddle.ones(name='X', shape=[16, 100], dtype='float32') + hidden = paddle.static.nn.fc(data, 10) + loss = paddle.mean(hidden) + cost_model = core.CostModel() + cost_data = cost_model.profile_measure( + main_program, startup_program, device, ["time"] + ) + fc_op_time = cost_data.get_op_time_ms(0) + mean_op_time = cost_data.get_op_time_ms(1) + self.assertGreater(fc_op_time, 0) + self.assertGreater(mean_op_time, 0) + self.assertGreaterEqual( + cost_data.get_whole_time_ms(), fc_op_time + mean_op_time + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_data_norm_op_deprecated.py b/test/deprecated/legacy_test/test_data_norm_op_deprecated.py new file mode 100644 index 0000000000000..4019ab0c0bf40 --- /dev/null +++ b/test/deprecated/legacy_test/test_data_norm_op_deprecated.py @@ -0,0 +1,60 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This is unit test of Test data_norm Op.""" + +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import Program, program_guard + +paddle.enable_static() + + +class TestDataNormOpErrorr(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + x2 = paddle.static.data(name='x2', shape=[-1, 3, 4], dtype="int32") + # self.assertRaises(TypeError, base.data_norm, x2) + paddle.static.nn.data_norm( + input=x2, param_attr={}, enable_scale_and_shift=True + ) + + # Test input with dimension 1 + paddle.enable_static() + x3 = paddle.static.data("", shape=[0], dtype="float32") + self.assertRaises(ValueError, paddle.static.nn.data_norm, x3) + + # The size of input in data_norm should not be 0. 
+ def test_0_size(): + paddle.enable_static() + x = paddle.static.data(name='x', shape=[0, 3], dtype='float32') + out = paddle.static.nn.data_norm(x, slot_dim=1) + cpu = base.core.CPUPlace() + exe = base.Executor(cpu) + exe.run(base.default_startup_program()) + test_program = base.default_main_program().clone(for_test=True) + exe.run( + test_program, + fetch_list=out, + feed={'x': np.ones([0, 3]).astype('float32')}, + ) + + self.assertRaises(ValueError, test_0_size) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_dataloader_early_reset.py b/test/deprecated/legacy_test/test_dataloader_early_reset_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_dataloader_early_reset.py rename to test/deprecated/legacy_test/test_dataloader_early_reset_deprecated.py index 1c826eb9cb89a..65c1488f66ae0 100644 --- a/test/deprecated/legacy_test/test_dataloader_early_reset.py +++ b/test/deprecated/legacy_test/test_dataloader_early_reset_deprecated.py @@ -19,6 +19,8 @@ import paddle from paddle import base +paddle.enable_static() + def infinite_reader(): num = 0 diff --git a/test/deprecated/legacy_test/test_dataloader_keep_order.py b/test/deprecated/legacy_test/test_dataloader_keep_order_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_dataloader_keep_order.py rename to test/deprecated/legacy_test/test_dataloader_keep_order_deprecated.py index a37e1b4939770..8c05c6a97344c 100644 --- a/test/deprecated/legacy_test/test_dataloader_keep_order.py +++ b/test/deprecated/legacy_test/test_dataloader_keep_order_deprecated.py @@ -20,6 +20,8 @@ import paddle from paddle import base +paddle.enable_static() + def create_reader(shape, batch_number): def __impl__(): diff --git a/test/deprecated/legacy_test/test_dataloader_unkeep_order.py b/test/deprecated/legacy_test/test_dataloader_unkeep_order_deprecated.py similarity index 99% rename from 
test/deprecated/legacy_test/test_dataloader_unkeep_order.py rename to test/deprecated/legacy_test/test_dataloader_unkeep_order_deprecated.py index 17e5257dffc01..acc272b766f89 100644 --- a/test/deprecated/legacy_test/test_dataloader_unkeep_order.py +++ b/test/deprecated/legacy_test/test_dataloader_unkeep_order_deprecated.py @@ -21,6 +21,8 @@ from paddle import base from paddle.base.reader import keep_data_loader_order +paddle.enable_static() + keep_data_loader_order(False) diff --git a/test/deprecated/legacy_test/test_dataset.py b/test/deprecated/legacy_test/test_dataset.py index fd4141c1c8b64..6a005c1f28576 100644 --- a/test/deprecated/legacy_test/test_dataset.py +++ b/test/deprecated/legacy_test/test_dataset.py @@ -923,148 +923,6 @@ def setUp(self): self.drop_last = False -class TestDatasetWithFetchHandler(unittest.TestCase): - """ - Test Dataset With Fetch Handler. TestCases. - """ - - def net(self): - """ - Test Dataset With Fetch Handler. TestCases. - """ - slots = ["slot1", "slot2", "slot3", "slot4"] - slots_vars = [] - poolings = [] - for slot in slots: - data = paddle.static.data( - name=slot, shape=[-1, 1], dtype="int64", lod_level=1 - ) - var = paddle.cast(x=data, dtype='float32') - pool = paddle.static.nn.sequence_lod.sequence_pool( - input=var, pool_type='AVERAGE' - ) - - slots_vars.append(data) - poolings.append(pool) - - concated = paddle.concat(poolings, axis=1) - fc = paddle.static.nn.fc(x=concated, activation='tanh', size=32) - return slots_vars, fc - - def get_dataset(self, inputs, files): - """ - Test Dataset With Fetch Handler. TestCases. - - Args: - inputs(list): inputs of get_dataset - files(list): files of get_dataset - """ - dataset = paddle.distributed.QueueDataset() - dataset.init( - batch_size=32, thread_num=2, pipe_command="cat", use_var=inputs - ) - dataset.set_filelist(files) - return dataset - - def setUp(self): - """ - Test Dataset With Fetch Handler. TestCases. 
- """ - self.temp_dir = tempfile.TemporaryDirectory() - self.filename1 = os.path.join( - self.temp_dir.name, "test_queue_dataset_run_a.txt" - ) - self.filename2 = os.path.join( - self.temp_dir.name, "test_queue_dataset_run_b.txt" - ) - - with open(self.filename1, "w") as f: - data = "1 1 2 3 3 4 5 5 5 5 1 1\n" - data += "1 2 2 3 4 4 6 6 6 6 1 2\n" - data += "1 3 2 3 5 4 7 7 7 7 1 3\n" - f.write(data) - with open(self.filename2, "w") as f: - data = "1 4 2 3 3 4 5 5 5 5 1 4\n" - data += "1 5 2 3 4 4 6 6 6 6 1 5\n" - data += "1 6 2 3 5 4 7 7 7 7 1 6\n" - data += "1 7 2 3 6 4 8 8 8 8 1 7\n" - f.write(data) - - def tearDown(self): - """ - Test Dataset With Fetch Handler. TestCases. - """ - self.temp_dir.cleanup() - - def test_dataset_none(self): - """ - Test Dataset With Fetch Handler. TestCases. - """ - slots_vars, out = self.net() - files = [self.filename1, self.filename2] - dataset = self.get_dataset(slots_vars, files) - - exe = base.Executor(base.CPUPlace()) - exe.run(base.default_startup_program()) - - # test dataset->None - try: - exe.train_from_dataset(base.default_main_program(), None) - except ImportError as e: - print("warning: we skip trainer_desc_pb2 import problem in windows") - except RuntimeError as e: - error_msg = "dataset is need and should be initialized" - self.assertEqual(error_msg, str(e)) - except Exception as e: - self.assertTrue(False) - - def test_infer_from_dataset(self): - """ - Test Dataset With Fetch Handler. TestCases. - """ - slots_vars, out = self.net() - files = [self.filename1, self.filename2] - dataset = self.get_dataset(slots_vars, files) - - exe = base.Executor(base.CPUPlace()) - exe.run(base.default_startup_program()) - - try: - exe.infer_from_dataset(base.default_main_program(), dataset) - except ImportError as e: - print("warning: we skip trainer_desc_pb2 import problem in windows") - except Exception as e: - self.assertTrue(False) - - def test_fetch_handler(self): - """ - Test Dataset With Fetch Handler. TestCases. 
- """ - slots_vars, out = self.net() - files = [self.filename1, self.filename2] - dataset = self.get_dataset(slots_vars, files) - - exe = base.Executor(base.CPUPlace()) - exe.run(base.default_startup_program()) - - fh = base.executor.FetchHandler(out.name) - fh.help() - - try: - exe.train_from_dataset( - program=base.default_main_program(), - dataset=dataset, - fetch_handler=fh, - ) - except ImportError as e: - print("warning: we skip trainer_desc_pb2 import problem in windows") - except RuntimeError as e: - error_msg = "dataset is need and should be initialized" - self.assertEqual(error_msg, str(e)) - except Exception as e: - self.assertTrue(False) - - class TestDataset2(unittest.TestCase): """TestCases for Dataset.""" diff --git a/test/deprecated/legacy_test/test_dataset_deprecated.py b/test/deprecated/legacy_test/test_dataset_deprecated.py new file mode 100644 index 0000000000000..f3af35297e284 --- /dev/null +++ b/test/deprecated/legacy_test/test_dataset_deprecated.py @@ -0,0 +1,172 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +TestCases for Dataset, +including create, config, run, etc. +""" + +import os +import tempfile +import unittest + +import paddle +from paddle import base + +paddle.enable_static() + + +class TestDatasetWithFetchHandler(unittest.TestCase): + """ + Test Dataset With Fetch Handler. TestCases. + """ + + def net(self): + """ + Test Dataset With Fetch Handler. TestCases. 
+ """ + slots = ["slot1", "slot2", "slot3", "slot4"] + slots_vars = [] + poolings = [] + for slot in slots: + data = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 + ) + var = paddle.cast(x=data, dtype='float32') + pool = paddle.static.nn.sequence_lod.sequence_pool( + input=var, pool_type='AVERAGE' + ) + + slots_vars.append(data) + poolings.append(pool) + + concated = paddle.concat(poolings, axis=1) + fc = paddle.static.nn.fc(x=concated, activation='tanh', size=32) + return slots_vars, fc + + def get_dataset(self, inputs, files): + """ + Test Dataset With Fetch Handler. TestCases. + + Args: + inputs(list): inputs of get_dataset + files(list): files of get_dataset + """ + dataset = paddle.distributed.QueueDataset() + dataset.init( + batch_size=32, thread_num=2, pipe_command="cat", use_var=inputs + ) + dataset.set_filelist(files) + return dataset + + def setUp(self): + """ + Test Dataset With Fetch Handler. TestCases. + """ + self.temp_dir = tempfile.TemporaryDirectory() + self.filename1 = os.path.join( + self.temp_dir.name, "test_queue_dataset_run_a.txt" + ) + self.filename2 = os.path.join( + self.temp_dir.name, "test_queue_dataset_run_b.txt" + ) + + with open(self.filename1, "w") as f: + data = "1 1 2 3 3 4 5 5 5 5 1 1\n" + data += "1 2 2 3 4 4 6 6 6 6 1 2\n" + data += "1 3 2 3 5 4 7 7 7 7 1 3\n" + f.write(data) + with open(self.filename2, "w") as f: + data = "1 4 2 3 3 4 5 5 5 5 1 4\n" + data += "1 5 2 3 4 4 6 6 6 6 1 5\n" + data += "1 6 2 3 5 4 7 7 7 7 1 6\n" + data += "1 7 2 3 6 4 8 8 8 8 1 7\n" + f.write(data) + + def tearDown(self): + """ + Test Dataset With Fetch Handler. TestCases. + """ + self.temp_dir.cleanup() + + def test_dataset_none(self): + """ + Test Dataset With Fetch Handler. TestCases. 
+ """ + slots_vars, out = self.net() + files = [self.filename1, self.filename2] + dataset = self.get_dataset(slots_vars, files) + + exe = base.Executor(base.CPUPlace()) + exe.run(base.default_startup_program()) + + # test dataset->None + try: + exe.train_from_dataset(base.default_main_program(), None) + except ImportError as e: + print("warning: we skip trainer_desc_pb2 import problem in windows") + except RuntimeError as e: + error_msg = "dataset is need and should be initialized" + self.assertEqual(error_msg, str(e)) + except Exception as e: + self.assertTrue(False) + + def test_infer_from_dataset(self): + """ + Test Dataset With Fetch Handler. TestCases. + """ + slots_vars, out = self.net() + files = [self.filename1, self.filename2] + dataset = self.get_dataset(slots_vars, files) + + exe = base.Executor(base.CPUPlace()) + exe.run(base.default_startup_program()) + + try: + exe.infer_from_dataset(base.default_main_program(), dataset) + except ImportError as e: + print("warning: we skip trainer_desc_pb2 import problem in windows") + except Exception as e: + self.assertTrue(False) + + def test_fetch_handler(self): + """ + Test Dataset With Fetch Handler. TestCases. 
+ """ + slots_vars, out = self.net() + files = [self.filename1, self.filename2] + dataset = self.get_dataset(slots_vars, files) + + exe = base.Executor(base.CPUPlace()) + exe.run(base.default_startup_program()) + + fh = base.executor.FetchHandler(out.name) + fh.help() + + try: + exe.train_from_dataset( + program=base.default_main_program(), + dataset=dataset, + fetch_handler=fh, + ) + except ImportError as e: + print("warning: we skip trainer_desc_pb2 import problem in windows") + except RuntimeError as e: + error_msg = "dataset is need and should be initialized" + self.assertEqual(error_msg, str(e)) + except Exception as e: + self.assertTrue(False) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_decoupled_py_reader.py b/test/deprecated/legacy_test/test_decoupled_py_reader_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_decoupled_py_reader.py rename to test/deprecated/legacy_test/test_decoupled_py_reader_deprecated.py index a28b2584a5ff6..f9c75dc7dfd59 100644 --- a/test/deprecated/legacy_test/test_decoupled_py_reader.py +++ b/test/deprecated/legacy_test/test_decoupled_py_reader_deprecated.py @@ -20,6 +20,8 @@ import paddle from paddle import base +paddle.enable_static() + EPOCH_NUM = 5 BATCH_SIZE = 16 BATCH_NUM = 10 diff --git a/test/deprecated/legacy_test/test_deform_conv2d.py b/test/deprecated/legacy_test/test_deform_conv2d_deprecated.py similarity index 57% rename from test/deprecated/legacy_test/test_deform_conv2d.py rename to test/deprecated/legacy_test/test_deform_conv2d_deprecated.py index 23208363b5ff9..d8ad41359adfd 100644 --- a/test/deprecated/legacy_test/test_deform_conv2d.py +++ b/test/deprecated/legacy_test/test_deform_conv2d_deprecated.py @@ -19,213 +19,6 @@ import paddle import paddle.nn.initializer as I -from paddle.pir_utils import test_with_pir_api - - -class TestDeformConv2D(TestCase): - batch_size = 4 - spatial_shape = (5, 5) - dtype = "float32" - - def setUp(self): 
- self.in_channels = 2 - self.out_channels = 5 - self.kernel_size = [3, 3] - self.padding = [0, 0] - self.stride = [1, 1] - self.dilation = [1, 1] - self.deformable_groups = 1 - self.groups = 1 - self.no_bias = True - - def prepare(self): - np.random.seed(1) - paddle.seed(1) - if isinstance(self.kernel_size, int): - filter_shape = (self.kernel_size,) * 2 - else: - filter_shape = tuple(self.kernel_size) - self.filter_shape = filter_shape - - self.weight = np.random.uniform( - -1, - 1, - (self.out_channels, self.in_channels // self.groups) + filter_shape, - ).astype(self.dtype) - if not self.no_bias: - self.bias = np.random.uniform(-1, 1, (self.out_channels,)).astype( - self.dtype - ) - - def out_size( - in_size, pad_size, dilation_size, kernel_size, stride_size - ): - return ( - in_size + 2 * pad_size - (dilation_size * (kernel_size - 1) + 1) - ) / stride_size + 1 - - out_h = int( - out_size( - self.spatial_shape[0], - self.padding[0], - self.dilation[0], - self.kernel_size[0], - self.stride[0], - ) - ) - out_w = int( - out_size( - self.spatial_shape[1], - self.padding[1], - self.dilation[1], - self.kernel_size[1], - self.stride[1], - ) - ) - out_shape = (out_h, out_w) - - self.input_shape = ( - self.batch_size, - self.in_channels, - ) + self.spatial_shape - - self.offset_shape = ( - self.batch_size, - self.deformable_groups * 2 * filter_shape[0] * filter_shape[1], - ) + out_shape - - self.mask_shape = ( - self.batch_size, - self.deformable_groups * filter_shape[0] * filter_shape[1], - ) + out_shape - - self.input = np.random.uniform(-1, 1, self.input_shape).astype( - self.dtype - ) - - self.offset = np.random.uniform(-1, 1, self.offset_shape).astype( - self.dtype - ) - - self.mask = np.random.uniform(-1, 1, self.mask_shape).astype(self.dtype) - - def static_graph_case_dcn(self): - main = paddle.static.Program() - start = paddle.static.Program() - paddle.enable_static() - with paddle.static.program_guard(main, start): - x = paddle.static.data( - "input", (-1, 
self.in_channels, -1, -1), dtype=self.dtype - ) - offset = paddle.static.data( - "offset", - ( - -1, - self.deformable_groups - * 2 - * self.filter_shape[0] - * self.filter_shape[1], - -1, - -1, - ), - dtype=self.dtype, - ) - mask = paddle.static.data( - "mask", - ( - -1, - self.deformable_groups - * self.filter_shape[0] - * self.filter_shape[1], - -1, - -1, - ), - dtype=self.dtype, - ) - - y_v1 = paddle.vision.ops.DeformConv2D( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=self.filter_shape, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - groups=self.groups, - deformable_groups=self.deformable_groups, - weight_attr=I.Assign(self.weight), - bias_attr=False if self.no_bias else I.Assign(self.bias), - )(x, offset, None) - - y_v2 = paddle.vision.ops.DeformConv2D( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=self.filter_shape, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - groups=self.groups, - deformable_groups=self.deformable_groups, - weight_attr=I.Assign(self.weight), - bias_attr=False if self.no_bias else I.Assign(self.bias), - )(x, offset, mask) - - exe = paddle.static.Executor(self.place) - exe.run(start) - out_v1, out_v2 = exe.run( - main, - feed={ - "input": self.input, - "offset": self.offset, - "mask": self.mask, - }, - fetch_list=[y_v1, y_v2], - ) - return out_v1, out_v2 - - def dygraph_case_dcn(self): - paddle.disable_static() - x = paddle.to_tensor(self.input) - offset = paddle.to_tensor(self.offset) - mask = paddle.to_tensor(self.mask) - - bias = None if self.no_bias else paddle.to_tensor(self.bias) - - deform_conv2d = paddle.vision.ops.DeformConv2D( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=self.kernel_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - deformable_groups=self.deformable_groups, - groups=self.groups, - weight_attr=I.Assign(self.weight), - 
bias_attr=False if self.no_bias else I.Assign(self.bias), - ) - - y_v1 = deform_conv2d(x, offset) - y_v2 = deform_conv2d(x, offset, mask) - - out_v1 = y_v1.numpy() - out_v2 = y_v2.numpy() - - return out_v1, out_v2 - - @test_with_pir_api - def _test_identity(self): - self.prepare() - static_dcn_v1, static_dcn_v2 = self.static_graph_case_dcn() - dy_dcn_v1, dy_dcn_v2 = self.dygraph_case_dcn() - np.testing.assert_array_almost_equal(static_dcn_v1, dy_dcn_v1) - np.testing.assert_array_almost_equal(static_dcn_v2, dy_dcn_v2) - - def test_identity(self): - self.place = paddle.CPUPlace() - self._test_identity() - - if paddle.is_compiled_with_cuda(): - self.place = paddle.CUDAPlace(0) - self._test_identity() class TestDeformConv2DFunctional(TestCase): @@ -536,98 +329,6 @@ def test_identity(self): self._test_identity() -# testcases for DeformConv2D -class TestDeformConv2DWithPadding(TestDeformConv2D): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.kernel_size = [3, 3] - self.padding = [2, 2] - self.stride = [1, 1] - self.dilation = [1, 1] - self.deformable_groups = 1 - self.groups = 1 - self.no_bias = True - - -class TestDeformConv2DWithBias(TestDeformConv2D): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.kernel_size = [3, 3] - self.padding = [2, 2] - self.stride = [1, 1] - self.dilation = [1, 1] - self.deformable_groups = 1 - self.groups = 1 - self.no_bias = False - - -class TestDeformConv2DWithAsynPadding(TestDeformConv2D): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.kernel_size = [3, 3] - self.padding = [1, 2] - self.stride = [1, 1] - self.dilation = [1, 1] - self.deformable_groups = 1 - self.groups = 1 - self.no_bias = False - - -class TestDeformConv2DWithDilation(TestDeformConv2D): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.kernel_size = [3, 3] - self.padding = [1, 1] - self.stride = [1, 1] - self.dilation = [3, 3] - self.deformable_groups = 1 - self.groups = 1 
- self.no_bias = False - - -class TestDeformConv2DWithStride(TestDeformConv2D): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.kernel_size = [3, 3] - self.padding = [1, 1] - self.stride = [2, 2] - self.dilation = [1, 1] - self.deformable_groups = 1 - self.groups = 1 - self.no_bias = False - - -class TestDeformConv2DWithDeformable_Groups(TestDeformConv2D): - def setUp(self): - self.in_channels = 5 - self.out_channels = 5 - self.kernel_size = [3, 3] - self.padding = [1, 1] - self.stride = [1, 1] - self.dilation = [1, 1] - self.deformable_groups = 5 - self.groups = 1 - self.no_bias = False - - -class TestDeformConv2DWithGroups(TestDeformConv2D): - def setUp(self): - self.in_channels = 5 - self.out_channels = 5 - self.kernel_size = [3, 3] - self.padding = [1, 1] - self.stride = [1, 1] - self.dilation = [1, 1] - self.deformable_groups = 1 - self.groups = 5 - self.no_bias = False - - # testcases for deform_conv2d class TestDeformConv2DFunctionalWithPadding(TestDeformConv2DFunctional): def setUp(self): @@ -722,27 +423,5 @@ def setUp(self): self.no_bias = False -class TestDeformConv2DError(unittest.TestCase): - @test_with_pir_api - def test_input_error(self): - def test_input_rank_error(): - paddle.enable_static() - x = paddle.static.data(name='error_x_1', shape=[0], dtype='float32') - offset = paddle.static.data( - name='error_offset_1', shape=[0], dtype='float32' - ) - mask = paddle.static.data( - name='error_mask_1', shape=[0, 0, 0], dtype='float32' - ) - out = paddle.vision.ops.DeformConv2D( - in_channels=0, - out_channels=0, - kernel_size=0, - deformable_groups=0, - )(x, offset, mask) - - self.assertRaises(AssertionError, test_input_rank_error) - - if __name__ == "__main__": unittest.main() diff --git a/test/deprecated/legacy_test/test_deformable_conv_op_deprecated.py b/test/deprecated/legacy_test/test_deformable_conv_op_deprecated.py new file mode 100644 index 0000000000000..04bbc51d48fda --- /dev/null +++ 
b/test/deprecated/legacy_test/test_deformable_conv_op_deprecated.py @@ -0,0 +1,178 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from itertools import product + +import numpy as np + +import paddle + +paddle.enable_static() + + +def dmc_bilinear(data_im, height, width, h, w): + h_low = int(np.floor(h)) + w_low = int(np.floor(w)) + h_high = h_low + 1 + w_high = w_low + 1 + + lh = h - h_low + lw = w - w_low + hh = 1 - lh + hw = 1 - lw + + v1 = 0 + if h_low >= 0 and w_low >= 0: + v1 = data_im[h_low, w_low] + v2 = 0 + if h_low >= 0 and w_high <= width - 1: + v2 = data_im[h_low, w_high] + v3 = 0 + if h_high <= height - 1 and w_low >= 0: + v3 = data_im[h_high, w_low] + v4 = 0 + if h_high <= height - 1 and w_high <= width - 1: + v4 = data_im[h_high, w_high] + + w1, w2, w3, w4 = hh * hw, hh * lw, lh * hw, lh * lw + val = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4 + + return val + + +def dconv_im2col_gemm(input, offset, mask, filter, group, conv_param): + in_n, in_c, in_h, in_w = input.shape + out_c, f_c, f_h, f_w = filter.shape + + assert offset.shape == (in_n, 2 * f_h * f_w, in_h, in_w) + assert mask.shape == (in_n, f_h * f_w, in_h, in_w) + assert f_c * group == in_c + assert np.mod(out_c, group) == 0 + + stride, pad, dilation = ( + conv_param['stride'], + conv_param['pad'], + conv_param['dilation'], + ) + out_h = 1 + (in_h + 2 * pad[0] - (dilation[0] * (f_h - 1) + 1)) // stride[0] + out_w = 
1 + (in_w + 2 * pad[1] - (dilation[1] * (f_w - 1) + 1)) // stride[1] + assert out_h == in_h + assert out_w == in_w + + col_buffer = np.zeros((in_n, in_c * f_h * f_w, in_h * in_w)) + for n, c, h, w, kh, kw in product( + range(in_n), + range(in_c), + range(out_h), + range(out_w), + range(f_h), + range(f_w), + ): + offset_h_table = offset[n, ::2, h, w].reshape(f_h, f_w) + offset_w_table = offset[n, 1::2, h, w].reshape(f_h, f_w) + mask_table = mask[n, :, h, w].reshape(f_h, f_w) + offset_h = offset_h_table[kh, kw] + offset_w = offset_w_table[kh, kw] + val = 0 + im_h = h * stride[0] + kh * dilation[0] + offset_h - pad[0] + im_w = w * stride[0] + kw * dilation[0] + offset_w - pad[1] + if im_h > -1 and im_w > -1 and im_h < in_h and im_w < in_h: + val = dmc_bilinear(input[n, c], in_h, in_w, im_h, im_w) + val_out = val * mask_table[kh, kw] + col_buffer[n, c * f_h * f_w + kh * f_w + kw, h * in_w + w] = val_out + + out = np.zeros((in_n, group, int(out_c // group), out_h * out_w)) + weight = filter.reshape(group, int(out_c // group), f_c * f_h * f_w) + col_buffer = col_buffer.reshape( + (in_n, group, int(in_c // group * f_h * f_w), in_h * in_w) + ) + for n in range(in_n): + for g in range(group): + out[n, g] = np.matmul(weight[g], col_buffer[n, g]) + out = out.reshape(in_n, out_c, out_h, out_w) + return out + + +class TestModulatedDeformableConvInvalidInput(unittest.TestCase): + def test_error(self): + def test_invalid_input(): + paddle.enable_static() + input = [1, 3, 32, 32] + offset = paddle.static.data( + name='offset', shape=[None, 3, 32, 32], dtype='float32' + ) + mask = paddle.static.data( + name='mask', shape=[None, 3, 32, 32], dtype='float32' + ) + loss = paddle.static.nn.common.deformable_conv( + input, offset, mask, num_filters=4, filter_size=1 + ) + + self.assertRaises(TypeError, test_invalid_input) + + def test_invalid_offset(): + paddle.enable_static() + input = paddle.static.data( + name='input', shape=[None, 3, 32, 32], dtype='int32' + ) + offset = 
paddle.static.data( + name='offset', shape=[None, 3, 32, 32], dtype='float32' + ) + mask = paddle.static.data( + name='mask', shape=[None, 3, 32, 32], dtype='float32' + ) + loss = paddle.static.nn.common.deformable_conv( + input, offset, mask, num_filters=4, filter_size=1 + ) + + self.assertRaises(TypeError, test_invalid_offset) + + def test_invalid_filter(): + paddle.enable_static() + input = paddle.static.data( + name='input_filter', shape=[None, 3, 32, 32], dtype='float32' + ) + offset = paddle.static.data( + name='offset_filter', shape=[None, 3, 32, 32], dtype='float32' + ) + mask = paddle.static.data( + name='mask_filter', shape=[None, 3, 32, 32], dtype='float32' + ) + loss = paddle.static.nn.common.deformable_conv( + input, offset, mask, num_filters=4, filter_size=0 + ) + + self.assertRaises(ValueError, test_invalid_filter) + + def test_invalid_groups(): + paddle.enable_static() + input = paddle.static.data( + name='input_groups', shape=[1, 1, 1, 1], dtype='float32' + ) + offset = paddle.static.data( + name='offset_groups', shape=[1, 1], dtype='float32' + ) + mask = paddle.static.data( + name='mask_groups', shape=[1], dtype='float32' + ) + paddle.static.nn.deform_conv2d( + input, offset, mask, 1, 1, padding=1, groups=0 + ) + + self.assertRaises(ValueError, test_invalid_groups) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_desc_clone.py b/test/deprecated/legacy_test/test_desc_clone_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_desc_clone.py rename to test/deprecated/legacy_test/test_desc_clone_deprecated.py diff --git a/test/deprecated/legacy_test/test_device_guard.py b/test/deprecated/legacy_test/test_device_guard_deprecated.py similarity index 75% rename from test/deprecated/legacy_test/test_device_guard.py rename to test/deprecated/legacy_test/test_device_guard_deprecated.py index 502cde95f4007..ddc2e1a1e0958 100644 --- a/test/deprecated/legacy_test/test_device_guard.py +++ 
b/test/deprecated/legacy_test/test_device_guard_deprecated.py @@ -93,57 +93,6 @@ def test_device_guard_with_id(self): execute(main_program, startup_program) - def test_cpu_only_op(self): - main_program = paddle.static.Program() - startup_program = paddle.static.Program() - with paddle.static.program_guard(main_program, startup_program): - x = paddle.full( - shape=[2, 255, 13, 13], fill_value=0.3, dtype='float32' - ) - gt_box = paddle.full( - shape=[2, 6, 4], fill_value=0.5, dtype='float32' - ) - gt_label = paddle.full(shape=[2, 6], fill_value=1.0, dtype='int32') - gt_score = paddle.full( - shape=[2, 6], fill_value=0.5, dtype='float32' - ) - anchors = [ - 10, - 13, - 16, - 30, - 33, - 23, - 30, - 61, - 62, - 45, - 59, - 119, - 116, - 90, - 156, - 198, - 373, - 326, - ] - anchor_mask = [0, 1, 2] - with paddle.static.device_guard("gpu"): - # yolo_loss only has cpu kernel, so its cpu kernel will be executed - loss = paddle.vision.ops.yolo_loss( - x=x, - gt_box=gt_box, - gt_label=gt_label, - gt_score=gt_score, - anchors=anchors, - anchor_mask=anchor_mask, - class_num=80, - ignore_thresh=0.7, - downsample_ratio=32, - ) - - execute(main_program, startup_program) - @test_with_pir_api def test_without_kernel_op(self): main_program = paddle.static.Program() @@ -174,18 +123,6 @@ def test_without_kernel_op(self): execute(main_program, startup_program) - def test_error(self): - def device_attr(): - with paddle.static.device_guard("cpu1"): - out = paddle.full(shape=[1], fill_value=0.2, dtype='float32') - - def device_attr2(): - with paddle.static.device_guard("cpu:1"): - out = paddle.full(shape=[1], fill_value=0.2, dtype='float32') - - self.assertRaises(ValueError, device_attr) - self.assertRaises(ValueError, device_attr2) - # check if op_descs have op_device attr def test_op_descs_device_attr(self): main_program = paddle.static.Program() diff --git a/test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_async.py 
b/test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_async_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_async.py rename to test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_async_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto.py b/test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto.py rename to test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_geo.py b/test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_geo_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_geo.py rename to test/deprecated/legacy_test/test_dist_fleet_a_sync_optimizer_auto_geo_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_decay.py b/test/deprecated/legacy_test/test_dist_fleet_decay_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_decay.py rename to test/deprecated/legacy_test/test_dist_fleet_decay_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_heter_program.py b/test/deprecated/legacy_test/test_dist_fleet_heter_program_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_heter_program.py rename to test/deprecated/legacy_test/test_dist_fleet_heter_program_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps10.py b/test/deprecated/legacy_test/test_dist_fleet_ps10_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps10.py rename to test/deprecated/legacy_test/test_dist_fleet_ps10_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps13.py 
b/test/deprecated/legacy_test/test_dist_fleet_ps13_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps13.py rename to test/deprecated/legacy_test/test_dist_fleet_ps13_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps2.py b/test/deprecated/legacy_test/test_dist_fleet_ps2_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps2.py rename to test/deprecated/legacy_test/test_dist_fleet_ps2_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps3.py b/test/deprecated/legacy_test/test_dist_fleet_ps3_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps3.py rename to test/deprecated/legacy_test/test_dist_fleet_ps3_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps4.py b/test/deprecated/legacy_test/test_dist_fleet_ps4_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps4.py rename to test/deprecated/legacy_test/test_dist_fleet_ps4_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps5.py b/test/deprecated/legacy_test/test_dist_fleet_ps5_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps5.py rename to test/deprecated/legacy_test/test_dist_fleet_ps5_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps6.py b/test/deprecated/legacy_test/test_dist_fleet_ps6_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps6.py rename to test/deprecated/legacy_test/test_dist_fleet_ps6_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps7.py b/test/deprecated/legacy_test/test_dist_fleet_ps7_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps7.py rename to test/deprecated/legacy_test/test_dist_fleet_ps7_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps8.py 
b/test/deprecated/legacy_test/test_dist_fleet_ps8_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps8.py rename to test/deprecated/legacy_test/test_dist_fleet_ps8_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps9.py b/test/deprecated/legacy_test/test_dist_fleet_ps9_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps9.py rename to test/deprecated/legacy_test/test_dist_fleet_ps9_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_fleet_ps.py b/test/deprecated/legacy_test/test_dist_fleet_ps_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_fleet_ps.py rename to test/deprecated/legacy_test/test_dist_fleet_ps_deprecated.py diff --git a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_adagrad.py b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_adagrad.py index 35dd48accd42a..518819cb15591 100644 --- a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_adagrad.py +++ b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_adagrad.py @@ -14,7 +14,7 @@ import unittest -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram +from test_dist_sparse_tensor_load_sgd_deprecated import TestSparseLoadProgram import paddle from paddle import base diff --git a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_adam.py b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_adam.py index b5eae0e39807e..88d28dc8cad52 100644 --- a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_adam.py +++ b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_adam.py @@ -14,7 +14,7 @@ import unittest -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram +from test_dist_sparse_tensor_load_sgd_deprecated import TestSparseLoadProgram import paddle from paddle import base diff --git a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_ftrl.py 
b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_ftrl.py index 6a1f0175b1619..a1cde59a3d7e0 100644 --- a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_ftrl.py +++ b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_ftrl.py @@ -14,7 +14,7 @@ import unittest -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram +from test_dist_sparse_tensor_load_sgd_deprecated import TestSparseLoadProgram import paddle from paddle import base diff --git a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_momentum.py b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_momentum.py index b7b590cbb3224..35f8ad09d8f3f 100644 --- a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_momentum.py +++ b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_momentum.py @@ -14,7 +14,7 @@ import unittest -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram +from test_dist_sparse_tensor_load_sgd_deprecated import TestSparseLoadProgram import paddle from paddle import base diff --git a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_rmsprop.py b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_rmsprop.py index 9ce8e211f1e67..0150ae8027a35 100644 --- a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_rmsprop.py +++ b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_rmsprop.py @@ -14,7 +14,7 @@ import unittest -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram +from test_dist_sparse_tensor_load_sgd_deprecated import TestSparseLoadProgram import paddle from paddle import base diff --git a/test/deprecated/legacy_test/test_dist_sparse_tensor_load_sgd.py b/test/deprecated/legacy_test/test_dist_sparse_tensor_load_sgd_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_dist_sparse_tensor_load_sgd.py rename to test/deprecated/legacy_test/test_dist_sparse_tensor_load_sgd_deprecated.py diff --git 
a/test/deprecated/legacy_test/test_eager_deletion_delete_vars.py b/test/deprecated/legacy_test/test_eager_deletion_delete_vars_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_eager_deletion_delete_vars.py rename to test/deprecated/legacy_test/test_eager_deletion_delete_vars_deprecated.py diff --git a/test/deprecated/legacy_test/test_ema.py b/test/deprecated/legacy_test/test_ema_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_ema.py rename to test/deprecated/legacy_test/test_ema_deprecated.py index acfd4479fe096..9dbe53ba17176 100644 --- a/test/deprecated/legacy_test/test_ema.py +++ b/test/deprecated/legacy_test/test_ema_deprecated.py @@ -19,6 +19,8 @@ import paddle from paddle import base +paddle.enable_static() + class TestExponentialMovingAverage(unittest.TestCase): def setUp(self): diff --git a/test/deprecated/legacy_test/test_ema_fleet.py b/test/deprecated/legacy_test/test_ema_fleet_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_ema_fleet.py rename to test/deprecated/legacy_test/test_ema_fleet_deprecated.py index e5ff36545f818..0d19e5c7dabb0 100644 --- a/test/deprecated/legacy_test/test_ema_fleet.py +++ b/test/deprecated/legacy_test/test_ema_fleet_deprecated.py @@ -19,6 +19,8 @@ import paddle from paddle import static, utils +paddle.enable_static() + def gen_data(): return np.random.random(size=(10, 5)).astype('float32') diff --git a/test/deprecated/legacy_test/test_embedding_id_stop_gradient.py b/test/deprecated/legacy_test/test_embedding_id_stop_gradient_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_embedding_id_stop_gradient.py rename to test/deprecated/legacy_test/test_embedding_id_stop_gradient_deprecated.py index e39d9c4674c67..71eb6c67def0f 100644 --- a/test/deprecated/legacy_test/test_embedding_id_stop_gradient.py +++ b/test/deprecated/legacy_test/test_embedding_id_stop_gradient_deprecated.py @@ -19,6 +19,8 @@ import paddle 
from paddle import base +paddle.enable_static() + class TestEmbeddingIdStopGradientBase(unittest.TestCase): def setUp(self): diff --git a/test/deprecated/legacy_test/test_entry_attr2.py b/test/deprecated/legacy_test/test_entry_attr2_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_entry_attr2.py rename to test/deprecated/legacy_test/test_entry_attr2_deprecated.py diff --git a/test/deprecated/legacy_test/test_entry_attr.py b/test/deprecated/legacy_test/test_entry_attr_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_entry_attr.py rename to test/deprecated/legacy_test/test_entry_attr_deprecated.py diff --git a/test/deprecated/legacy_test/test_error_clip.py b/test/deprecated/legacy_test/test_error_clip_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_error_clip.py rename to test/deprecated/legacy_test/test_error_clip_deprecated.py diff --git a/test/deprecated/legacy_test/test_executor_and_use_program_cache_deprecated.py b/test/deprecated/legacy_test/test_executor_and_use_program_cache_deprecated.py new file mode 100644 index 0000000000000..818bb1e48c3a6 --- /dev/null +++ b/test/deprecated/legacy_test/test_executor_and_use_program_cache_deprecated.py @@ -0,0 +1,90 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import unittest + +import numpy as np + +sys.path.append("../../legacy_test") + +import paddle +from paddle import base + +paddle.enable_static() + + +class TestExecutor(unittest.TestCase): + def test_mul(self): + main_program = base.Program() + startup_program = base.Program() + with base.program_guard(main_program, startup_program): + a = paddle.static.data(name='a', shape=[-1, 784], dtype='float32') + b = paddle.static.data(name='b', shape=[784, 100], dtype='float32') + a.desc.set_need_check_feed(False) + b.desc.set_need_check_feed(False) + output = paddle.matmul(x=a, y=b) + + # Compute with numpy + a_np = np.random.random((100, 784)).astype('float32') + b_np = np.random.random((784, 100)).astype('float32') + out_np = np.dot(a_np, b_np) + + place = paddle.CPUPlace() + exe = base.Executor(place) + + def _train(use_program_cache, max_iters=1): + import time + + run_time = 0.0 + for i in range(max_iters): + begin = time.time() + outs = exe.run( + program=main_program, + feed={'a': a_np, 'b': b_np}, + fetch_list=[output], + use_program_cache=use_program_cache, + ) + end = time.time() + run_time += end - begin + out = outs[0] + self.assertEqual((100, 100), out.shape) + np.testing.assert_allclose(out, out_np, rtol=1e-05) + return run_time + + max_iters = 3 + run_time_with_cache = _train( + use_program_cache=True, max_iters=max_iters + ) + print("run time with program cache: %f" % run_time_with_cache) + + run_time_without_cache = _train( + use_program_cache=False, max_iters=max_iters + ) + print("run time without program cache: %f" % run_time_without_cache) + + run_time_with_cache = _train( + use_program_cache=True, max_iters=max_iters + ) + print("run time with program cache: %f" % run_time_with_cache) + + run_time_with_cache = _train( + use_program_cache=True, max_iters=max_iters + ) + print("run time with program cache: %f" % run_time_with_cache) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() diff --git 
a/test/deprecated/legacy_test/test_executor_check_feed.py b/test/deprecated/legacy_test/test_executor_check_feed_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_executor_check_feed.py rename to test/deprecated/legacy_test/test_executor_check_feed_deprecated.py index 78fb383885ec4..59be3ede8229f 100644 --- a/test/deprecated/legacy_test/test_executor_check_feed.py +++ b/test/deprecated/legacy_test/test_executor_check_feed_deprecated.py @@ -17,6 +17,8 @@ import paddle from paddle import base +paddle.enable_static() + class TestExecutor(unittest.TestCase): def net(self): diff --git a/test/deprecated/legacy_test/test_executor_feed_non_tensor.py b/test/deprecated/legacy_test/test_executor_feed_non_tensor.py index b472ccdc9158c..ff3ff65e9652c 100644 --- a/test/deprecated/legacy_test/test_executor_feed_non_tensor.py +++ b/test/deprecated/legacy_test/test_executor_feed_non_tensor.py @@ -14,130 +14,10 @@ import unittest -import numpy - import paddle from paddle import base -class TestExecutor(unittest.TestCase): - def net(self): - lr = 0.0 - x = paddle.static.data(name="x", shape=[None, 1], dtype='float32') - y = paddle.static.data(name="y", shape=[None, 1], dtype='float32') - y_predict = paddle.static.nn.fc(x, size=1) - - cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) - avg_cost = paddle.mean(cost) - - opt = paddle.optimizer.Adam(learning_rate=lr) - opt.minimize(avg_cost) - - return paddle.to_tensor(lr), avg_cost - - def test_program_feed_float(self): - main_program = base.Program() - startup_program = base.Program() - scope = base.Scope() - with base.program_guard(main_program, startup_program): - with base.scope_guard(scope): - cpu = base.CPUPlace() - exe = base.Executor(cpu) - lr, cost = self.net() - exe.run(startup_program) - train_data = numpy.array([[1.0], [2.0], [3.0], [4.0]]).astype( - 'float32' - ) - y_true = numpy.array([[2.0], [4.0], [6.0], [8.0]]).astype( - 'float32' - ) - a = 0.01 - _lr, _ = exe.run( - 
feed={'x': train_data, 'y': y_true, 'lr': a}, - fetch_list=[lr, cost], - return_numpy=False, - ) - self.assertEqual(_lr._dtype(), lr.dtype) - self.assertEqual(_lr._dtype(), paddle.float32) - self.assertEqual(type(a), float) - - def test_program_feed_int(self): - main_program = base.Program() - startup_program = base.Program() - scope = base.Scope() - with base.program_guard(main_program, startup_program): - with base.scope_guard(scope): - cpu = base.CPUPlace() - exe = base.Executor(cpu) - lr, cost = self.net() - exe.run(startup_program) - train_data = numpy.array([[1.0], [2.0], [3.0], [4.0]]).astype( - 'float32' - ) - y_true = numpy.array([[2.0], [4.0], [6.0], [8.0]]).astype( - 'float32' - ) - a = 0 - _lr, _ = exe.run( - feed={'x': train_data, 'y': y_true, 'lr': a}, - fetch_list=[lr, cost], - return_numpy=False, - ) - self.assertEqual(_lr._dtype(), lr.dtype) - self.assertEqual(_lr._dtype(), paddle.float32) - self.assertEqual(type(a), int) - - def test_program_feed_list(self): - main_program = base.Program() - startup_program = base.Program() - scope = base.Scope() - with base.program_guard(main_program, startup_program): - with base.scope_guard(scope): - cpu = base.CPUPlace() - exe = base.Executor(cpu) - lr, cost = self.net() - exe.run(startup_program) - train_data = [[1.0], [2.0], [3.0], [4.0]] - y_true = [[2.0], [4.0], [6.0], [8.0]] - a = 0 - _lr, _ = exe.run( - feed={'x': train_data, 'y': y_true, 'lr': a}, - fetch_list=[lr, cost], - return_numpy=False, - ) - self.assertEqual(_lr._dtype(), lr.dtype) - self.assertEqual(_lr._dtype(), paddle.float32) - self.assertEqual(type(y_true), list) - - def test_compiled_program_feed_scalar(self): - main_program = base.Program() - startup_program = base.Program() - scope = base.Scope() - with base.program_guard(main_program, startup_program): - with base.scope_guard(scope): - lr, cost = self.net() - cpu = base.CPUPlace() - exe = base.Executor(cpu) - exe.run(startup_program) - compiled_prog = base.CompiledProgram(main_program) 
- train_data = numpy.array([[1.0], [2.0], [3.0], [4.0]]).astype( - 'float32' - ) - y_true = numpy.array([[2.0], [4.0], [6.0], [8.0]]).astype( - 'float32' - ) - a = 0.01 - _lr, _ = exe.run( - compiled_prog, - feed={'x': train_data, 'y': y_true, 'lr': a}, - fetch_list=[lr, cost], - return_numpy=False, - ) - self.assertEqual(_lr._dtype(), lr.dtype) - self.assertEqual(_lr._dtype(), paddle.float32) - self.assertEqual(type(a), float) - - class TestAsLodTensor(unittest.TestCase): def test_as_lodtensor_int32(self): cpu = base.CPUPlace() diff --git a/test/deprecated/legacy_test/test_executor_feed_non_tensor_deprecated.py b/test/deprecated/legacy_test/test_executor_feed_non_tensor_deprecated.py new file mode 100644 index 0000000000000..cbcdffd11fa2e --- /dev/null +++ b/test/deprecated/legacy_test/test_executor_feed_non_tensor_deprecated.py @@ -0,0 +1,144 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy + +import paddle +from paddle import base + +paddle.enable_static() + + +class TestExecutor(unittest.TestCase): + def net(self): + lr = 0.0 + x = paddle.static.data(name="x", shape=[None, 1], dtype='float32') + y = paddle.static.data(name="y", shape=[None, 1], dtype='float32') + y_predict = paddle.static.nn.fc(x, size=1) + + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) + avg_cost = paddle.mean(cost) + + opt = paddle.optimizer.Adam(learning_rate=lr) + opt.minimize(avg_cost) + + return paddle.to_tensor(lr), avg_cost + + def test_program_feed_float(self): + main_program = base.Program() + startup_program = base.Program() + scope = base.Scope() + with base.program_guard(main_program, startup_program): + with base.scope_guard(scope): + cpu = base.CPUPlace() + exe = base.Executor(cpu) + lr, cost = self.net() + exe.run(startup_program) + train_data = numpy.array([[1.0], [2.0], [3.0], [4.0]]).astype( + 'float32' + ) + y_true = numpy.array([[2.0], [4.0], [6.0], [8.0]]).astype( + 'float32' + ) + a = 0.01 + _lr, _ = exe.run( + feed={'x': train_data, 'y': y_true, 'lr': a}, + fetch_list=[lr, cost], + return_numpy=False, + ) + self.assertEqual(_lr._dtype(), lr.dtype) + self.assertEqual(_lr._dtype(), paddle.float32) + self.assertEqual(type(a), float) + + def test_program_feed_int(self): + main_program = base.Program() + startup_program = base.Program() + scope = base.Scope() + with base.program_guard(main_program, startup_program): + with base.scope_guard(scope): + cpu = base.CPUPlace() + exe = base.Executor(cpu) + lr, cost = self.net() + exe.run(startup_program) + train_data = numpy.array([[1.0], [2.0], [3.0], [4.0]]).astype( + 'float32' + ) + y_true = numpy.array([[2.0], [4.0], [6.0], [8.0]]).astype( + 'float32' + ) + a = 0 + _lr, _ = exe.run( + feed={'x': train_data, 'y': y_true, 'lr': a}, + fetch_list=[lr, cost], + return_numpy=False, + ) + self.assertEqual(_lr._dtype(), lr.dtype) + self.assertEqual(_lr._dtype(), 
paddle.float32) + self.assertEqual(type(a), int) + + def test_program_feed_list(self): + main_program = base.Program() + startup_program = base.Program() + scope = base.Scope() + with base.program_guard(main_program, startup_program): + with base.scope_guard(scope): + cpu = base.CPUPlace() + exe = base.Executor(cpu) + lr, cost = self.net() + exe.run(startup_program) + train_data = [[1.0], [2.0], [3.0], [4.0]] + y_true = [[2.0], [4.0], [6.0], [8.0]] + a = 0 + _lr, _ = exe.run( + feed={'x': train_data, 'y': y_true, 'lr': a}, + fetch_list=[lr, cost], + return_numpy=False, + ) + self.assertEqual(_lr._dtype(), lr.dtype) + self.assertEqual(_lr._dtype(), paddle.float32) + self.assertEqual(type(y_true), list) + + def test_compiled_program_feed_scalar(self): + main_program = base.Program() + startup_program = base.Program() + scope = base.Scope() + with base.program_guard(main_program, startup_program): + with base.scope_guard(scope): + lr, cost = self.net() + cpu = base.CPUPlace() + exe = base.Executor(cpu) + exe.run(startup_program) + compiled_prog = base.CompiledProgram(main_program) + train_data = numpy.array([[1.0], [2.0], [3.0], [4.0]]).astype( + 'float32' + ) + y_true = numpy.array([[2.0], [4.0], [6.0], [8.0]]).astype( + 'float32' + ) + a = 0.01 + _lr, _ = exe.run( + compiled_prog, + feed={'x': train_data, 'y': y_true, 'lr': a}, + fetch_list=[lr, cost], + return_numpy=False, + ) + self.assertEqual(_lr._dtype(), lr.dtype) + self.assertEqual(_lr._dtype(), paddle.float32) + self.assertEqual(type(a), float) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_eye_op.py b/test/deprecated/legacy_test/test_eye_op.py index 41a4e6aea2f9d..cafbfbd96beb0 100644 --- a/test/deprecated/legacy_test/test_eye_op.py +++ b/test/deprecated/legacy_test/test_eye_op.py @@ -17,7 +17,7 @@ import numpy as np from op_test import OpTest -from test_attribute_var import UnittestBase +from test_attribute_var_deprecated import UnittestBase import paddle 
from paddle import base diff --git a/test/deprecated/legacy_test/test_fc_op.py b/test/deprecated/legacy_test/test_fc_op_deprecated.py similarity index 66% rename from test/deprecated/legacy_test/test_fc_op.py rename to test/deprecated/legacy_test/test_fc_op_deprecated.py index d3d2008c17e15..961fb6e006bad 100644 --- a/test/deprecated/legacy_test/test_fc_op.py +++ b/test/deprecated/legacy_test/test_fc_op_deprecated.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, paddle_static_guard +from op_test import paddle_static_guard import paddle from paddle import base @@ -54,88 +54,6 @@ def __init__(self, mb, ic, oc, h, w, bias_dims=2): self.bias = np.random.random(oc).astype("float32") -class TestFCOp(OpTest): - def config(self): - self.with_bias = True - self.with_relu = True - self.matrix = MatrixGenerate(1, 10, 15, 3, 3, 2) - - def setUp(self): - self.op_type = "fc" - self.config() - - if self.with_bias: - self.inputs = { - 'Input': self.matrix.input, - 'W': self.matrix.weights, - 'Bias': self.matrix.bias, - } - else: - self.inputs = {'Input': self.matrix.input, 'W': self.matrix.weights} - - if self.with_relu: - activation_type = "relu" - else: - activation_type = "" - self.attrs = {'use_mkldnn': False, 'activation_type': activation_type} - - self.outputs = { - 'Out': fc_refer(self.matrix, self.with_bias, self.with_relu) - } - - def test_check_output(self): - self.check_output(check_dygraph=False) - - -class TestFCOpNoBias1(TestFCOp): - def config(self): - self.with_bias = False - self.with_relu = False - self.matrix = MatrixGenerate(2, 8, 10, 1, 1, 2) - - -class TestFCOpNoBias2(TestFCOp): - def config(self): - self.with_bias = False - self.with_relu = False - self.matrix = MatrixGenerate(4, 5, 6, 2, 2, 1) - - -class TestFCOpNoBias4(TestFCOp): - def config(self): - self.with_bias = False - self.with_relu = False - self.matrix = MatrixGenerate(1, 32, 64, 3, 3, 1) - - -class TestFCOpWithBias1(TestFCOp): - def config(self): - 
self.with_bias = True - self.with_relu = False - self.matrix = MatrixGenerate(3, 8, 10, 2, 1, 2) - - -class TestFCOpWithBias2(TestFCOp): - def config(self): - self.with_bias = True - self.with_relu = True - self.matrix = MatrixGenerate(4, 5, 6, 2, 2, 1) - - -class TestFCOpWithBias3(TestFCOp): - def config(self): - self.with_bias = True - self.with_relu = True - self.matrix = MatrixGenerate(1, 64, 32, 3, 3, 1) - - -class TestFCOpWithPadding(TestFCOp): - def config(self): - self.with_bias = True - self.with_relu = True - self.matrix = MatrixGenerate(1, 4, 3, 128, 128, 2) - - class TestFcOp_NumFlattenDims_NegOne(unittest.TestCase): def test_api(self): def run_program(num_flatten_dims): diff --git a/test/deprecated/legacy_test/test_feed_data_check_shape_type.py b/test/deprecated/legacy_test/test_feed_data_check_shape_type_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_feed_data_check_shape_type.py rename to test/deprecated/legacy_test/test_feed_data_check_shape_type_deprecated.py index f89247860ba74..40858d81f65dd 100644 --- a/test/deprecated/legacy_test/test_feed_data_check_shape_type.py +++ b/test/deprecated/legacy_test/test_feed_data_check_shape_type_deprecated.py @@ -22,6 +22,8 @@ from paddle import base from paddle.base import core +paddle.enable_static() + os.environ['CPU_NUM'] = str(4) np.random.seed(123) diff --git a/test/deprecated/legacy_test/test_fleet_base.py b/test/deprecated/legacy_test/test_fleet_base.py index 2ffd8a747c72d..a8b3203b131c2 100644 --- a/test/deprecated/legacy_test/test_fleet_base.py +++ b/test/deprecated/legacy_test/test_fleet_base.py @@ -18,7 +18,6 @@ import numpy as np import paddle -from paddle import base from paddle.distributed import fleet from paddle.distributed.fleet.base import role_maker @@ -184,45 +183,5 @@ def test_dygraph_method(self): final_strategy = fleet._final_strategy() -class TestFleetBaseSingleError(unittest.TestCase): - def setUp(self): - os.environ.pop("PADDLE_TRAINER_ENDPOINTS") - - 
def gen_data(self): - return { - "x": np.random.random(size=(128, 32)).astype('float32'), - "y": np.random.randint(2, size=(128, 1)).astype('int64'), - } - - def test_single_run_collective_minimize(self): - def test_single_error(): - input_x = paddle.static.data( - name="x", shape=[-1, 32], dtype='float32' - ) - input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') - - fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') - prediction = paddle.static.nn.fc( - x=fc_1, size=2, activation='softmax' - ) - cost = paddle.nn.functional.cross_entropy( - input=prediction, - label=input_y, - reduction='none', - use_softmax=False, - ) - avg_cost = paddle.mean(x=cost) - fleet.init(is_collective=True) - - # in non_distributed mode(use `python` to launch), raise error if has multi cards - if ( - base.core.is_compiled_with_cuda() - and base.core.get_cuda_device_count() > 1 - ): - self.assertRaises(ValueError, test_single_error) - else: - test_single_error() - - if __name__ == "__main__": unittest.main() diff --git a/test/deprecated/legacy_test/test_fleet_base_2.py b/test/deprecated/legacy_test/test_fleet_base_2_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_fleet_base_2.py rename to test/deprecated/legacy_test/test_fleet_base_2_deprecated.py diff --git a/test/deprecated/legacy_test/test_fleet_base_3.py b/test/deprecated/legacy_test/test_fleet_base_3_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_fleet_base_3.py rename to test/deprecated/legacy_test/test_fleet_base_3_deprecated.py diff --git a/test/deprecated/legacy_test/test_fleet_base_deprecated.py b/test/deprecated/legacy_test/test_fleet_base_deprecated.py new file mode 100644 index 0000000000000..496bae7442061 --- /dev/null +++ b/test/deprecated/legacy_test/test_fleet_base_deprecated.py @@ -0,0 +1,62 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.distributed import fleet + + +class TestFleetBaseSingleError(unittest.TestCase): + def gen_data(self): + return { + "x": np.random.random(size=(128, 32)).astype('float32'), + "y": np.random.randint(2, size=(128, 1)).astype('int64'), + } + + def test_single_run_collective_minimize(self): + def test_single_error(): + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' + ) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') + + fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') + prediction = paddle.static.nn.fc( + x=fc_1, size=2, activation='softmax' + ) + cost = paddle.nn.functional.cross_entropy( + input=prediction, + label=input_y, + reduction='none', + use_softmax=False, + ) + avg_cost = paddle.mean(x=cost) + fleet.init(is_collective=True) + + # in non_distributed mode(use `python` to launch), raise error if has multi cards + if ( + base.core.is_compiled_with_cuda() + and base.core.get_cuda_device_count() > 1 + ): + self.assertRaises(ValueError, test_single_error) + else: + test_single_error() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_fleet.py b/test/deprecated/legacy_test/test_fleet_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_fleet.py rename to 
test/deprecated/legacy_test/test_fleet_deprecated.py index 0e9eb0579cc98..0c39931932649 100644 --- a/test/deprecated/legacy_test/test_fleet.py +++ b/test/deprecated/legacy_test/test_fleet_deprecated.py @@ -16,6 +16,10 @@ import os import unittest +import paddle + +paddle.enable_static() + class TestFleet1(unittest.TestCase): """ diff --git a/test/deprecated/legacy_test/test_fleet_nocvm_1.py b/test/deprecated/legacy_test/test_fleet_nocvm_1_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_fleet_nocvm_1.py rename to test/deprecated/legacy_test/test_fleet_nocvm_1_deprecated.py index d9962c1a27b38..331047d3cb325 100644 --- a/test/deprecated/legacy_test/test_fleet_nocvm_1.py +++ b/test/deprecated/legacy_test/test_fleet_nocvm_1_deprecated.py @@ -18,6 +18,8 @@ import paddle +paddle.enable_static() + class TestFleet1(unittest.TestCase): """ diff --git a/test/deprecated/legacy_test/test_fleet_unitaccessor.py b/test/deprecated/legacy_test/test_fleet_unitaccessor_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_fleet_unitaccessor.py rename to test/deprecated/legacy_test/test_fleet_unitaccessor_deprecated.py index f6e33ed1ee6b3..faf5487e4bf9a 100644 --- a/test/deprecated/legacy_test/test_fleet_unitaccessor.py +++ b/test/deprecated/legacy_test/test_fleet_unitaccessor_deprecated.py @@ -18,6 +18,8 @@ import paddle +paddle.enable_static() + class TestFleet1(unittest.TestCase): """ diff --git a/test/deprecated/legacy_test/test_functional_conv2d.py b/test/deprecated/legacy_test/test_functional_conv2d_deprecated.py similarity index 64% rename from test/deprecated/legacy_test/test_functional_conv2d.py rename to test/deprecated/legacy_test/test_functional_conv2d_deprecated.py index 0cc69fb0281a4..790cebd22423c 100644 --- a/test/deprecated/legacy_test/test_functional_conv2d.py +++ b/test/deprecated/legacy_test/test_functional_conv2d_deprecated.py @@ -22,6 +22,8 @@ import paddle.nn.functional as F from paddle import base 
+paddle.enable_static() + class TestFunctionalConv2D(TestCase): batch_size = 4 @@ -196,76 +198,6 @@ def test_identity_gpu(self): self._test_identity() -class TestFunctionalConv2DError(TestCase): - batch_size = 4 - spatial_shape = (16, 16) - dtype = "float32" - - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = "not_valid" - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NHWC" - - def test_exception(self): - self.prepare() - with self.assertRaises(ValueError): - self.static_graph_case() - - def prepare(self): - if isinstance(self.filter_shape, int): - filter_shape = (self.filter_shape,) * 2 - else: - filter_shape = tuple(self.filter_shape) - self.weight_shape = ( - self.out_channels, - self.in_channels // self.groups, - ) + filter_shape - self.bias_shape = (self.out_channels,) - - def static_graph_case(self): - main = base.Program() - start = base.Program() - with base.unique_name.guard(): - with base.program_guard(main, start): - self.channel_last = self.data_format == "NHWC" - if self.channel_last: - x = x = paddle.static.data( - "input", - (-1, -1, -1, self.in_channels), - dtype=self.dtype, - ) - else: - x = paddle.static.data( - "input", - (-1, self.in_channels, -1, -1), - dtype=self.dtype, - ) - weight = paddle.static.data( - "weight", self.weight_shape, dtype=self.dtype - ) - if not self.no_bias: - bias = paddle.static.data( - "bias", self.bias_shape, dtype=self.dtype - ) - y = F.conv2d( - x, - weight, - None if self.no_bias else bias, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format, - ) - - class TestFunctionalConv2DCase2(TestFunctionalConv2D): def setUp(self): self.in_channels = 3 @@ -371,126 +303,6 @@ def setUp(self): self.data_format = "NCHW" -class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 3 - 
self.out_channels = 5 - self.filter_shape = 3 - self.padding = [[0, 0], [1, 2], [3, 4], [5, 6]] - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = False - self.data_format = "NCHW" - - -class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 4 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = False - self.data_format = "not_valid" - - -class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 4 - self.out_channels = 3 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = False - self.data_format = "NCHW" - - -class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True - self.data_format = "not_valid" - - -class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = [1, 2, 1, 2, 1] - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = True - self.data_format = "NCHW" - - -class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = -5 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = [[0, 0], [0, 0], [3, 2], [1, 2]] - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = False - self.data_format = "NCHW" - - 
-class TestFunctionalConv2DErrorCase10(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 4 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = False - self.data_format = "NHWC" - - -class TestFunctionalConv2DErrorCase11(TestFunctionalConv2DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = 0 - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.use_cudnn = False - self.data_format = "NHCW" - - class TestFunctionalConv2DErrorCase12(TestCase): def setUp(self): self.input = np.array([]) @@ -532,30 +344,6 @@ def static_graph_case(self): (out,) = exe.run(main, feed={"input": self.input}, fetch_list=[y]) return out - def dygraph_case(self): - with dg.guard(): - x = paddle.to_tensor(self.input, dtype=paddle.float32) - w = paddle.to_tensor(self.filter, dtype=paddle.float32) - b = ( - None - if self.bias is None - else paddle.to_tensor(self.bias, dtype=paddle.float32) - ) - y = F.conv2d( - x, - w, - b, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format, - ) - - def test_dygraph_exception(self): - with self.assertRaises(ValueError): - self.dygraph_case() - def test_static_exception(self): with self.assertRaises(ValueError): self.static_graph_case() diff --git a/test/deprecated/legacy_test/test_functional_conv2d_transpose.py b/test/deprecated/legacy_test/test_functional_conv2d_transpose_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_functional_conv2d_transpose.py rename to test/deprecated/legacy_test/test_functional_conv2d_transpose_deprecated.py index c8d5f88af6de3..8fb2393bbd7a9 100644 --- a/test/deprecated/legacy_test/test_functional_conv2d_transpose.py +++ 
b/test/deprecated/legacy_test/test_functional_conv2d_transpose_deprecated.py @@ -22,6 +22,8 @@ import paddle.nn.functional as F from paddle import base +paddle.enable_static() + class TestFunctionalConv2D(TestCase): batch_size = 4 diff --git a/test/deprecated/legacy_test/test_functional_conv3d.py b/test/deprecated/legacy_test/test_functional_conv3d_deprecated.py similarity index 66% rename from test/deprecated/legacy_test/test_functional_conv3d.py rename to test/deprecated/legacy_test/test_functional_conv3d_deprecated.py index 6634d0194670b..68e65a7db30b0 100644 --- a/test/deprecated/legacy_test/test_functional_conv3d.py +++ b/test/deprecated/legacy_test/test_functional_conv3d_deprecated.py @@ -22,6 +22,8 @@ import paddle.nn.functional as F from paddle import base +paddle.enable_static() + class TestFunctionalConv3D(TestCase): batch_size = 4 @@ -196,79 +198,6 @@ def test_identity_gpu(self): self._test_identity() -class TestFunctionalConv3DError(TestCase): - batch_size = 4 - spatial_shape = (8, 8, 8) - dtype = "float32" - - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = "not_valid" - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NDHWC" - - def test_exception(self): - self.prepare() - with self.assertRaises(ValueError): - self.static_graph_case() - - def prepare(self): - if isinstance(self.filter_shape, int): - filter_shape = (self.filter_shape,) * 3 - else: - filter_shape = tuple(self.filter_shape) - self.weight_shape = ( - self.out_channels, - self.in_channels // self.groups, - ) + filter_shape - self.bias_shape = (self.out_channels,) - - def static_graph_case(self): - main = base.Program() - start = base.Program() - with base.unique_name.guard(): - with base.program_guard(main, start): - self.channel_last = self.data_format == "NDHWC" - if self.channel_last: - x = x = paddle.static.data( - "input", - (-1, -1, -1, -1, 
self.in_channels), - dtype=self.dtype, - ) - else: - x = paddle.static.data( - "input", - (-1, self.in_channels, -1, -1, -1), - dtype=self.dtype, - ) - weight = paddle.static.data( - "weight", self.weight_shape, dtype=self.dtype - ) - if not self.no_bias: - bias = paddle.static.data( - "bias", self.bias_shape, dtype=self.dtype - ) - y = F.conv3d( - x, - weight, - None if self.no_bias else bias, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format, - ) - - if self.act == 'sigmoid': - y = F.sigmoid(y) - - class TestFunctionalConv3DCase2(TestFunctionalConv3D): def setUp(self): self.in_channels = 3 @@ -368,104 +297,6 @@ def setUp(self): self.data_format = "NCDHW" -class TestFunctionalConv3DErrorCase2(TestFunctionalConv3DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = [[0, 0], [1, 1], [1, 2], [3, 4], [5, 6]] - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - -class TestFunctionalConv3DErrorCase3(TestFunctionalConv3DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 4 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "not_valid" - - -class TestFunctionalConv3DErrorCase4(TestFunctionalConv3DError): - def setUp(self): - self.in_channels = 4 - self.out_channels = 3 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - -class TestFunctionalConv3DErrorCase7(TestFunctionalConv3DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = 
"sigmoid" - self.data_format = "not_valid" - - -class TestFunctionalConv3DErrorCase8(TestFunctionalConv3DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = [1, 2, 1, 2, 1] - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - -class TestFunctionalConv3DErrorCase9(TestFunctionalConv3DError): - def setUp(self): - self.in_channels = -5 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = [[0, 0], [0, 0], [3, 2], [1, 2], [1, 1]] - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - -class TestFunctionalConv3DErrorCase10(TestFunctionalConv3DError): - def setUp(self): - self.in_channels = 3 - self.out_channels = 4 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NDHWC" - - class TestFunctionalConv3DErrorCase11(TestCase): def setUp(self): self.input = np.array([]) @@ -507,30 +338,6 @@ def static_graph_case(self): (out,) = exe.run(main, feed={"input": self.input}, fetch_list=[y]) return out - def dygraph_case(self): - with dg.guard(): - x = paddle.to_tensor(self.input, dtype=paddle.float32) - w = paddle.to_tensor(self.filter, dtype=paddle.float32) - b = ( - None - if self.bias is None - else paddle.to_tensor(self.bias, dtype=paddle.float32) - ) - y = F.conv3d( - x, - w, - b, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format, - ) - - def test_dygraph_exception(self): - with self.assertRaises(ValueError): - self.dygraph_case() - def test_static_exception(self): with self.assertRaises(ValueError): self.static_graph_case() diff --git a/test/deprecated/legacy_test/test_functional_conv3d_transpose.py 
b/test/deprecated/legacy_test/test_functional_conv3d_transpose.py index a4ea020dd2996..5f5ee23b04cfe 100644 --- a/test/deprecated/legacy_test/test_functional_conv3d_transpose.py +++ b/test/deprecated/legacy_test/test_functional_conv3d_transpose.py @@ -23,179 +23,6 @@ from paddle import base -class TestFunctionalConv3DTranspose(TestCase): - batch_size = 4 - spatial_shape = (8, 8, 8) - dtype = "float32" - output_size = None - - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = 0 - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NDHWC" - - def prepare(self): - if isinstance(self.filter_shape, int): - filter_shape = (self.filter_shape,) * 3 - else: - filter_shape = tuple(self.filter_shape) - - self.weight = np.random.uniform( - -1, - 1, - (self.in_channels, self.out_channels // self.groups) + filter_shape, - ).astype(self.dtype) - if not self.no_bias: - self.bias = np.random.uniform(-1, 1, (self.out_channels,)).astype( - self.dtype - ) - - self.channel_last = self.data_format == "NDHWC" - if self.channel_last: - self.input_shape = ( - (self.batch_size,) + self.spatial_shape + (self.in_channels,) - ) - else: - self.input_shape = ( - self.batch_size, - self.in_channels, - ) + self.spatial_shape - - self.input = np.random.uniform(-1, 1, self.input_shape).astype( - self.dtype - ) - - def static_graph_case_1(self): - main = base.Program() - start = base.Program() - with base.unique_name.guard(): - with base.program_guard(main, start): - if self.channel_last: - x = paddle.static.data( - "input", - (-1, -1, -1, -1, self.in_channels), - dtype=self.dtype, - ) - else: - x = paddle.static.data( - "input", - (-1, self.in_channels, -1, -1, -1), - dtype=self.dtype, - ) - y = paddle.static.nn.conv3d_transpose( - x, - self.out_channels, - output_size=self.output_size, - filter_size=self.filter_shape, - stride=self.stride, - padding=self.padding, - 
dilation=self.dilation, - groups=self.groups, - param_attr=paddle.nn.initializer.Assign(self.weight), - bias_attr=False - if self.no_bias - else paddle.nn.initializer.Assign(self.bias), - act=self.act, - data_format=self.data_format, - ) - exe = base.Executor(self.place) - exe.run(start) - (out,) = exe.run(main, feed={"input": self.input}, fetch_list=[y]) - return out - - def static_graph_case_2(self): - main = base.Program() - start = base.Program() - with base.unique_name.guard(): - with base.program_guard(main, start): - if self.channel_last: - x = x = paddle.static.data( - "input", - (-1, -1, -1, -1, self.in_channels), - dtype=self.dtype, - ) - else: - x = paddle.static.data( - "input", - (-1, self.in_channels, -1, -1, -1), - dtype=self.dtype, - ) - weight = paddle.static.data( - "weight", self.weight.shape, dtype=self.dtype - ) - if not self.no_bias: - bias = paddle.static.data( - "bias", self.bias.shape, dtype=self.dtype - ) - y = F.conv3d_transpose( - x, - weight, - None if self.no_bias else bias, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format, - ) - if self.act == 'sigmoid': - y = F.sigmoid(y) - exe = base.Executor(self.place) - exe.run(start) - feed_dict = {"input": self.input, "weight": self.weight} - if not self.no_bias: - feed_dict["bias"] = self.bias - (out,) = exe.run(main, feed=feed_dict, fetch_list=[y]) - return out - - def dygraph_case(self): - with dg.guard(self.place): - x = paddle.to_tensor(self.input) - weight = paddle.to_tensor(self.weight) - bias = None if self.no_bias else paddle.to_tensor(self.bias) - y = F.conv3d_transpose( - x, - weight, - bias, - output_size=self.output_size, - padding=self.padding, - stride=self.stride, - dilation=self.dilation, - groups=self.groups, - data_format=self.data_format, - ) - if self.act == 'sigmoid': - y = F.sigmoid(y) - out = y.numpy() - return out - - def _test_identity(self): - self.prepare() - 
out1 = self.static_graph_case_1() - out2 = self.static_graph_case_2() - out3 = self.dygraph_case() - np.testing.assert_array_almost_equal(out1, out2) - np.testing.assert_array_almost_equal(out2, out3) - - def test_identity_cpu(self): - self.place = base.CPUPlace() - self._test_identity() - - @unittest.skipIf( - not base.core.is_compiled_with_cuda(), "core is not compiled with CUDA" - ) - def test_identity_gpu(self): - self.place = base.CUDAPlace(0) - self._test_identity() - - class TestFunctionalConv3DTransposeError(TestCase): batch_size = 4 spatial_shape = (8, 8, 8) @@ -270,147 +97,6 @@ def static_graph_case(self): y = F.sigmoid(y) -class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 3 - self.out_channels = 5 - self.filter_shape = 3 - self.padding = 0 - self.stride = 1 - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - -class TestFunctionalConv3DTransposeCase3(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 6 - self.filter_shape = 3 - self.padding = 0 - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NDHWC" - - -class TestFunctionalConv3DTransposeCase4(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 6 - self.filter_shape = 3 - self.padding = "same" - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = True - self.act = "sigmoid" - self.data_format = "NDHWC" - - -class TestFunctionalConv3DTransposeCase5(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 6 - self.filter_shape = 3 - self.padding = "valid" - self.stride = (1, 2, 1) - self.dilation = (2, 1, 1) - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NDHWC" - - -class 
TestFunctionalConv3DTransposeCase6(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 4 - self.filter_shape = 3 - self.padding = "valid" - self.stride = (1, 2, 1) - self.dilation = 1 - self.groups = 4 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NDHWC" - - -class TestFunctionalConv3DTransposeCase7(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 4 - self.filter_shape = 3 - self.padding = "valid" - self.output_size = (10, 17, 10) - self.stride = (1, 2, 1) - self.dilation = 1 - self.groups = 1 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - -class TestFunctionalConv3DTransposeCase8(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 6 - self.filter_shape = 3 - self.padding = [[0, 0], [1, 2], [1, 2], [2, 1], [0, 0]] - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NDHWC" - - -class TestFunctionalConv3DTransposeCase9(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 6 - self.filter_shape = 3 - self.padding = [[0, 0], [0, 0], [1, 1], [1, 1], [2, 2]] - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - -class TestFunctionalConv3DTransposeCase10(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 6 - self.filter_shape = 3 - self.padding = [1, 1, 2, 2, 1, 1] - self.stride = 1 - self.dilation = 1 - self.groups = 2 - self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - -class TestFunctionalConv3DTransposeCase11(TestFunctionalConv3DTranspose): - def setUp(self): - self.in_channels = 4 - self.out_channels = 6 - self.filter_shape = 3 - self.padding = [1, 2, 1] - self.stride = 1 - self.dilation = 1 - self.groups = 2 - 
self.no_bias = False - self.act = "sigmoid" - self.data_format = "NCDHW" - - class TestFunctionalConv3DTransposeErrorCase2( TestFunctionalConv3DTransposeError ): @@ -537,34 +223,6 @@ def setUp(self): self.groups = 1 self.data_format = "NCDHW" - def static_graph_case(self): - main = base.Program() - start = base.Program() - with base.unique_name.guard(): - with base.program_guard(main, start): - x = paddle.static.data( - "input", self.input.shape, dtype=paddle.float32 - ) - y = paddle.static.nn.conv3d_transpose( - x, - self.num_filters, - self.filter_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - groups=self.groups, - param_attr=paddle.nn.initializer.Assign(self.filter), - bias_attr=False - if self.bias is None - else paddle.nn.initializer.Assign(self.bias), - act=None, - data_format=self.data_format, - ) - exe = base.Executor() - exe.run(start) - (out,) = exe.run(main, feed={"input": self.input}, fetch_list=[y]) - return out - def dygraph_case(self): with dg.guard(): x = paddle.to_tensor(self.input, dtype=paddle.float32) @@ -589,10 +247,6 @@ def test_dygraph_exception(self): with self.assertRaises(ValueError): self.dygraph_case() - def test_static_exception(self): - with self.assertRaises(ValueError): - self.static_graph_case() - class TestFunctionalConv3DTransposeErrorCase11( TestFunctionalConv3DTransposeErrorCase10 diff --git a/test/deprecated/legacy_test/test_functional_conv3d_transpose_deprecated.py b/test/deprecated/legacy_test/test_functional_conv3d_transpose_deprecated.py new file mode 100644 index 0000000000000..5be6713d7f2e8 --- /dev/null +++ b/test/deprecated/legacy_test/test_functional_conv3d_transpose_deprecated.py @@ -0,0 +1,405 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from unittest import TestCase + +import numpy as np + +import paddle +import paddle.base.dygraph as dg +import paddle.nn.functional as F +from paddle import base + +paddle.enable_static() + + +class TestFunctionalConv3DTranspose(TestCase): + batch_size = 4 + spatial_shape = (8, 8, 8) + dtype = "float32" + output_size = None + + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NDHWC" + + def prepare(self): + if isinstance(self.filter_shape, int): + filter_shape = (self.filter_shape,) * 3 + else: + filter_shape = tuple(self.filter_shape) + + self.weight = np.random.uniform( + -1, + 1, + (self.in_channels, self.out_channels // self.groups) + filter_shape, + ).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform(-1, 1, (self.out_channels,)).astype( + self.dtype + ) + + self.channel_last = self.data_format == "NDHWC" + if self.channel_last: + self.input_shape = ( + (self.batch_size,) + self.spatial_shape + (self.in_channels,) + ) + else: + self.input_shape = ( + self.batch_size, + self.in_channels, + ) + self.spatial_shape + + self.input = np.random.uniform(-1, 1, self.input_shape).astype( + self.dtype + ) + + def static_graph_case_1(self): + main = base.Program() + start = base.Program() + with base.unique_name.guard(): + with base.program_guard(main, start): + if self.channel_last: + x = paddle.static.data( + "input", + (-1, -1, -1, -1, 
self.in_channels), + dtype=self.dtype, + ) + else: + x = paddle.static.data( + "input", + (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype, + ) + y = paddle.static.nn.conv3d_transpose( + x, + self.out_channels, + output_size=self.output_size, + filter_size=self.filter_shape, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + param_attr=paddle.nn.initializer.Assign(self.weight), + bias_attr=False + if self.no_bias + else paddle.nn.initializer.Assign(self.bias), + act=self.act, + data_format=self.data_format, + ) + exe = base.Executor(self.place) + exe.run(start) + (out,) = exe.run(main, feed={"input": self.input}, fetch_list=[y]) + return out + + def static_graph_case_2(self): + main = base.Program() + start = base.Program() + with base.unique_name.guard(): + with base.program_guard(main, start): + if self.channel_last: + x = x = paddle.static.data( + "input", + (-1, -1, -1, -1, self.in_channels), + dtype=self.dtype, + ) + else: + x = paddle.static.data( + "input", + (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype, + ) + weight = paddle.static.data( + "weight", self.weight.shape, dtype=self.dtype + ) + if not self.no_bias: + bias = paddle.static.data( + "bias", self.bias.shape, dtype=self.dtype + ) + y = F.conv3d_transpose( + x, + weight, + None if self.no_bias else bias, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format, + ) + if self.act == 'sigmoid': + y = F.sigmoid(y) + exe = base.Executor(self.place) + exe.run(start) + feed_dict = {"input": self.input, "weight": self.weight} + if not self.no_bias: + feed_dict["bias"] = self.bias + (out,) = exe.run(main, feed=feed_dict, fetch_list=[y]) + return out + + def dygraph_case(self): + with dg.guard(self.place): + x = paddle.to_tensor(self.input) + weight = paddle.to_tensor(self.weight) + bias = None if self.no_bias else paddle.to_tensor(self.bias) + y = 
F.conv3d_transpose( + x, + weight, + bias, + output_size=self.output_size, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format, + ) + if self.act == 'sigmoid': + y = F.sigmoid(y) + out = y.numpy() + return out + + def _test_identity(self): + self.prepare() + out1 = self.static_graph_case_1() + out2 = self.static_graph_case_2() + out3 = self.dygraph_case() + np.testing.assert_array_almost_equal(out1, out2) + np.testing.assert_array_almost_equal(out2, out3) + + def test_identity_cpu(self): + self.place = base.CPUPlace() + self._test_identity() + + @unittest.skipIf( + not base.core.is_compiled_with_cuda(), "core is not compiled with CUDA" + ) + def test_identity_gpu(self): + self.place = base.CUDAPlace(0) + self._test_identity() + + +class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DTransposeCase3(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + self.out_channels = 6 + self.filter_shape = 3 + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NDHWC" + + +class TestFunctionalConv3DTransposeCase4(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + self.out_channels = 6 + self.filter_shape = 3 + self.padding = "same" + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = True + self.act = "sigmoid" + self.data_format = "NDHWC" + + +class TestFunctionalConv3DTransposeCase5(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + self.out_channels = 6 + self.filter_shape = 3 + self.padding = "valid" + self.stride = (1, 2, 1) + 
self.dilation = (2, 1, 1) + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NDHWC" + + +class TestFunctionalConv3DTransposeCase6(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + self.out_channels = 4 + self.filter_shape = 3 + self.padding = "valid" + self.stride = (1, 2, 1) + self.dilation = 1 + self.groups = 4 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NDHWC" + + +class TestFunctionalConv3DTransposeCase7(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + self.out_channels = 4 + self.filter_shape = 3 + self.padding = "valid" + self.output_size = (10, 17, 10) + self.stride = (1, 2, 1) + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DTransposeCase8(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + self.out_channels = 6 + self.filter_shape = 3 + self.padding = [[0, 0], [1, 2], [1, 2], [2, 1], [0, 0]] + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NDHWC" + + +class TestFunctionalConv3DTransposeCase9(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + self.out_channels = 6 + self.filter_shape = 3 + self.padding = [[0, 0], [0, 0], [1, 1], [1, 1], [2, 2]] + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DTransposeCase10(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + self.out_channels = 6 + self.filter_shape = 3 + self.padding = [1, 1, 2, 2, 1, 1] + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DTransposeCase11(TestFunctionalConv3DTranspose): + def setUp(self): + self.in_channels = 4 + 
self.out_channels = 6 + self.filter_shape = 3 + self.padding = [1, 2, 1] + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DTransposeErrorCase10(TestCase): + def setUp(self): + self.input = np.array([]) + self.filter = np.array([]) + self.num_filters = 0 + self.filter_size = 0 + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.data_format = "NCDHW" + + def static_graph_case(self): + main = base.Program() + start = base.Program() + with base.unique_name.guard(): + with base.program_guard(main, start): + x = paddle.static.data( + "input", self.input.shape, dtype=paddle.float32 + ) + y = paddle.static.nn.conv3d_transpose( + x, + self.num_filters, + self.filter_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + param_attr=paddle.nn.initializer.Assign(self.filter), + bias_attr=False + if self.bias is None + else paddle.nn.initializer.Assign(self.bias), + act=None, + data_format=self.data_format, + ) + exe = base.Executor() + exe.run(start) + (out,) = exe.run(main, feed={"input": self.input}, fetch_list=[y]) + return out + + def test_static_exception(self): + with self.assertRaises(ValueError): + self.static_graph_case() + + +class TestFunctionalConv3DTransposeErrorCase11( + TestFunctionalConv3DTransposeErrorCase10 +): + def setUp(self): + self.input = np.random.randn(1, 3, 3, 3, 3) + self.filter = np.random.randn(3, 3, 1, 1, 1) + self.num_filters = 3 + self.filter_size = 1 + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 0 + self.data_format = "NCDHW" + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_fuse_bn_act_pass.py b/test/deprecated/legacy_test/test_fuse_bn_act_pass_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_fuse_bn_act_pass.py 
rename to test/deprecated/legacy_test/test_fuse_bn_act_pass_deprecated.py index 6faaff59b51ef..958cfe70dcc0d 100644 --- a/test/deprecated/legacy_test/test_fuse_bn_act_pass.py +++ b/test/deprecated/legacy_test/test_fuse_bn_act_pass_deprecated.py @@ -17,6 +17,8 @@ import paddle from paddle import base +paddle.enable_static() + class TestFuseBatchNormActPass(unittest.TestCase): def build_program(self, main_program, startup_program, use_cuda, seed=1): diff --git a/test/deprecated/legacy_test/test_get_inputs_outputs_in_block.py b/test/deprecated/legacy_test/test_get_inputs_outputs_in_block_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_get_inputs_outputs_in_block.py rename to test/deprecated/legacy_test/test_get_inputs_outputs_in_block_deprecated.py diff --git a/test/deprecated/legacy_test/test_gradient_clip.py b/test/deprecated/legacy_test/test_gradient_clip_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_gradient_clip.py rename to test/deprecated/legacy_test/test_gradient_clip_deprecated.py diff --git a/test/deprecated/legacy_test/test_hsigmoid_op_deprecated.py b/test/deprecated/legacy_test/test_hsigmoid_op_deprecated.py new file mode 100644 index 0000000000000..574bc03172a4f --- /dev/null +++ b/test/deprecated/legacy_test/test_hsigmoid_op_deprecated.py @@ -0,0 +1,113 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math +import unittest + +import numpy as np + +import paddle +from paddle import base + +paddle.enable_static() +np.random.seed(100) + + +class TestHSigmoidOpWithSparseGrad(unittest.TestCase): + def hs_net_conf(self, is_sparse): + input_word = paddle.static.data(name="x", shape=[-1, 1], dtype='int64') + path_table = paddle.static.data( + name='path_table', shape=[-1, 3], dtype='int64' + ) + path_code = paddle.static.data( + name='path_code', shape=[-1, 3], dtype='int64' + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') + + data_list = [input_word, path_table, path_code, label] + + emb = paddle.static.nn.embedding( + input=input_word, + is_sparse=is_sparse, + size=[3, 3], + param_attr=base.ParamAttr( + initializer=paddle.nn.initializer.Normal(std=1 / math.sqrt(3)) + ), + ) + + loss = paddle.nn.HSigmoidLoss( + feature_size=emb.shape[1], + num_classes=3, + bias_attr=True, + is_custom=True, + is_sparse=is_sparse, + ) + + cost = loss( + input=emb, + label=label, + path_table=path_table, + path_code=path_code, + ) + + avg_cost = paddle.mean(cost) + + return avg_cost, data_list + + def training_test(self, is_sparse): + with paddle.static.program_guard( + paddle.static.Program(), paddle.static.Program() + ): + paddle.seed(1) + start_up = paddle.static.default_startup_program() + x = np.arange(6).reshape(6) + path_table = np.array([(1, 2, -1), (1, 2, -1)]).astype('int64') + path_code = np.array([(1, 0, -1), (0, 0, -1)]).astype('int64') + label = np.array([1, 4]).astype('int64') + + loss, data_list = self.hs_net_conf(is_sparse) + optimizer = paddle.optimizer.SGD(learning_rate=1e-3) + optimizer.minimize(loss) + + main_program = paddle.static.default_main_program() + place = base.CPUPlace() + feeder = base.DataFeeder(feed_list=data_list, place=place) + exe = paddle.static.Executor(place) + + exe.run(start_up) + result = [] + for i in range(10): + data = [ + ( + [[x[i % 2]]], + [list(path_table[i % 2])], + [list(path_code[i % 2])], + 
[label[i % 2]], + ) + ] + + loss_val = exe.run( + main_program, feed=feeder.feed(data), fetch_list=[loss] + ) + result.append(loss_val) + return result + + def test_hs_grad_with_sparse(self): + dense_result = self.training_test(is_sparse=False) + sparse_result = self.training_test(is_sparse=True) + assert dense_result == sparse_result + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_image_classification_layer.py b/test/deprecated/legacy_test/test_image_classification_layer.py index cacffb437bad0..c786db344d47d 100644 --- a/test/deprecated/legacy_test/test_image_classification_layer.py +++ b/test/deprecated/legacy_test/test_image_classification_layer.py @@ -38,21 +38,6 @@ def conv_block(input, num_filter, groups, dropouts): class TestLayer(unittest.TestCase): - def test_batch_norm_layer(self): - main_program = Program() - startup_program = Program() - with base.program_guard(main_program, startup_program): - images = paddle.static.data( - name='pixel', shape=[-1, 3, 48, 48], dtype='float32' - ) - hidden1 = paddle.static.nn.batch_norm(input=images) - hidden2 = paddle.static.nn.fc( - x=hidden1, size=128, activation='relu' - ) - paddle.static.nn.batch_norm(input=hidden2) - - print(str(main_program)) - def test_dropout_layer(self): main_program = Program() startup_program = Program() diff --git a/test/deprecated/legacy_test/test_image_classification_layer_deprecated.py b/test/deprecated/legacy_test/test_image_classification_layer_deprecated.py new file mode 100644 index 0000000000000..a977388a35283 --- /dev/null +++ b/test/deprecated/legacy_test/test_image_classification_layer_deprecated.py @@ -0,0 +1,60 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import unittest + +sys.path.append("../../legacy_test") +import nets + +import paddle +from paddle import base +from paddle.base.framework import Program + +paddle.enable_static() + + +def conv_block(input, num_filter, groups, dropouts): + return nets.img_conv_group( + input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max', + ) + + +class TestLayer(unittest.TestCase): + def test_batch_norm_layer(self): + main_program = Program() + startup_program = Program() + with base.program_guard(main_program, startup_program): + images = paddle.static.data( + name='pixel', shape=[-1, 3, 48, 48], dtype='float32' + ) + hidden1 = paddle.static.nn.batch_norm(input=images) + hidden2 = paddle.static.nn.fc( + x=hidden1, size=128, activation='relu' + ) + paddle.static.nn.batch_norm(input=hidden2) + + print(str(main_program)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_imperative_double_grad.py b/test/deprecated/legacy_test/test_imperative_double_grad.py index eca85b0cbf58e..0baf4369b0716 100644 --- a/test/deprecated/legacy_test/test_imperative_double_grad.py +++ b/test/deprecated/legacy_test/test_imperative_double_grad.py @@ -567,21 +567,6 @@ def model_f(input): np.testing.assert_array_equal(grad_1, grad_2) -class TestRaiseNoDoubleGradOp(TestCase): - def test_no_grad_op(self): - with base.dygraph.guard(): - x = paddle.ones(shape=[2, 3, 2, 2], dtype='float32') 
- x.stop_gradient = False - y = paddle.static.nn.group_norm(x, groups=1) - - dx = base.dygraph.grad( - outputs=[y], inputs=[x], create_graph=True, retain_graph=True - )[0] - - loss = paddle.mean(dx) - loss.backward() - - class TestDoubleGradResNet(TestCase): def setUp(self): paddle.seed(123) diff --git a/test/deprecated/legacy_test/test_imperative_double_grad_deprecated.py b/test/deprecated/legacy_test/test_imperative_double_grad_deprecated.py new file mode 100644 index 0000000000000..9fda4f4d3dc1f --- /dev/null +++ b/test/deprecated/legacy_test/test_imperative_double_grad_deprecated.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from unittest import TestCase + +import paddle +from paddle import base + + +class TestRaiseNoDoubleGradOp(TestCase): + def test_no_grad_op(self): + with base.dygraph.guard(): + x = paddle.ones(shape=[2, 3, 2, 2], dtype='float32') + x.stop_gradient = False + y = paddle.static.nn.group_norm(x, groups=1) + + dx = base.dygraph.grad( + outputs=[y], inputs=[x], create_graph=True, retain_graph=True + )[0] + + loss = paddle.mean(dx) + loss.backward() + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_imperative_load_static_param.py b/test/deprecated/legacy_test/test_imperative_load_static_param_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_imperative_load_static_param.py rename to test/deprecated/legacy_test/test_imperative_load_static_param_deprecated.py diff --git a/test/deprecated/legacy_test/test_imperative_lod_tensor_to_selected_rows.py b/test/deprecated/legacy_test/test_imperative_lod_tensor_to_selected_rows_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_imperative_lod_tensor_to_selected_rows.py rename to test/deprecated/legacy_test/test_imperative_lod_tensor_to_selected_rows_deprecated.py diff --git a/test/deprecated/legacy_test/test_infer_no_need_buffer_slots.py b/test/deprecated/legacy_test/test_infer_no_need_buffer_slots_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_infer_no_need_buffer_slots.py rename to test/deprecated/legacy_test/test_infer_no_need_buffer_slots_deprecated.py index 1ba17a9270c50..d1286cbd02aba 100644 --- a/test/deprecated/legacy_test/test_infer_no_need_buffer_slots.py +++ b/test/deprecated/legacy_test/test_infer_no_need_buffer_slots_deprecated.py @@ -18,6 +18,8 @@ from paddle import base from paddle.base import core, framework +paddle.enable_static() + class TestInferNoNeedBufferSlots(unittest.TestCase): def net(self): diff --git 
a/test/deprecated/legacy_test/test_inference_api.py b/test/deprecated/legacy_test/test_inference_api_deprecated.py similarity index 81% rename from test/deprecated/legacy_test/test_inference_api.py rename to test/deprecated/legacy_test/test_inference_api_deprecated.py index b6f5456ee4796..aba8f4cf82b86 100644 --- a/test/deprecated/legacy_test/test_inference_api.py +++ b/test/deprecated/legacy_test/test_inference_api_deprecated.py @@ -20,7 +20,6 @@ import numpy as np from paddle import base -from paddle.base.core import PaddleDType, PaddleTensor from paddle.framework import core from paddle.inference import ( Config, @@ -30,49 +29,6 @@ ) -class TestInferenceApi(unittest.TestCase): - def test_inference_api(self): - tensor32 = np.random.randint(10, 20, size=[20, 2]).astype('int32') - paddletensor32 = PaddleTensor(tensor32) - dtype32 = paddletensor32.dtype - self.assertEqual(dtype32, PaddleDType.INT32) - self.assertEqual( - paddletensor32.data.tolist('int32'), tensor32.ravel().tolist() - ) - paddletensor32.data.reset(tensor32) - self.assertEqual( - paddletensor32.as_ndarray().ravel().tolist(), - tensor32.ravel().tolist(), - ) - - tensor64 = np.random.randint(10, 20, size=[20, 2]).astype('int64') - paddletensor64 = PaddleTensor(tensor64) - dtype64 = paddletensor64.dtype - self.assertEqual(dtype64, PaddleDType.INT64) - self.assertEqual( - paddletensor64.data.tolist('int64'), tensor64.ravel().tolist() - ) - paddletensor64.data.reset(tensor64) - self.assertEqual( - paddletensor64.as_ndarray().ravel().tolist(), - tensor64.ravel().tolist(), - ) - - tensor_float = np.random.randn(20, 2).astype('float32') - paddletensor_float = PaddleTensor(tensor_float) - dtype_float = paddletensor_float.dtype - self.assertEqual(dtype_float, PaddleDType.FLOAT32) - self.assertEqual( - paddletensor_float.data.tolist('float32'), - tensor_float.ravel().tolist(), - ) - paddletensor_float.data.reset(tensor_float) - self.assertEqual( - paddletensor_float.as_ndarray().ravel().tolist(), - 
tensor_float.ravel().tolist(), - ) - - def get_sample_model(): place = base.CPUPlace() exe = base.Executor(place) diff --git a/test/deprecated/legacy_test/test_inference_model_io.py b/test/deprecated/legacy_test/test_inference_model_io_deprecated.py similarity index 92% rename from test/deprecated/legacy_test/test_inference_model_io.py rename to test/deprecated/legacy_test/test_inference_model_io_deprecated.py index 2e179cf90276e..c01bd2d92d9d0 100644 --- a/test/deprecated/legacy_test/test_inference_model_io.py +++ b/test/deprecated/legacy_test/test_inference_model_io_deprecated.py @@ -29,6 +29,7 @@ load_inference_model_distributed, save_persistables, ) +from paddle.pir_utils import test_with_pir_api from paddle.static.io import load_inference_model, save_inference_model paddle.enable_static() @@ -161,14 +162,15 @@ def test_fit_line_inference_model(self): class TestSaveInferenceModel(unittest.TestCase): + @test_with_pir_api def test_save_inference_model(self): root_path = tempfile.TemporaryDirectory() MODEL_DIR = os.path.join(root_path.name, "inference_model2") - init_program = Program() - program = Program() + init_program = paddle.static.Program() + program = paddle.static.Program() # fake program without feed/fetch - with program_guard(program, init_program): + with paddle.static.program_guard(program, init_program): x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') @@ -188,14 +190,15 @@ def test_save_inference_model(self): ) root_path.cleanup() + @test_with_pir_api def test_save_inference_model_with_auc(self): root_path = tempfile.TemporaryDirectory() MODEL_DIR = os.path.join(root_path.name, "inference_model4") - init_program = Program() - program = Program() + init_program = paddle.static.Program() + program = paddle.static.Program() # fake program without feed/fetch - with program_guard(program, init_program): + with paddle.static.program_guard(program, init_program): x = 
paddle.static.data(name='x', shape=[-1, 2], dtype='float32') y = paddle.static.data(name='y', shape=[-1, 1], dtype='int32') predict = paddle.static.nn.fc(x, size=2, activation='softmax') @@ -223,14 +226,15 @@ def test_save_inference_model_with_auc(self): class TestInstance(unittest.TestCase): + # @test_with_pir_api def test_save_inference_model(self): root_path = tempfile.TemporaryDirectory() MODEL_DIR = os.path.join(root_path.name, "inference_model3") - init_program = Program() - program = Program() + init_program = paddle.static.Program() + program = paddle.static.Program() # fake program without feed/fetch - with program_guard(program, init_program): + with paddle.static.program_guard(program, init_program): x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') @@ -261,14 +265,15 @@ def test_save_inference_model(self): class TestSaveInferenceModelNew(unittest.TestCase): + # @test_with_pir_api def test_save_and_load_inference_model(self): root_path = tempfile.TemporaryDirectory() MODEL_DIR = os.path.join(root_path.name, "inference_model5") - init_program = base.default_startup_program() - program = base.default_main_program() + init_program = paddle.static.default_startup_program() + program = paddle.static.default_main_program() # fake program without feed/fetch - with program_guard(program, init_program): + with paddle.static.program_guard(program, init_program): x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') @@ -283,7 +288,7 @@ def test_save_and_load_inference_model(self): sgd_optimizer.minimize(avg_cost, init_program) place = core.CPUPlace() - exe = executor.Executor(place) + exe = base.Executor(place) exe.run(init_program, feed={}, fetch_list=[]) tensor_x = np.array([[1, 1], [1, 2], [5, 2]]).astype("float32") @@ -344,7 +349,12 @@ def test_save_and_load_inference_model(self): exe, ) - model_path = 
MODEL_DIR + "_isdir.pdmodel" + if paddle.framework.in_pir_mode(): + MODEL_SUFFIX = ".json" + else: + MODEL_SUFFIX = ".pdmodel" + + model_path = MODEL_DIR + "_isdir" + MODEL_SUFFIX os.makedirs(model_path) self.assertRaises( ValueError, @@ -356,7 +366,7 @@ def test_save_and_load_inference_model(self): ) os.rmdir(model_path) - params_path = MODEL_DIR + "_isdir.pdmodel" + params_path = MODEL_DIR + "_isdir" + MODEL_SUFFIX os.makedirs(params_path) self.assertRaises( ValueError, @@ -372,7 +382,7 @@ def test_save_and_load_inference_model(self): MODEL_DIR, [x, y], [avg_cost], exe ) - self.assertTrue(os.path.exists(MODEL_DIR + ".pdmodel")) + self.assertTrue(os.path.exists(MODEL_DIR + MODEL_SUFFIX)) self.assertTrue(os.path.exists(MODEL_DIR + ".pdiparams")) expected = exe.run( @@ -405,7 +415,7 @@ def test_save_and_load_inference_model(self): unsupported_param=None, ) self.assertRaises( - (TypeError, ValueError), + (TypeError, RuntimeError, ValueError), paddle.static.load_inference_model, None, exe, @@ -435,7 +445,7 @@ def test_save_and_load_inference_model(self): self.assertRaises(ValueError, paddle.static.io.save_to_file, '', 123) # test _get_valid_program self.assertRaises(TypeError, paddle.static.io._get_valid_program, 0) - p = Program() + p = paddle.static.Program() cp = CompiledProgram(p) paddle.static.io._get_valid_program(cp) self.assertTrue(paddle.static.io._get_valid_program(cp) is p) @@ -491,12 +501,13 @@ def test_serialize_program_and_persistables(self): None, ) + @test_with_pir_api def test_normalize_program(self): - init_program = base.default_startup_program() - program = base.default_main_program() + init_program = paddle.static.default_startup_program() + program = paddle.static.default_main_program() # fake program without feed/fetch - with program_guard(program, init_program): + with paddle.static.program_guard(program, init_program): x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') y = paddle.static.data(name='y', shape=[-1, 1], 
dtype='float32') @@ -525,7 +536,7 @@ def test_normalize_program(self): # test if return type of serialize_program is bytes res = paddle.static.normalize_program(program, [x, y], [avg_cost]) - self.assertTrue(isinstance(res, Program)) + self.assertTrue(isinstance(res, paddle.static.Program)) # test program type self.assertRaises( TypeError, paddle.static.normalize_program, None, [x, y], [avg_cost] @@ -544,20 +555,5 @@ def test_normalize_program(self): ) -class TestLoadInferenceModelError(unittest.TestCase): - def test_load_model_not_exist(self): - place = core.CPUPlace() - exe = executor.Executor(place) - self.assertRaises( - ValueError, load_inference_model, './test_not_exist_dir/model', exe - ) - self.assertRaises( - ValueError, - load_inference_model_distributed, - './test_not_exist_dir', - exe, - ) - - if __name__ == '__main__': unittest.main() diff --git a/test/deprecated/legacy_test/test_initializer.py b/test/deprecated/legacy_test/test_initializer.py index 5910a9c4297e0..c55940afe5903 100644 --- a/test/deprecated/legacy_test/test_initializer.py +++ b/test/deprecated/legacy_test/test_initializer.py @@ -20,7 +20,6 @@ from utils import dygraph_guard, static_guard import paddle -from paddle import base from paddle.base import framework from paddle.base.core import VarDesc from paddle.pir_utils import test_with_pir_api @@ -1448,65 +1447,6 @@ def test_numpy_array_initializer_bf16(self): self.assertTrue(check_cast_op_pir(cast_op)) -class TestSetGlobalInitializer(unittest.TestCase): - def test_set_global_weight_initializer(self): - """Test Set Global Param initializer with UniformInitializer""" - main_prog = framework.Program() - startup_prog = framework.Program() - base.set_global_initializer( - paddle.nn.initializer.Uniform(low=-0.5, high=0.5) - ) - with base.program_guard(main_prog, startup_prog): - x = paddle.static.data(name="x", shape=[1, 3, 32, 32]) - # default initializer of param in layers.conv2d is NormalInitializer - conv = paddle.static.nn.conv2d(x, 5, 3) 
- - block = startup_prog.global_block() - self.assertEqual(len(block.ops), 2) - - # init weight is the first op, and bias is the second - bias_init_op = block.ops[1] - self.assertEqual(bias_init_op.type, 'fill_constant') - self.assertAlmostEqual(bias_init_op.attr('value'), 0.0, delta=DELTA) - - param_init_op = block.ops[0] - self.assertEqual(param_init_op.type, 'uniform_random') - self.assertAlmostEqual(param_init_op.attr('min'), -0.5, delta=DELTA) - self.assertAlmostEqual(param_init_op.attr('max'), 0.5, delta=DELTA) - self.assertEqual(param_init_op.attr('seed'), 0) - base.set_global_initializer(None) - - def test_set_global_bias_initializer(self): - """Test Set Global Bias initializer with NormalInitializer""" - main_prog = framework.Program() - startup_prog = framework.Program() - base.set_global_initializer( - paddle.nn.initializer.Uniform(low=-0.5, high=0.5), - bias_init=paddle.nn.initializer.Normal(0.0, 2.0), - ) - with base.program_guard(main_prog, startup_prog): - x = paddle.static.data(name="x", shape=[1, 3, 32, 32]) - # default initializer of bias in layers.conv2d is ConstantInitializer - conv = paddle.static.nn.conv2d(x, 5, 3) - - block = startup_prog.global_block() - self.assertEqual(len(block.ops), 2) - - # init weight is the first op, and bias is the second - bias_init_op = block.ops[1] - self.assertEqual(bias_init_op.type, 'gaussian_random') - self.assertAlmostEqual(bias_init_op.attr('mean'), 0.0, delta=DELTA) - self.assertAlmostEqual(bias_init_op.attr('std'), 2.0, delta=DELTA) - self.assertEqual(bias_init_op.attr('seed'), 0) - - param_init_op = block.ops[0] - self.assertEqual(param_init_op.type, 'uniform_random') - self.assertAlmostEqual(param_init_op.attr('min'), -0.5, delta=DELTA) - self.assertAlmostEqual(param_init_op.attr('max'), 0.5, delta=DELTA) - self.assertEqual(param_init_op.attr('seed'), 0) - base.set_global_initializer(None) - - class TestUniformInitializerDygraph(unittest.TestCase): def test_uniform_initializer(self, dtype="float32"): """ 
@@ -2192,22 +2132,6 @@ def test_error(self): paddle.nn.Conv2D(5, 9, (3, 3), weight_attr=self.weight_attr) -class TestKaimingUniform(unittest.TestCase): - def func_kaiminguniform_initializer_fan_in_zero(self): - paddle.enable_static() - x = paddle.static.data(name='x', shape=[1, 0, 0], dtype='float32') - - kaiming = paddle.nn.initializer.KaimingUniform(0) - param_attr = paddle.ParamAttr(initializer=kaiming) - - paddle.static.nn.prelu(x, 'all', param_attr=param_attr) - - def test_type_error(self): - self.assertRaises( - ZeroDivisionError, self.func_kaiminguniform_initializer_fan_in_zero - ) - - class TestTruncatedNormalInitializerDygraph(unittest.TestCase): def _trunc_normal_numpy(self, tensor, mean=0.0, std=1.0, a=-2.0, b=2.0): # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf diff --git a/test/deprecated/legacy_test/test_initializer_deprecated.py b/test/deprecated/legacy_test/test_initializer_deprecated.py new file mode 100644 index 0000000000000..75473cee68b7a --- /dev/null +++ b/test/deprecated/legacy_test/test_initializer_deprecated.py @@ -0,0 +1,101 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import paddle +from paddle import base +from paddle.base import framework + +DELTA = 0.00001 + + +class TestSetGlobalInitializer(unittest.TestCase): + def test_set_global_weight_initializer(self): + """Test Set Global Param initializer with UniformInitializer""" + main_prog = framework.Program() + startup_prog = framework.Program() + base.set_global_initializer( + paddle.nn.initializer.Uniform(low=-0.5, high=0.5) + ) + with base.program_guard(main_prog, startup_prog): + x = paddle.static.data(name="x", shape=[1, 3, 32, 32]) + # default initializer of param in layers.conv2d is NormalInitializer + conv = paddle.static.nn.conv2d(x, 5, 3) + + block = startup_prog.global_block() + self.assertEqual(len(block.ops), 2) + + # init weight is the first op, and bias is the second + bias_init_op = block.ops[1] + self.assertEqual(bias_init_op.type, 'fill_constant') + self.assertAlmostEqual(bias_init_op.attr('value'), 0.0, delta=DELTA) + + param_init_op = block.ops[0] + self.assertEqual(param_init_op.type, 'uniform_random') + self.assertAlmostEqual(param_init_op.attr('min'), -0.5, delta=DELTA) + self.assertAlmostEqual(param_init_op.attr('max'), 0.5, delta=DELTA) + self.assertEqual(param_init_op.attr('seed'), 0) + base.set_global_initializer(None) + + def test_set_global_bias_initializer(self): + """Test Set Global Bias initializer with NormalInitializer""" + main_prog = framework.Program() + startup_prog = framework.Program() + base.set_global_initializer( + paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_init=paddle.nn.initializer.Normal(0.0, 2.0), + ) + with base.program_guard(main_prog, startup_prog): + x = paddle.static.data(name="x", shape=[1, 3, 32, 32]) + # default initializer of bias in layers.conv2d is ConstantInitializer + conv = paddle.static.nn.conv2d(x, 5, 3) + + block = startup_prog.global_block() + self.assertEqual(len(block.ops), 2) + + # init weight is the first op, and bias is the second + bias_init_op = block.ops[1] + 
self.assertEqual(bias_init_op.type, 'gaussian_random') + self.assertAlmostEqual(bias_init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(bias_init_op.attr('std'), 2.0, delta=DELTA) + self.assertEqual(bias_init_op.attr('seed'), 0) + + param_init_op = block.ops[0] + self.assertEqual(param_init_op.type, 'uniform_random') + self.assertAlmostEqual(param_init_op.attr('min'), -0.5, delta=DELTA) + self.assertAlmostEqual(param_init_op.attr('max'), 0.5, delta=DELTA) + self.assertEqual(param_init_op.attr('seed'), 0) + base.set_global_initializer(None) + + +class TestKaimingUniform(unittest.TestCase): + def func_kaiminguniform_initializer_fan_in_zero(self): + paddle.enable_static() + x = paddle.static.data(name='x', shape=[1, 0, 0], dtype='float32') + + kaiming = paddle.nn.initializer.KaimingUniform(0) + param_attr = paddle.ParamAttr(initializer=kaiming) + + paddle.static.nn.prelu(x, 'all', param_attr=param_attr) + + def test_type_error(self): + self.assertRaises( + ZeroDivisionError, self.func_kaiminguniform_initializer_fan_in_zero + ) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() diff --git a/test/deprecated/legacy_test/test_instance_norm_op.py b/test/deprecated/legacy_test/test_instance_norm_op.py index 2e9f9855d1033..b266e67dfd334 100644 --- a/test/deprecated/legacy_test/test_instance_norm_op.py +++ b/test/deprecated/legacy_test/test_instance_norm_op.py @@ -21,7 +21,7 @@ import paddle from paddle import base, nn -from paddle.base import Program, core, program_guard +from paddle.base import core def _reference_instance_norm_naive(x, scale, bias, epsilon, mean, var): @@ -722,184 +722,6 @@ def init_test_case(self): ) -class TestInstanceNormOpTraining(unittest.TestCase): - def setUp(self): - self.epsilon = 1e-5 - self.init_test_case() - - def init_test_case(self): - self.shape = [2, 3, 4, 5] - self.no_grad_set = set() - self.fetch_list = [ - 'y', - 'saved_mean', - 'saved_variance', - 'x@GRAD', - 'scale@GRAD', - 'bias@GRAD', - ] - - def 
__assert_close(self, tensor, np_array, msg, atol=1e-4): - np.testing.assert_allclose( - np.array(tensor), np_array, rtol=1e-05, atol=atol, err_msg=msg - ) - - def set_global_mean_var(self, mean_shape, x): - mean, variance = _cal_mean_variance(x, self.epsilon, mean_shape) - return mean, variance - - def test_forward_backward(self): - def test_with_place(place, shape): - paddle.enable_static() - epsilon = self.epsilon - n, c, h, w = shape[0], shape[1], shape[2], shape[3] - scale_shape = [c] - mean_shape = [n * c] - - np.random.seed() - x = np.random.random_sample(shape).astype(np.float32) - scale = np.random.random_sample(scale_shape).astype(np.float32) - bias = np.random.random_sample(scale_shape).astype(np.float32) - mean, variance = self.set_global_mean_var(mean_shape, x) - d_y = np.random.random_sample(shape).astype(np.float32) - - y, saved_mean, variance_tmp = _reference_instance_norm_naive( - x, scale, bias, epsilon, mean, variance - ) - - saved_variance = 1 / np.sqrt(variance_tmp + epsilon) - - d_x, d_scale, d_bias = _reference_instance_norm_grad( - x, d_y, scale, saved_mean, saved_variance, epsilon - ) - - var_dict = locals() - var_dict['y@GRAD'] = d_y - var_dict['x@GRAD'] = d_x - var_dict['scale@GRAD'] = d_scale - var_dict['bias@GRAD'] = d_bias - - var_names = [ - 'x', - 'scale', - 'bias', - 'y', - 'saved_mean', - 'saved_variance', - ] - ground_truth = {name: var_dict[name] for name in var_names} - - program = base.Program() - with base.program_guard(program): - block = program.global_block() - for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape, - ) - in_op = block.append_op( - type="instance_norm", - inputs={ - "X": block.var("x"), - "Scale": block.var("scale"), - "Bias": block.var("bias"), - }, - outputs={ - "Y": block.var("y"), - "SavedMean": block.var("saved_mean"), - "SavedVariance": block.var("saved_variance"), - }, - attrs={ - "epsilon": epsilon, - }, - ) - - block.create_var(name="y@GRAD", 
dtype='float32', shape=y.shape) - - grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( - in_op.desc, self.no_grad_set, [] - ) - grad_op_desc = grad_op_desc_list[0] - new_op_desc = block.desc.append_op() - new_op_desc.copy_from(grad_op_desc) - for var_name in grad_op_desc.output_arg_names(): - block.desc.var(var_name.encode("ascii")) - grad_op_desc.infer_var_type(block.desc) - grad_op_desc.infer_shape(block.desc) - for arg in grad_op_desc.output_arg_names(): - grad_var = block.desc.find_var(arg.encode("ascii")) - grad_var.set_dtype(core.VarDesc.VarType.FP32) - - program._sync_with_cpp() - - exe = base.Executor(place) - out = exe.run( - program, - feed={ - name: var_dict[name] - for name in ['x', 'scale', 'bias', 'y@GRAD'] - }, - fetch_list=self.fetch_list, - ) - - for id, name in enumerate(self.fetch_list): - self.__assert_close(var_dict[name], out[id], name) - print("op test forward passes: ", str(place)) - paddle.disable_static() - - places = [core.CPUPlace()] - - if core.is_compiled_with_cuda() and core.op_support_gpu( - "instance_norm" - ): - places.append(core.CUDAPlace(0)) - for place in places: - test_with_place(place, self.shape) - - -class TestInstanceNormOpTrainingCase1(TestInstanceNormOpTraining): - def init_test_case(self): - self.shape = [2, 3, 4, 5] - self.no_grad_set = {'scale@GRAD', 'bias@GRAD'} - self.fetch_list = ['y', 'saved_mean', 'saved_variance', 'x@GRAD'] - - -class TestInstanceNormOpTrainingCase2(TestInstanceNormOpTraining): - def init_test_case(self): - self.shape = [20, 50, 4, 5] - self.no_grad_set = {'scale@GRAD', 'bias@GRAD'} - self.fetch_list = ['y', 'saved_mean', 'saved_variance', 'x@GRAD'] - - -class TestInstanceNormOpError(unittest.TestCase): - def test_errors(self): - paddle.enable_static() - with program_guard(Program(), Program()): - # the input of instance_norm must be Variable. 
- x1 = base.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], base.CPUPlace() - ) - self.assertRaises(TypeError, paddle.static.nn.instance_norm, x1) - - # the input dtype of instance_norm must be float32 or float64 - x2 = paddle.static.data( - name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" - ) - self.assertRaises(TypeError, paddle.static.nn.instance_norm, x2) - paddle.disable_static() - - -class TestInstanceNormOpErrorCase1(unittest.TestCase): - def test_errors(self): - paddle.enable_static() - with program_guard(Program(), Program()): - # the first dimension of input for instance_norm must between [2d, 5d] - x = paddle.static.data(name='x', shape=[3], dtype="float32") - self.assertRaises(ValueError, paddle.static.nn.instance_norm, x) - paddle.disable_static() - - class TestElasticNormOp(unittest.TestCase): def init_test_case(self): self.epsilon = 1e-5 diff --git a/test/deprecated/legacy_test/test_instance_norm_op_deprecated.py b/test/deprecated/legacy_test/test_instance_norm_op_deprecated.py new file mode 100644 index 0000000000000..cc8e56b8be5e8 --- /dev/null +++ b/test/deprecated/legacy_test/test_instance_norm_op_deprecated.py @@ -0,0 +1,271 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import Program, core, program_guard + + +def _reference_instance_norm_naive(x, scale, bias, epsilon, mean, var): + x_shape = x.shape + if len(x_shape) == 2: + x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1)) + n, c, h, w = x.shape + + mean_tile = np.reshape(mean, (n, c, 1, 1)) + mean_tile = np.tile(mean_tile, (1, 1, h, w)) + var_tile = np.reshape(var, (n, c, 1, 1)) + var_tile = np.tile(var_tile, (1, 1, h, w)) + + x_norm = (x - mean_tile) / np.sqrt(var_tile + epsilon) + scale_tile = np.reshape(scale, (1, c, 1, 1)) + scale_tile = np.tile(scale_tile, (n, 1, h, w)) + bias_tile = np.reshape(bias, (1, c, 1, 1)) + bias_tile = np.tile(bias_tile, (n, 1, h, w)) + y = scale_tile * x_norm + bias_tile + if len(x_shape) == 2: + y = np.reshape(y, x_shape) + return y, mean, var + + +def _reference_instance_norm_grad(x, d_y, scale, mean, var, epsilon): + # d_scale = sum(d_y * (x-mean) / sqrt(var+epsilon)) + # d_offset = sum(d_y) + # d_x = scale / sqrt(var+epsilon) * (d_y - np.mean(d_y, axis=(2,3)) - (x-mean)/sqrt(var+epsilon)* np.mean(y_grad * (x-mean)/sqrt(var+epsilon), axis=(2,3))) + n, c, h, w = x.shape + + d_bias = np.sum(d_y, axis=(0, 2, 3)) + + mean_tile = np.reshape(mean, (n, c, 1, 1)) + mean_tile = np.tile(mean_tile, (1, 1, h, w)) + var_tile = np.reshape(var, (n, c, 1, 1)) + var_tile = np.tile(var_tile, (1, 1, h, w)) + + d_scale = np.sum(d_y * (x - mean_tile) * var_tile, axis=(0, 2, 3)) + var_inv = var_tile + scale_tile = np.reshape(scale, (1, c, 1, 1)) + scale_tile = np.tile(scale_tile, (n, 1, h, w)) + + d_x = ( + scale_tile + * var_inv + * ( + d_y + - np.mean(d_y, axis=(2, 3), keepdims=True) + - (x - mean_tile) + * var_inv + * np.mean( + d_y * (x - mean_tile) * var_inv, axis=(2, 3), keepdims=True + ) + ) + ) + return d_x, d_scale, d_bias + + +def _cal_mean_variance(x, epsilon, mean_shape): + mean = np.reshape(np.mean(x, axis=(2, 3)), mean_shape) + var = 
np.reshape(np.var(x, axis=(2, 3)), mean_shape) + return mean, var + + +def instance_norm_wrapper(x, weight=None, bias=None, esp=1e-05): + return paddle.nn.functional.instance_norm( + x, None, None, weight, bias, True, 0.9, esp + ) + + +class TestInstanceNormOpTraining(unittest.TestCase): + def setUp(self): + self.epsilon = 1e-5 + self.init_test_case() + + def init_test_case(self): + self.shape = [2, 3, 4, 5] + self.no_grad_set = set() + self.fetch_list = [ + 'y', + 'saved_mean', + 'saved_variance', + 'x@GRAD', + 'scale@GRAD', + 'bias@GRAD', + ] + + def __assert_close(self, tensor, np_array, msg, atol=1e-4): + np.testing.assert_allclose( + np.array(tensor), np_array, rtol=1e-05, atol=atol, err_msg=msg + ) + + def set_global_mean_var(self, mean_shape, x): + mean, variance = _cal_mean_variance(x, self.epsilon, mean_shape) + return mean, variance + + def test_forward_backward(self): + def test_with_place(place, shape): + paddle.enable_static() + epsilon = self.epsilon + n, c, h, w = shape[0], shape[1], shape[2], shape[3] + scale_shape = [c] + mean_shape = [n * c] + + np.random.seed() + x = np.random.random_sample(shape).astype(np.float32) + scale = np.random.random_sample(scale_shape).astype(np.float32) + bias = np.random.random_sample(scale_shape).astype(np.float32) + mean, variance = self.set_global_mean_var(mean_shape, x) + d_y = np.random.random_sample(shape).astype(np.float32) + + y, saved_mean, variance_tmp = _reference_instance_norm_naive( + x, scale, bias, epsilon, mean, variance + ) + + saved_variance = 1 / np.sqrt(variance_tmp + epsilon) + + d_x, d_scale, d_bias = _reference_instance_norm_grad( + x, d_y, scale, saved_mean, saved_variance, epsilon + ) + + var_dict = locals() + var_dict['y@GRAD'] = d_y + var_dict['x@GRAD'] = d_x + var_dict['scale@GRAD'] = d_scale + var_dict['bias@GRAD'] = d_bias + + var_names = [ + 'x', + 'scale', + 'bias', + 'y', + 'saved_mean', + 'saved_variance', + ] + ground_truth = {name: var_dict[name] for name in var_names} + + program = 
base.Program() + with base.program_guard(program): + block = program.global_block() + for name in ground_truth: + block.create_var( + name=name, + dtype='float32', + shape=ground_truth[name].shape, + ) + in_op = block.append_op( + type="instance_norm", + inputs={ + "X": block.var("x"), + "Scale": block.var("scale"), + "Bias": block.var("bias"), + }, + outputs={ + "Y": block.var("y"), + "SavedMean": block.var("saved_mean"), + "SavedVariance": block.var("saved_variance"), + }, + attrs={ + "epsilon": epsilon, + }, + ) + + block.create_var(name="y@GRAD", dtype='float32', shape=y.shape) + + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + in_op.desc, self.no_grad_set, [] + ) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + program._sync_with_cpp() + + exe = base.Executor(place) + out = exe.run( + program, + feed={ + name: var_dict[name] + for name in ['x', 'scale', 'bias', 'y@GRAD'] + }, + fetch_list=self.fetch_list, + ) + + for id, name in enumerate(self.fetch_list): + self.__assert_close(var_dict[name], out[id], name) + print("op test forward passes: ", str(place)) + paddle.disable_static() + + places = [core.CPUPlace()] + + if core.is_compiled_with_cuda() and core.op_support_gpu( + "instance_norm" + ): + places.append(core.CUDAPlace(0)) + for place in places: + test_with_place(place, self.shape) + + +class TestInstanceNormOpTrainingCase1(TestInstanceNormOpTraining): + def init_test_case(self): + self.shape = [2, 3, 4, 5] + self.no_grad_set = {'scale@GRAD', 'bias@GRAD'} + self.fetch_list = ['y', 'saved_mean', 'saved_variance', 'x@GRAD'] + + +class 
TestInstanceNormOpTrainingCase2(TestInstanceNormOpTraining): + def init_test_case(self): + self.shape = [20, 50, 4, 5] + self.no_grad_set = {'scale@GRAD', 'bias@GRAD'} + self.fetch_list = ['y', 'saved_mean', 'saved_variance', 'x@GRAD'] + + +class TestInstanceNormOpError(unittest.TestCase): + def test_errors(self): + paddle.enable_static() + with program_guard(Program(), Program()): + # the input of instance_norm must be Variable. + x1 = base.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], base.CPUPlace() + ) + self.assertRaises(TypeError, paddle.static.nn.instance_norm, x1) + + # the input dtype of instance_norm must be float32 or float64 + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" + ) + self.assertRaises(TypeError, paddle.static.nn.instance_norm, x2) + paddle.disable_static() + + +class TestInstanceNormOpErrorCase1(unittest.TestCase): + def test_errors(self): + paddle.enable_static() + with program_guard(Program(), Program()): + # the first dimension of input for instance_norm must between [2d, 5d] + x = paddle.static.data(name='x', shape=[3], dtype="float32") + self.assertRaises(ValueError, paddle.static.nn.instance_norm, x) + paddle.disable_static() + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_inverse_op.py b/test/deprecated/legacy_test/test_inverse_op.py index 22810eecee07d..54f8466bd4d02 100644 --- a/test/deprecated/legacy_test/test_inverse_op.py +++ b/test/deprecated/legacy_test/test_inverse_op.py @@ -35,6 +35,12 @@ def setUp(self): np.random.seed(123) mat = np.random.random(self.matrix_shape).astype(self.dtype) + if self.dtype == 'complex64' or self.dtype == 'complex128': + mat = ( + np.random.random(self.matrix_shape) + + 1j * np.random.random(self.matrix_shape) + ).astype(self.dtype) + inverse = np.linalg.inv(mat) self.inputs = {'Input': mat} @@ -92,6 +98,26 @@ def config(self): self.python_api = paddle.tensor.math.inverse +class 
TestInverseOpComplex64(TestInverseOp): + def config(self): + self.matrix_shape = [10, 10] + self.dtype = "complex64" + self.python_api = paddle.tensor.math.inverse + + def test_grad(self): + self.check_grad(['Input'], 'Output', check_pir=True) + + +class TestInverseOpComplex128(TestInverseOp): + def config(self): + self.matrix_shape = [10, 10] + self.dtype = "complex128" + self.python_api = paddle.tensor.math.inverse + + def test_grad(self): + self.check_grad(['Input'], 'Output', check_pir=True) + + class TestInverseAPI(unittest.TestCase): def setUp(self): np.random.seed(123) diff --git a/test/deprecated/legacy_test/test_layer_norm_op_deprecated.py b/test/deprecated/legacy_test/test_layer_norm_op_deprecated.py new file mode 100644 index 0000000000000..a4ab6d9e116c8 --- /dev/null +++ b/test/deprecated/legacy_test/test_layer_norm_op_deprecated.py @@ -0,0 +1,387 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from functools import reduce +from operator import mul + +import numpy as np +from op_test import _set_use_system_allocator + +import paddle +from paddle import base +from paddle.base import core + +paddle.enable_static() + +np.random.seed(123) +paddle.seed(123) + +_set_use_system_allocator(True) + + +def _reference_layer_norm_naive(x, scale, beta, epsilon, begin_norm_axis=1): + x_shape = x.shape + N = reduce(mul, x_shape[0:begin_norm_axis], 1) + D = reduce(mul, x_shape[begin_norm_axis : len(x_shape)], 1) + x.shape = [N, D] + + mean = np.mean(x, axis=1) + var = np.var(x, axis=1) + epsilon + output = np.divide( + (x - mean.reshape([N, 1])), (np.sqrt(var)).reshape([N, 1]) + ) + if scale is not None: + output = scale.reshape([1, D]) * output + if beta is not None: + output = output + beta.reshape([1, D]) + + x.shape, output.shape = x_shape, x_shape + return output, mean, var + + +def _reference_layer_norm_grad( + x, grad_y, scale, bias, mean, var, begin_norm_axis=1 +): + x_shape = x.shape + N = reduce(mul, x_shape[0:begin_norm_axis], 1) + D = reduce(mul, x_shape[begin_norm_axis : len(x_shape)], 1) + + if scale is not None: + scale_shape = scale.shape + scale.shape = [1, D] + x.shape, grad_y.shape = [N, D], [N, D] + var.shape, mean.shape = [N, 1], [N, 1] + + # d_bias + if bias is not None: + d_bias = np.sum(grad_y, axis=0).reshape([1, D]) + else: + d_bias = None + # d_scale + if scale is not None: + d_scale = np.sum( + ((x - mean) * np.sqrt(1 / var)) * grad_y, axis=0 + ).reshape([1, D]) + else: + d_scale = None + # dx + if scale is not None: + dx_end = scale * np.sqrt(1.0 / var) * grad_y + d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * scale, axis=1).reshape( + [N, 1] + ) # the second part equals to zero. 
+ d_mean = 1.0 / D * d_mean_0 + d_std = np.sum( + -(1.0 / var) * (x - mean) * grad_y * scale, axis=1 + ).reshape([N, 1]) * ( + 1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean) + ) + else: + dx_end = 1.0 * np.sqrt(1.0 / var) * grad_y + d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * 1.0, axis=1).reshape( + [N, 1] + ) # the second part equals to zero. + d_mean = 1.0 / D * d_mean_0 + d_std = np.sum( + -(1.0 / var) * (x - mean) * grad_y * 1.0, axis=1 + ).reshape([N, 1]) * ( + 1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean) + ) + + grad_x = dx_end + d_mean + d_std + + grad_x.shape, x.shape, grad_y.shape = x_shape, x_shape, x_shape + var.shape, mean.shape = [N], [N] + + if scale is not None: + scale.shape = scale_shape + return grad_x, d_scale, d_bias + + +def layer_norm_wrapper( + x, scale=None, bias=None, epsilon=1e-05, begin_norm_axis=1 +): + input_shape = list(x.shape) + normalized_shape = input_shape[begin_norm_axis:] + return paddle.nn.functional.layer_norm( + x, normalized_shape, weight=scale, bias=bias, epsilon=epsilon + ) + + +class TestLayerNormOp(unittest.TestCase): + def setUp(self): + self.use_cudnn = True + paddle.enable_static() + + def __assert_close(self, tensor, np_array, msg, atol=1e-4): + np.testing.assert_allclose( + np.array(tensor).flatten(), + np_array.flatten(), + rtol=1e-3, + atol=atol, + err_msg=msg, + ) + + def check_forward_backward( + self, + shape, + begin_norm_axis, + has_scale=True, + has_bias=True, + y_grad_scale=1.0, + use_mkldnn=False, + ): + def test_with_place( + place, shape, begin_norm_axis, use_mkldnn=use_mkldnn + ): + # attr + epsilon = 0.00001 + x_shape = shape + D = reduce(mul, x_shape[begin_norm_axis : len(x_shape)], 1) + scale_shape = [D] + + np.random.seed(123) + x = np.random.random_sample(x_shape).astype(np.float32) + scale = ( + np.random.random_sample(scale_shape).astype(np.float32) + if has_scale + else None + ) + bias = ( + np.random.random_sample(scale_shape).astype(np.float32) + if has_bias + else 
None + ) + y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype( + np.float32 + ) + + # reference forward & backward + y, mean, variance = _reference_layer_norm_naive( + x, scale, bias, epsilon, begin_norm_axis + ) + x_grad, scale_grad, bias_grad = _reference_layer_norm_grad( + x, y_grad, scale, bias, mean, variance, begin_norm_axis + ) + + var_dict = locals() + var_dict['y@GRAD'] = y_grad + var_names = ['x', 'mean', 'variance', 'y', 'y@GRAD'] + if has_scale: + var_names += ['scale'] + if has_bias: + var_names += ['bias'] + ground_truth = {name: var_dict[name] for name in var_names} + + program = base.Program() + with base.program_guard(program): + block = program.global_block() + for name in ground_truth: + block.create_var( + name=name, + dtype='float32', + shape=ground_truth[name].shape, + ) + inputs = {"X": block.var('x')} + fetch_list = [ + 'y', + 'mean', + 'variance', + 'x@GRAD', + ] + if has_scale: + inputs["Scale"] = block.var('scale') + fetch_list += ['scale@GRAD'] + if has_bias: + inputs["Bias"] = block.var('bias') + fetch_list += ['bias@GRAD'] + layer_norm_op = block.append_op( + type="layer_norm", + inputs=inputs, + outputs={ + "Y": block.var('y'), + "Mean": block.var('mean'), # share the same memory + "Variance": block.var( + 'variance' + ), # share the same memory + }, + attrs={ + "epsilon": epsilon, + "begin_norm_axis": begin_norm_axis, + "use_mkldnn": use_mkldnn, + }, + ) + # generate backward op_desc + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + layer_norm_op.desc, set(), [] + ) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + 
program._sync_with_cpp() + exe = base.Executor(place) + name_list = ['x', 'y@GRAD'] + if has_scale: + name_list += ['scale'] + if has_bias: + name_list += ['bias'] + + out = exe.run( + program, + feed={name: var_dict[name] for name in name_list}, + fetch_list=fetch_list, + ) + # print(y) + # print(out[0]) + self.__assert_close(y, out[0], "y") + self.__assert_close(mean, out[1], "mean") + self.__assert_close(variance, out[2], "variance", 1e-3) + self.__assert_close(x_grad, out[3], "x_grad") + if has_scale: + self.__assert_close( + scale_grad, + out[fetch_list.index('scale@GRAD')], + "scale_grad", + 1e-3, + ) + if has_bias: + self.__assert_close( + bias_grad, + out[fetch_list.index('bias@GRAD')], + "bias_grad", + ) + + places = [core.CPUPlace()] + if ( + core.is_compiled_with_cuda() + and core.op_support_gpu("layer_norm") + and self.use_cudnn + ): + places.append(core.CUDAPlace(0)) + + for place in places: + test_with_place(place, shape, begin_norm_axis) + + def test_check_forward_backward_with_scale_and_bias(self): + self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) + self.check_forward_backward(shape=[1, 3, 4, 5], begin_norm_axis=1) + self.check_forward_backward( + shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=False, + has_bias=True, + ) + self.check_forward_backward( + shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=True, + has_bias=False, + ) + self.check_forward_backward( + shape=[2, 3, 4, 5], + begin_norm_axis=1, + has_scale=False, + has_bias=False, + ) + self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) + self.check_forward_backward( + shape=[92, 513, 129], begin_norm_axis=2, y_grad_scale=0.1 + ) + self.check_forward_backward(shape=[3, 34, 1134], begin_norm_axis=2) + self.check_forward_backward(shape=[3, 2, 1133], begin_norm_axis=2) + self.check_forward_backward( + shape=[92, 513, 1134], begin_norm_axis=2, y_grad_scale=0.1 + ) + self.check_forward_backward( + shape=[92, 513, 1134], + begin_norm_axis=2, + 
has_scale=False, + has_bias=True, + y_grad_scale=0.1, + ) + self.check_forward_backward( + shape=[92, 513, 1134], + begin_norm_axis=2, + has_scale=True, + has_bias=False, + y_grad_scale=0.1, + ) + self.check_forward_backward( + shape=[92, 513, 1134], + begin_norm_axis=2, + has_scale=False, + has_bias=False, + y_grad_scale=0.1, + ) + self.check_forward_backward( + shape=[512, 1024], begin_norm_axis=1, has_scale=True, has_bias=True + ) + self.check_forward_backward( + shape=[1, 128, 256, 256], + begin_norm_axis=3, + has_scale=True, + has_bias=True, + ) + self.check_forward_backward( + shape=[1, 256, 384], + begin_norm_axis=2, + has_scale=True, + has_bias=True, + ) + + +class TestLayerNormAPI(unittest.TestCase): + def test_case(self): + x = paddle.static.data(name='x', shape=[64, 32, 256], dtype='float32') + x = paddle.static.nn.layer_norm( + x, + scale=True, + shift=True, + begin_norm_axis=1, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + ) + x = paddle.static.nn.layer_norm( + x, + scale=False, + shift=False, + begin_norm_axis=1, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + ) + x = paddle.static.nn.layer_norm( + x, + scale=True, + shift=True, + begin_norm_axis=1, + epsilon=1e-05, + param_attr="scale", + bias_attr="shift", + ) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() diff --git a/test/deprecated/legacy_test/test_math_op_patch.py b/test/deprecated/legacy_test/test_math_op_patch.py index fe0708098fb72..49331d3fc0955 100644 --- a/test/deprecated/legacy_test/test_math_op_patch.py +++ b/test/deprecated/legacy_test/test_math_op_patch.py @@ -19,8 +19,6 @@ import paddle from paddle import base -from paddle.framework import in_pir_mode -from paddle.pir_utils import test_with_pir_api class TestMathOpPatches(unittest.TestCase): @@ -232,32 +230,6 @@ def test_equal(self): np.testing.assert_array_equal(c_np, a_np == b_np) self.assertEqual(c.dtype, paddle.bool) - @prog_scope() - @test_with_pir_api - def test_equal_and_cond(self): 
- a = paddle.static.data(name="a", shape=[-1, 1], dtype='float32') - b = paddle.static.data(name="b", shape=[-1, 1], dtype='float32') - if not in_pir_mode(): - a.desc.set_need_check_feed(False) - b.desc.set_need_check_feed(False) - one = paddle.ones(shape=[1], dtype='int32') - zero = paddle.zeros(shape=[1], dtype='int32') - cond = one == zero - c = paddle.static.nn.cond(cond, lambda: a + b, lambda: a - b) - - place = base.CPUPlace() - exe = base.Executor(place) - a_np = np.array([3, 4, 10, 14, 9, 18]).astype('float32') - b_np = np.array([3, 4, 11, 15, 8, 18]).astype('float32') - - (c_np,) = exe.run( - paddle.static.default_main_program(), - feed={"a": a_np, "b": b_np}, - fetch_list=[c], - ) - - np.testing.assert_array_equal(c_np, a_np - b_np) - @prog_scope() def test_neg(self): a = paddle.static.data(name="a", shape=[-1, 10, 1], dtype='float32') diff --git a/test/deprecated/legacy_test/test_math_op_patch_deprecated.py b/test/deprecated/legacy_test/test_math_op_patch_deprecated.py new file mode 100644 index 0000000000000..982c439d99828 --- /dev/null +++ b/test/deprecated/legacy_test/test_math_op_patch_deprecated.py @@ -0,0 +1,60 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from decorator_helper import prog_scope + +import paddle +from paddle import base +from paddle.framework import in_pir_mode +from paddle.pir_utils import test_with_pir_api + + +class TestMathOpPatches(unittest.TestCase): + @classmethod + def setUp(self): + np.random.seed(1024) + paddle.enable_static() + + @prog_scope() + @test_with_pir_api + def test_equal_and_cond(self): + a = paddle.static.data(name="a", shape=[-1, 1], dtype='float32') + b = paddle.static.data(name="b", shape=[-1, 1], dtype='float32') + if not in_pir_mode(): + a.desc.set_need_check_feed(False) + b.desc.set_need_check_feed(False) + one = paddle.ones(shape=[1], dtype='int32') + zero = paddle.zeros(shape=[1], dtype='int32') + cond = one == zero + c = paddle.static.nn.cond(cond, lambda: a + b, lambda: a - b) + + place = base.CPUPlace() + exe = base.Executor(place) + a_np = np.array([3, 4, 10, 14, 9, 18]).astype('float32') + b_np = np.array([3, 4, 11, 15, 8, 18]).astype('float32') + + (c_np,) = exe.run( + paddle.static.default_main_program(), + feed={"a": a_np, "b": b_np}, + fetch_list=[c], + ) + + np.testing.assert_array_equal(c_np, a_np - b_np) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_merged_momentum_op.py b/test/deprecated/legacy_test/test_merged_momentum_op_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_merged_momentum_op.py rename to test/deprecated/legacy_test/test_merged_momentum_op_deprecated.py diff --git a/test/deprecated/legacy_test/test_momentum_op.py b/test/deprecated/legacy_test/test_momentum_op.py index de8ba1886598e..c48601326f4bd 100644 --- a/test/deprecated/legacy_test/test_momentum_op.py +++ b/test/deprecated/legacy_test/test_momentum_op.py @@ -14,7 +14,6 @@ import unittest -import numpy import numpy as np from op import Operator from op_test import OpTest @@ -1035,80 +1034,6 @@ def test_main(self): self._check_with_param_group(place, use_amp) -class 
TestMultiTensorMomentumStatic(unittest.TestCase): - def _momentum_optimize_static( - self, place, use_amp=False, use_multi_tensor=False - ): - paddle.enable_static() - paddle.seed(10) - np.random.seed(10) - if place == 'cpu': - use_amp = False - exe = paddle.static.Executor(place=place) - train_program = paddle.static.Program() - startup_program = paddle.static.Program() - optimizer = paddle.optimizer.Momentum( - multi_precision=use_amp, use_multi_tensor=use_multi_tensor - ) - if use_amp: - optimizer = paddle.static.amp.decorate( - optimizer, - init_loss_scaling=128.0, - use_dynamic_loss_scaling=True, - use_pure_fp16=True, - use_fp16_guard=False, - ) - with paddle.static.program_guard(train_program, startup_program): - if use_amp: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float16' - ) - else: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float32' - ) - hidden = paddle.static.nn.fc(x=data, size=10) - loss = paddle.mean(hidden) - optimizer.minimize(loss) - exe.run(startup_program) - if use_amp: - optimizer.amp_init( - place=paddle.CUDAPlace(0), scope=paddle.static.global_scope() - ) - x = numpy.random.random(size=(2, 2)).astype('float16') - else: - x = numpy.random.random(size=(2, 2)).astype('float32') - out = [] - for idx in range(5): - (loss_data,) = exe.run( - train_program, feed={"X": x}, fetch_list=[loss] - ) - out.append(loss_data) - return out - - def _get_places(self): - places = ['cpu'] - if paddle.is_compiled_with_cuda(): - places.append('gpu') - return places - - def _check_with_place_amp(self, place, use_amp): - output1 = self._momentum_optimize_static( - place=place, use_amp=use_amp, use_multi_tensor=True - ) - output2 = self._momentum_optimize_static( - place=place, use_amp=use_amp, use_multi_tensor=False - ) - for idx in range(len(output1)): - np.testing.assert_allclose(output1[idx], output2[idx], rtol=1e-05) - - def test_main(self): - for place in self._get_places(): - use_amp_list = [True, False] - for use_amp in 
use_amp_list: - self._check_with_place_amp(place, use_amp) - - if __name__ == "__main__": paddle.enable_static() unittest.main() diff --git a/test/deprecated/legacy_test/test_momentum_op_deprecated.py b/test/deprecated/legacy_test/test_momentum_op_deprecated.py new file mode 100644 index 0000000000000..65c5e584d0c5f --- /dev/null +++ b/test/deprecated/legacy_test/test_momentum_op_deprecated.py @@ -0,0 +1,157 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy +import numpy as np + +import paddle + + +def calculate_momentum_by_numpy( + param, + grad, + mu, + velocity, + use_nesterov, + learning_rate, + regularization_method=None, + regularization_coeff=1.0, +): + if regularization_method == "l2_decay": + grad = grad + regularization_coeff * param + + velocity_out = mu * velocity + grad + if use_nesterov: + param_out = param - (grad + velocity_out * mu) * learning_rate + else: + param_out = param - learning_rate * velocity_out + else: + velocity_out = mu * velocity + grad + if use_nesterov: + param_out = ( + param - grad * learning_rate - velocity_out * mu * learning_rate + ) + else: + param_out = param - learning_rate * velocity_out + + return param_out, velocity_out + + +def momentum_wrapper( + param, + grad, + velocity, + learning_rate=1.0, + master_param=None, + mu=0.0, + use_nesterov=False, + regularization_method="", + regularization_coeff=0.0, + multi_precision=False, + rescale_grad=1.0, +): + return paddle._C_ops.momentum_( + param, + grad, + velocity, + learning_rate, + master_param, + mu, + use_nesterov, + regularization_method, + regularization_coeff, + multi_precision, + rescale_grad, + ) + + +class TestMultiTensorMomentumStatic(unittest.TestCase): + def _momentum_optimize_static( + self, place, use_amp=False, use_multi_tensor=False + ): + paddle.enable_static() + paddle.seed(10) + np.random.seed(10) + if place == 'cpu': + use_amp = False + exe = paddle.static.Executor(place=place) + train_program = paddle.static.Program() + startup_program = paddle.static.Program() + optimizer = paddle.optimizer.Momentum( + multi_precision=use_amp, use_multi_tensor=use_multi_tensor + ) + if use_amp: + optimizer = paddle.static.amp.decorate( + optimizer, + init_loss_scaling=128.0, + use_dynamic_loss_scaling=True, + use_pure_fp16=True, + use_fp16_guard=False, + ) + with paddle.static.program_guard(train_program, startup_program): + if use_amp: + data = paddle.static.data( + shape=[2, 2], 
name='X', dtype='float16' + ) + else: + data = paddle.static.data( + shape=[2, 2], name='X', dtype='float32' + ) + hidden = paddle.static.nn.fc(x=data, size=10) + loss = paddle.mean(hidden) + optimizer.minimize(loss) + exe.run(startup_program) + if use_amp: + optimizer.amp_init( + place=paddle.CUDAPlace(0), scope=paddle.static.global_scope() + ) + x = numpy.random.random(size=(2, 2)).astype('float16') + else: + x = numpy.random.random(size=(2, 2)).astype('float32') + out = [] + for idx in range(5): + (loss_data,) = exe.run( + train_program, feed={"X": x}, fetch_list=[loss] + ) + out.append(loss_data) + return out + + def _get_places(self): + places = ['cpu'] + if paddle.is_compiled_with_cuda(): + places.append('gpu') + return places + + def _check_with_place_amp(self, place, use_amp): + output1 = self._momentum_optimize_static( + place=place, use_amp=use_amp, use_multi_tensor=True + ) + output2 = self._momentum_optimize_static( + place=place, use_amp=use_amp, use_multi_tensor=False + ) + for idx in range(len(output1)): + np.testing.assert_allclose(output1[idx], output2[idx], rtol=1e-05) + + def test_main(self): + for place in self._get_places(): + use_amp_list = [True, False] + for use_amp in use_amp_list: + self._check_with_place_amp(place, use_amp) + + +if __name__ == "__main__": + paddle.enable_static() + unittest.main() diff --git a/test/deprecated/legacy_test/test_multinomial_op.py b/test/deprecated/legacy_test/test_multinomial_op.py index f6fc6e281193b..48c00ed5506e5 100644 --- a/test/deprecated/legacy_test/test_multinomial_op.py +++ b/test/deprecated/legacy_test/test_multinomial_op.py @@ -17,7 +17,7 @@ import numpy as np from op_test import OpTest, convert_float_to_uint16 -from test_attribute_var import UnittestBase +from test_attribute_var_deprecated import UnittestBase import paddle from paddle import base diff --git a/test/deprecated/legacy_test/test_name_scope.py b/test/deprecated/legacy_test/test_name_scope_deprecated.py similarity index 98% rename from 
test/deprecated/legacy_test/test_name_scope.py rename to test/deprecated/legacy_test/test_name_scope_deprecated.py index 4b3e5dd0ff9df..e0822313ef27a 100644 --- a/test/deprecated/legacy_test/test_name_scope.py +++ b/test/deprecated/legacy_test/test_name_scope_deprecated.py @@ -17,6 +17,8 @@ import paddle from paddle import base +paddle.enable_static() + class TestNameScope(unittest.TestCase): def test_name_scope(self): diff --git a/test/deprecated/legacy_test/test_nce.py b/test/deprecated/legacy_test/test_nce_deprecated.py similarity index 79% rename from test/deprecated/legacy_test/test_nce.py rename to test/deprecated/legacy_test/test_nce_deprecated.py index 1091f706d1935..fbfea5a4359cd 100644 --- a/test/deprecated/legacy_test/test_nce.py +++ b/test/deprecated/legacy_test/test_nce_deprecated.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, paddle_static_guard +from op_test import paddle_static_guard import paddle from paddle import base @@ -66,94 +66,6 @@ def nce( ) -class TestNCE(OpTest): - def generate_data( - self, - dim, - batch_size, - num_classes, - num_true_class, - num_neg_samples, - is_sparse, - ): - input = np.random.randn(batch_size, dim).astype(np.float32) - weight = np.random.randn(num_classes, dim).astype(np.float32) - bias = np.random.randn(num_classes).astype(np.float32) - sample_weight = np.random.randn(batch_size).astype(np.float32) - labels = np.random.randint( - 0, num_classes, (batch_size, num_true_class) - ).astype("int64") - self.attrs = { - 'num_total_classes': num_classes, - 'num_neg_samples': num_neg_samples, - 'custom_neg_classes': list(range(num_neg_samples)), - 'seed': 0, - 'sampler': 0, - 'is_sparse': is_sparse, - 'is_test': self.is_test, - } - self.inputs = { - 'Input': input, - 'Label': labels, - 'Weight': weight, - 'Bias': bias, - 'SampleWeight': sample_weight, - } - - def set_is_test(self): - self.is_test = False - - def set_data(self): - self.generate_data(5, 25, 100, 1, 2, False) - - def 
compute(self): - out = nce( - self.inputs['Input'], - self.inputs['Weight'], - self.inputs['Bias'], - self.inputs['SampleWeight'], - self.inputs['Label'], - self.attrs['num_total_classes'], - self.attrs['num_neg_samples'], - ) - if self.is_test: - self.outputs = {'Cost': out[0]} - else: - self.outputs = { - 'Cost': out[0], - 'SampleLogits': out[1], - 'SampleLabels': out[2], - } - - def setUp(self): - self.op_type = 'nce' - self.set_is_test() - self.set_data() - self.compute() - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad( - ["Input", "Weight", "Bias"], "Cost", max_relative_error=0.02 - ) - - -class TestNCECase1Tensor(TestNCE): - def set_data(self): - self.generate_data(10, 20, 100, 2, 5, False) - - -class TestNCETensorIsTest(TestNCE): - # if is_test = True, there's no need to calculate grad - def set_is_test(self): - self.is_test = True - - def test_check_grad(self): - pass - - class TestNCECase1SelectedRows(unittest.TestCase): def setUp(self): self.base_lr = 0.0001 diff --git a/test/deprecated/legacy_test/test_one_hot_v2_op.py b/test/deprecated/legacy_test/test_one_hot_v2_op.py index 760d96858c3fa..b19bb8b6d2fb8 100644 --- a/test/deprecated/legacy_test/test_one_hot_v2_op.py +++ b/test/deprecated/legacy_test/test_one_hot_v2_op.py @@ -54,6 +54,37 @@ def test_check_output(self): self.check_output(check_cinn=True, check_prim_pir=True) +class TestOneHotOp_dims(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + self.prim_op_type = "comp" + self.python_api = one_hot_wrapper + self.public_python_api = one_hot_wrapper + self.python_out_sig = ['Out'] + depth = 10 + depth_np = np.array(10).astype('int32') + x_shape = [5, 10, 7, 3] + x = [np.random.randint(0, depth - 1) for i in range(np.prod(x_shape))] + x = np.array(x).astype('int32').reshape(x_shape) + + out = np.zeros(shape=(np.prod(x.shape), depth)).astype('float32') + + r_x = np.reshape(x, np.prod(x.shape)) + for i in range(np.prod(x.shape)): + out[i, 
r_x[i]] = 1.0 + + shape_np = list(x.shape) + shape_np.append(depth) + out = np.reshape(out, shape_np) + + self.inputs = {'X': x, 'depth_tensor': depth_np} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output(check_cinn=True, check_prim_pir=True) + + class TestOneHotOp_attr(OpTest): def setUp(self): self.op_type = 'one_hot_v2' diff --git a/test/deprecated/legacy_test/test_optimizer.py b/test/deprecated/legacy_test/test_optimizer_deprecated.py similarity index 88% rename from test/deprecated/legacy_test/test_optimizer.py rename to test/deprecated/legacy_test/test_optimizer_deprecated.py index c7e6d21124176..f87f348d456ae 100644 --- a/test/deprecated/legacy_test/test_optimizer.py +++ b/test/deprecated/legacy_test/test_optimizer_deprecated.py @@ -12,11 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import tempfile import unittest -import numpy import numpy as np import paddle @@ -25,10 +22,10 @@ from paddle.base.backward import append_backward from paddle.base.framework import ( Program, - convert_np_dtype_to_dtype_, program_guard, ) -from paddle.io import Dataset + +paddle.enable_static() class TestOptimizer(unittest.TestCase): @@ -1012,142 +1009,6 @@ def test_program_desc( ) -class TestOptimizerDtype(unittest.TestCase): - ''' - The dtype of optimizer should be inferred by parameters, and the learning rate - is cteated with the same dtype. 
- ''' - - def check_with_dtype(self, dtype): - class MyLayer(paddle.nn.Layer): - def __init__(self, dtype): - super().__init__() - self._w = self.create_parameter([2, 3], dtype=dtype) - self._b = self.create_parameter([2, 3], dtype=dtype) - - def forward(self, x): - return x * self._w + self._b - - with paddle.base.dygraph.guard(): - model = MyLayer(dtype) - x = paddle.rand([10, 2, 3], dtype=dtype) - loss = model(x) - adam = paddle.optimizer.Adam(parameters=model.parameters()) - loss.backward() - adam.step() - self.assertEqual(adam._dtype, convert_np_dtype_to_dtype_(dtype)) - - def test_float64(self): - self.check_with_dtype('float64') - - def test_float32(self): - self.check_with_dtype('float32') - - -@unittest.skipIf( - not core.is_compiled_with_cuda() - or paddle.device.cuda.get_device_capability()[0] < 7.0, - "run test when gpu's compute capability is at least 7.0.", -) -class TestMasterWeightSaveForFP16(unittest.TestCase): - ''' - For Amp-O2, some optimizer(Momentum, Adam ...) will create master weights for parameters to improve the accuracy. - Master weights will be saved by optimizer::state_dict. 
- ''' - - def setUp(self): - self.temp_dir = tempfile.TemporaryDirectory() - - def tearDown(self): - self.temp_dir.cleanup() - - def check_with_opt_state_dict(self, use_save_load=True): - paddle.seed(100) - numpy.random.seed(100) - - class SimpleNet(paddle.nn.Layer): - def __init__(self, input_size, output_size): - super().__init__() - self.linears = paddle.nn.LayerList( - [ - paddle.nn.Linear(input_size, output_size) - for i in range(1) - ] - ) - - def forward(self, x): - for i, l in enumerate(self.linears): - x = self.linears[i](x) - return x - - input_size = 2 # 设为较大的值 - output_size = 2 # 设为较大的值 - batch_size = 2 # batch_size 为8的倍数 - nums_batch = 10 - - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - data = numpy.random.random([input_size]).astype('float16') - label = numpy.random.random([output_size]).astype('float16') - return data, label - - def __len__(self): - return self.num_samples - - dataset = RandomDataset(nums_batch * batch_size) - loader = paddle.io.DataLoader( - dataset, - batch_size=batch_size, - shuffle=False, - drop_last=True, - num_workers=0, - ) - - mse = paddle.nn.MSELoss() - model = SimpleNet(input_size, output_size) # 定义模型 - optimizer = paddle.optimizer.Momentum( - learning_rate=0.0001, - parameters=model.parameters(), - multi_precision=True, - ) # 定义优化器 - scaler = paddle.amp.GradScaler(init_loss_scaling=1024) - model = paddle.amp.decorate(models=model, level='O2') - - for i, (data, label) in enumerate(loader): - with paddle.amp.auto_cast(level='O2'): - output = model(data) - loss = mse(output, label) - scaled = scaler.scale(loss) - scaled.backward() - scaler.step(optimizer) - scaler.update() - optimizer.clear_grad(set_to_zero=False) - - if use_save_load and i == 5: - model_path = os.path.join(self.temp_dir.name, "model.pdparams") - optimizer_path = os.path.join(self.temp_dir.name, "opt.pdopt") - paddle.save(model.state_dict(), model_path) - 
paddle.save(optimizer.state_dict(), optimizer_path) - model.set_state_dict(paddle.load(model_path)) - optimizer.set_state_dict(paddle.load(optimizer_path)) - - return loss.numpy() - - def test_with_state_dict(self): - if core.is_compiled_with_cuda(): - with base.dygraph.guard(): - out_use_state_dict = self.check_with_opt_state_dict( - use_save_load=True - ) - out_no_state_dict = self.check_with_opt_state_dict( - use_save_load=False - ) - np.testing.assert_array_equal(out_use_state_dict, out_no_state_dict) - - if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git a/test/deprecated/legacy_test/test_optimizer_in_control_flow.py b/test/deprecated/legacy_test/test_optimizer_in_control_flow_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_optimizer_in_control_flow.py rename to test/deprecated/legacy_test/test_optimizer_in_control_flow_deprecated.py diff --git a/test/deprecated/legacy_test/test_prelu_op_deprecated.py b/test/deprecated/legacy_test/test_prelu_op_deprecated.py new file mode 100644 index 0000000000000..f329a58ecd15f --- /dev/null +++ b/test/deprecated/legacy_test/test_prelu_op_deprecated.py @@ -0,0 +1,86 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import Program, core + +paddle.enable_static() + + +def prelu_t(x, mode, param_attr=None, name=None, data_format='NCHW'): + helper = base.layer_helper.LayerHelper('prelu', **locals()) + alpha_shape = [1, x.shape[1], 1, 1] + dtype = helper.input_dtype(input_param_name='x') + alpha = helper.create_parameter( + attr=helper.param_attr, + shape=alpha_shape, + dtype='float32', + is_bias=False, + default_initializer=paddle.nn.initializer.Constant(0.25), + ) + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type="prelu", + inputs={"X": x, 'Alpha': alpha}, + attrs={"mode": mode, 'data_format': data_format}, + outputs={"Out": out}, + ) + return out + + +# error message test if mode is not one of 'all', 'channel', 'element' +class TestModeError(unittest.TestCase): + def setUp(self): + self.place = ( + paddle.CUDAPlace(0) + if core.is_compiled_with_cuda() + else paddle.CPUPlace() + ) + self.x_np = np.ones([1, 2, 3, 4]).astype('float32') + + def test_mode_error(self): + main_program = Program() + with base.program_guard(main_program, Program()): + x = paddle.static.data(name='x', shape=[2, 3, 4, 5]) + try: + y = prelu_t(x, 'any') + except Exception as e: + assert e.args[0].find('InvalidArgument') != -1 + + def test_data_format_error1(self): + main_program = Program() + with base.program_guard(main_program, Program()): + x = paddle.static.data(name='x', shape=[2, 3, 4, 5]) + try: + y = prelu_t(x, 'channel', data_format='N') + except Exception as e: + assert e.args[0].find('InvalidArgument') != -1 + + def test_data_format_error2(self): + main_program = Program() + with base.program_guard(main_program, Program()): + x = paddle.static.data(name='x', shape=[2, 3, 4, 5]) + try: + y = paddle.static.nn.prelu(x, 'channel', data_format='N') + except ValueError as e: + pass + + +if __name__ == "__main__": + unittest.main() diff --git 
a/test/deprecated/legacy_test/test_program_code.py b/test/deprecated/legacy_test/test_program_code_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_program_code.py rename to test/deprecated/legacy_test/test_program_code_deprecated.py diff --git a/test/deprecated/legacy_test/test_program.py b/test/deprecated/legacy_test/test_program_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_program.py rename to test/deprecated/legacy_test/test_program_deprecated.py diff --git a/test/deprecated/legacy_test/test_program_prune_backward.py b/test/deprecated/legacy_test/test_program_prune_backward_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_program_prune_backward.py rename to test/deprecated/legacy_test/test_program_prune_backward_deprecated.py index 8bf154f934b96..dcd514b471415 100755 --- a/test/deprecated/legacy_test/test_program_prune_backward.py +++ b/test/deprecated/legacy_test/test_program_prune_backward_deprecated.py @@ -26,6 +26,8 @@ from paddle.base import core from paddle.dataset import wmt16 +paddle.enable_static() + DeviceType = core.DeviceType diff --git a/test/deprecated/legacy_test/test_program_to_string.py b/test/deprecated/legacy_test/test_program_to_string_deprecated.py similarity index 98% rename from test/deprecated/legacy_test/test_program_to_string.py rename to test/deprecated/legacy_test/test_program_to_string_deprecated.py index c6524d9cf5d92..52768d4600785 100644 --- a/test/deprecated/legacy_test/test_program_to_string.py +++ b/test/deprecated/legacy_test/test_program_to_string_deprecated.py @@ -17,6 +17,8 @@ import paddle from paddle import base +paddle.enable_static() + class TestProgram(unittest.TestCase): def test_program_to_string(self): diff --git a/test/deprecated/legacy_test/test_prune.py b/test/deprecated/legacy_test/test_prune_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_prune.py rename to 
test/deprecated/legacy_test/test_prune_deprecated.py index f82a4d4331b09..47f0d3d749701 100644 --- a/test/deprecated/legacy_test/test_prune.py +++ b/test/deprecated/legacy_test/test_prune_deprecated.py @@ -21,6 +21,8 @@ from paddle import base from paddle.base import framework +paddle.enable_static() + class TestPruneBase(unittest.TestCase): def run_net(self, net): diff --git a/test/deprecated/legacy_test/test_py_func_op.py b/test/deprecated/legacy_test/test_py_func_op_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_py_func_op.py rename to test/deprecated/legacy_test/test_py_func_op_deprecated.py index 3fa249935406f..89ad64aa7d4ab 100644 --- a/test/deprecated/legacy_test/test_py_func_op.py +++ b/test/deprecated/legacy_test/test_py_func_op_deprecated.py @@ -20,6 +20,8 @@ import paddle from paddle import base +paddle.enable_static() + dev_cnt = 2 if base.core.is_compiled_with_cuda(): dev_cnt = base.core.get_cuda_device_count() diff --git a/test/deprecated/legacy_test/test_random_seed_deprecated.py b/test/deprecated/legacy_test/test_random_seed_deprecated.py new file mode 100644 index 0000000000000..ee1dd64b81ee3 --- /dev/null +++ b/test/deprecated/legacy_test/test_random_seed_deprecated.py @@ -0,0 +1,82 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test cloud role maker.""" + +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import core + + +class TestGeneratorSeed(unittest.TestCase): + # """ + # Test cases for cpu generator seed. + # """ + def test_gen_TruncatedNormal_initializer(self): + base.disable_dygraph() + + gen = paddle.seed(123123143) + cur_state = gen.get_state() + + startup_program = base.Program() + train_program = base.Program() + with base.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. + x = paddle.uniform(shape=[2, 10]) + result_1 = paddle.static.nn.fc( + x, + size=10, + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 + ), + ) + result_2 = paddle.static.nn.fc( + x, + size=10, + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 + ), + ) + + exe = base.Executor(base.CPUPlace()) + exe.run(startup_program) + out1 = exe.run( + train_program, feed={}, fetch_list=[result_1, result_2] + ) + + gen.manual_seed(123123143) + with base.program_guard(train_program, startup_program): + exe.run(startup_program) + out2 = exe.run( + train_program, feed={}, fetch_list=[result_1, result_2] + ) + + out1_res1 = np.array(out1[0]) + out1_res2 = np.array(out1[1]) + out2_res1 = np.array(out2[0]) + out2_res2 = np.array(out2[1]) + + if not core.is_compiled_with_cuda(): + print(">>>>>>> sampling id static >>>>>>>") + np.testing.assert_allclose(out1_res1, out2_res1, rtol=1e-05) + np.testing.assert_allclose(out1_res2, out2_res2, rtol=1e-05) + self.assertTrue(not np.allclose(out1_res2, out1_res1)) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_regularizer_api_deprecated.py b/test/deprecated/legacy_test/test_regularizer_api_deprecated.py new file mode 100644 index 0000000000000..f42e07d3ae0cc --- /dev/null +++ b/test/deprecated/legacy_test/test_regularizer_api_deprecated.py @@ -0,0 +1,180 @@ +# Copyright 
(c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import random +import unittest +from functools import partial + +import numpy as np + +import paddle +from paddle import base +from paddle.base import core + + +def bow_net( + data, + label, + dict_dim, + is_sparse=False, + emb_dim=8, + hid_dim=8, + hid_dim2=6, + class_dim=2, +): + """ + BOW net + This model is from https://github.com/PaddlePaddle/models: + base/PaddleNLP/text_classification/nets.py + """ + emb = paddle.static.nn.embedding( + input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim] + ) + bow = paddle.static.nn.sequence_lod.sequence_pool( + input=emb, pool_type='sum' + ) + bow_tanh = paddle.tanh(bow) + fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh") + fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh") + prediction = paddle.static.nn.fc( + x=[fc_2], size=class_dim, activation="softmax" + ) + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=label, reduction='none', use_softmax=False + ) + avg_cost = paddle.mean(x=cost) + + return avg_cost + + +class TestRegularizer(unittest.TestCase): + def setUp(self): + self.word_len = 1500 + self.train_data = [ + [(random.sample(range(1000), 10), [0])] for _ in range(2) + ] + + def get_places(self): + places = [core.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + return places + + 
@contextlib.contextmanager + def scope_prog_guard(self, main_prog, startup_prog): + scope = base.core.Scope() + with base.unique_name.guard(): + with base.scope_guard(scope): + with base.program_guard(main_prog, startup_prog): + yield + + def run_program(self, place, feed_list): + exe = base.Executor(place) + feeder = base.DataFeeder(feed_list=feed_list, place=place) + exe.run(base.default_startup_program()) + + main_prog = base.default_main_program() + param_list = [var.name for var in main_prog.block(0).all_parameters()] + + param_sum = [] + for data in self.train_data: + out = exe.run( + main_prog, feed=feeder.feed(data), fetch_list=param_list + ) + p_sum = 0 + for v in out: + p_sum += np.sum(np.abs(v)) + param_sum.append(p_sum) + return param_sum + + def check_l2decay_regularizer(self, place, model): + paddle.seed(1) + paddle.framework.random._manual_program_seed(1) + main_prog = paddle.static.Program() + startup_prog = paddle.static.Program() + with self.scope_prog_guard( + main_prog=main_prog, startup_prog=startup_prog + ): + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" + ) + + avg_cost = model(data, label, self.word_len) + + optimizer = paddle.optimizer.Adagrad( + learning_rate=0.1, + weight_decay=paddle.regularizer.L2Decay(1.0), + ) + optimizer.minimize(avg_cost) + param_sum = self.run_program(place, [data, label]) + return param_sum + + def check_l2decay(self, place, model): + paddle.seed(1) + paddle.framework.random._manual_program_seed(1) + main_prog = base.framework.Program() + startup_prog = base.framework.Program() + + with self.scope_prog_guard( + main_prog=main_prog, startup_prog=startup_prog + ): + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" + ) + + avg_cost_l2 = model(data, label, self.word_len) + + param_list = 
base.default_main_program().block(0).all_parameters() + para_sum = [] + for para in param_list: + para_mul = paddle.square(x=para) + para_sum.append(paddle.sum(para_mul)) + avg_cost_l2 += paddle.add_n(para_sum) * 0.5 + + optimizer = paddle.optimizer.Adagrad(learning_rate=0.1) + optimizer.minimize(avg_cost_l2) + param_sum = self.run_program(place, [data, label]) + return param_sum + + def test_l2(self): + paddle.enable_static() + for place in self.get_places(): + dense_sparse_p_sum = [] + for sparse in [True, False]: + model = partial(bow_net, is_sparse=sparse) + framework_l2 = self.check_l2decay_regularizer(place, model) + l2 = self.check_l2decay(place, model) + assert len(l2) == len(framework_l2) + for i in range(len(l2)): + assert np.isclose(a=framework_l2[i], b=l2[i], rtol=5e-5) + dense_sparse_p_sum.append(framework_l2) + + assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1]) + for i in range(len(dense_sparse_p_sum[0])): + assert np.isclose( + a=dense_sparse_p_sum[0][i], + b=dense_sparse_p_sum[1][i], + rtol=5e-5, + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_regularizer.py b/test/deprecated/legacy_test/test_regularizer_deprecated.py similarity index 70% rename from test/deprecated/legacy_test/test_regularizer.py rename to test/deprecated/legacy_test/test_regularizer_deprecated.py index 8a3cd3da9a049..1727fe8b4f5d0 100644 --- a/test/deprecated/legacy_test/test_regularizer.py +++ b/test/deprecated/legacy_test/test_regularizer_deprecated.py @@ -23,7 +23,6 @@ from paddle import base, regularizer from paddle.base import core, framework from paddle.base.backward import append_backward -from paddle.pir_utils import test_with_pir_api class TestL2Decay(unittest.TestCase): @@ -112,40 +111,6 @@ def test_l2decay_regularizer(self): self.assertEqual(block.ops[-2].type, 'scale') self.assertEqual(block.ops[-3].type, 'sign') - def test_l1decay_regularizer(self): - with paddle.pir_utils.IrGuard(): - main_program = 
paddle.static.Program() - with paddle.static.program_guard(main_program): - block = main_program.global_block() - mul_x = paddle.pir.core.create_parameter( - dtype="float32", - shape=[5, 10], - name="mul.x", - regularizer=regularizer.L1Decay(0.5), - initializer=paddle.nn.initializer.Constant(1), - ) - self.assertIsNotNone(mul_x.regularizer) - self.assertTrue( - isinstance(mul_x.regularizer, regularizer.L1Decay) - ) - - mul_y = paddle.static.data( - dtype="float32", shape=[10, 8], name="mul.y" - ) - mul_out = paddle.matmul(mul_x, mul_y) - mean_out = paddle.mean(mul_out) - grads = paddle.autograd.ir_backward.grad(mean_out, [mul_x]) - params_grads = [(mul_x, grads[0])] - self.assertEqual(len(params_grads), 1) - count_ops = len(block.ops) - optimizer = paddle.optimizer.Adam() - params_grads = optimizer.append_regularization_ops(params_grads) - self.assertEqual(len(params_grads), 1) - self.assertEqual(len(block.ops), count_ops + 5) - self.assertEqual(block.ops[-1].name(), 'pd_op.add_n') - self.assertEqual(block.ops[-3].name(), 'pd_op.scale') - self.assertEqual(block.ops[-5].name(), 'pd_op.sign') - def bow_net( data, @@ -296,63 +261,6 @@ def test_l2(self): rtol=5e-5, ) - @test_with_pir_api - def test_repeated_regularization(self): - l1 = paddle.regularizer.L1Decay(coeff=0.1) - l2 = paddle.regularizer.L2Decay(coeff=0.01) - fc_param_attr = paddle.ParamAttr( - regularizer=paddle.regularizer.L1Decay() - ) - with paddle.static.program_guard( - paddle.static.Program(), paddle.static.Program() - ): - x = paddle.uniform([2, 2, 3]) - linear = paddle.nn.Linear(3, 5, weight_attr=fc_param_attr) - out = linear(x) - loss = paddle.sum(out) - sgd = paddle.optimizer.SGD(learning_rate=0.1, weight_decay=l2) - sgd.minimize(loss) - with base.dygraph.guard(): - input = paddle.to_tensor(np.random.randn(3, 2).astype('float32')) - paddle.seed(1) - paddle.framework.random._manual_program_seed(1) - - linear1 = paddle.nn.Linear( - 2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr - ) - 
linear2 = paddle.nn.Linear( - 2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr - ) - - loss1 = linear1(input) - loss1.backward() - # set l2 regularizer in optimizer, but l1 in base.ParamAttr - - paddle.optimizer.SGD( - parameters=linear1.parameters(), - learning_rate=1e-2, - weight_decay=l2, - ).minimize(loss1) - # only set l1 in base.ParamAttr - loss2 = linear2(input) - loss2.backward() - paddle.optimizer.SGD( - parameters=linear2.parameters(), learning_rate=1e-2 - ).minimize(loss2) - # they should both be applied by l1, and keep the same - np.testing.assert_allclose( - linear1.weight.numpy(), - linear2.weight.numpy(), - rtol=1e-05, - err_msg='weight should use the regularization in base.ParamAttr!', - ) - np.testing.assert_allclose( - linear1.bias.numpy(), - linear2.bias.numpy(), - rtol=1e-05, - err_msg='bias should use the regularization in base.ParamAttr!', - ) - if __name__ == '__main__': unittest.main() diff --git a/test/deprecated/legacy_test/test_run_program_op.py b/test/deprecated/legacy_test/test_run_program_op_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_run_program_op.py rename to test/deprecated/legacy_test/test_run_program_op_deprecated.py diff --git a/test/deprecated/legacy_test/test_select_input_output_op.py b/test/deprecated/legacy_test/test_select_input_output_op_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_select_input_output_op.py rename to test/deprecated/legacy_test/test_select_input_output_op_deprecated.py diff --git a/test/deprecated/legacy_test/test_sgd_op_deprecated.py b/test/deprecated/legacy_test/test_sgd_op_deprecated.py new file mode 100644 index 0000000000000..11d899f755526 --- /dev/null +++ b/test/deprecated/legacy_test/test_sgd_op_deprecated.py @@ -0,0 +1,214 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle import base + +paddle.enable_static() + + +def sgd_wrapper( + param, learning_rate, grad, master_param=None, multi_precision=False +): + paddle._C_ops.sgd_( + param, learning_rate, grad, master_param, multi_precision + ) + + +class TestSGDOpWithLargeInput(unittest.TestCase): + def runTest(self): + paddle.enable_static() + data = paddle.tensor.fill_constant(shape=[1], value=128, dtype='int64') + label = paddle.tensor.fill_constant( + shape=[1, 150], value=0.5, dtype='float32' + ) + emb = paddle.static.nn.embedding( + input=data, size=(10000000, 150), dtype='float32' + ) + out = paddle.nn.functional.normalize(x=emb, axis=-1) + + cost = paddle.nn.functional.square_error_cost(input=out, label=label) + avg_cost = paddle.mean(cost) + sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(avg_cost) + + place = base.CPUPlace() + exe = base.Executor(place) + exe.run(base.default_startup_program()) + compiled_prog = base.compiler.CompiledProgram( + base.default_main_program() + ) + result = exe.run(compiled_prog, fetch_list=[avg_cost]) + + +class TestSGDV2(unittest.TestCase): + def test_sgd(self): + paddle.enable_static() + + def check_sgd_optimizer(optimizer_attr): + init_program = paddle.static.Program() + program = paddle.static.Program() + block = program.global_block() + mul_x = block.create_parameter( + dtype="float32", + 
shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr=optimizer_attr, + ) + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y" + ) + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out" + ) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out" + ) + block.append_op( + type="mul", + inputs={"X": mul_x, "Y": mul_y}, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}, + ) + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out} + ) + sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.01) + opts, _ = sgd_optimizer.minimize(mean_out, init_program) + return opts + + opts = check_sgd_optimizer({'learning_rate': 1.1}) + self.assertEqual(len(opts), 2) + self.assertEqual([op.type for op in opts], ["scale", "sgd"]) + + opts = check_sgd_optimizer({'learning_rate': 1.0}) + self.assertEqual(len(opts), 1) + self.assertEqual([op.type for op in opts], ["sgd"]) + + +class TestSGDMultiPrecision2_0(unittest.TestCase): + def dygraph_sgd_mp(self, mp): + paddle.disable_static() + paddle.seed(10) + paddle.set_device('gpu') + input = paddle.randn((2, 2)) + model = paddle.nn.Linear(2, 2) + optimizer = paddle.optimizer.SGD( + parameters=model.parameters(), multi_precision=mp + ) + if mp: + model = paddle.amp.decorate(models=model, level='O2') + scaler = paddle.amp.GradScaler(init_loss_scaling=1024) + + for idx in range(5): + if mp: + with paddle.amp.auto_cast(level='O2'): + output = model(input) + loss = paddle.mean(output) + scaled = scaler.scale(loss) + scaled.backward() + scaler.minimize(optimizer, scaled) + optimizer.clear_grad() + else: + output = model(input) + loss = paddle.mean(output) + optimizer.step() + optimizer.clear_grad() + + return output, model.parameters() + + def static_sgd_mp(self, mp): + paddle.enable_static() + paddle.seed(10) + np.random.seed(10) + exe = paddle.static.Executor('gpu') + train_program = 
paddle.static.Program() + startup_program = paddle.static.Program() + optimizer = paddle.optimizer.SGD(multi_precision=mp) + + if mp: + optimizer = paddle.static.amp.decorate( + optimizer, + init_loss_scaling=128.0, + use_dynamic_loss_scaling=True, + use_pure_fp16=True, + use_fp16_guard=False, + ) + with paddle.static.program_guard(train_program, startup_program): + if mp: + data = paddle.static.data( + shape=[2, 2], name='X', dtype='float16' + ) + else: + data = paddle.static.data( + shape=[2, 2], name='X', dtype='float32' + ) + hidden = paddle.static.nn.fc(x=data, size=10) + loss = paddle.mean(hidden) + optimizer.minimize(loss) + exe.run(startup_program) + + if mp: + optimizer.amp_init( + place=paddle.CUDAPlace(0), scope=paddle.static.global_scope() + ) + x = np.random.random(size=(2, 2)).astype('float16') + else: + x = np.random.random(size=(2, 2)).astype('float32') + out = [] + for idx in range(5): + (loss_data,) = exe.run( + train_program, feed={"X": x}, fetch_list=[loss] + ) + out.append(loss_data) + return out + + def test_main(self): + if not paddle.is_compiled_with_cuda(): + return + "Test dygraph mode" + output1_dy, params1_dy = self.dygraph_sgd_mp(mp=True) + output2_dy, params2_dy = self.dygraph_sgd_mp(mp=False) + np.testing.assert_allclose( + output1_dy.astype('float32').numpy(), + output2_dy.astype('float32').numpy(), + rtol=1e-05, + atol=0.1, + ) + for idx in range(len(params1_dy)): + np.testing.assert_allclose( + params1_dy[idx].astype('float32').numpy(), + params2_dy[idx].astype('float32').numpy(), + rtol=1e-05, + atol=0.1, + ) + "Test static graph mode" + output1_st = self.static_sgd_mp(mp=True) + output2_st = self.static_sgd_mp(mp=False) + for idx in range(len(output1_st)): + np.testing.assert_allclose( + output1_st[idx].astype('float32'), + output2_st[idx].astype('float32'), + rtol=1e-05, + atol=0.1, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_slice_op.py 
b/test/deprecated/legacy_test/test_slice_op.py index 3a91882cac51e..1040eb676952a 100644 --- a/test/deprecated/legacy_test/test_slice_op.py +++ b/test/deprecated/legacy_test/test_slice_op.py @@ -1011,15 +1011,6 @@ def test_float_in_index(): class TestInferShape(unittest.TestCase): - def test(self): - with paddle_static_guard(): - x = paddle.ones(shape=[3, 4, 5]) - x.desc.set_shape([3, -1, 5]) - self.assertEqual(x.shape, (3, -1, 5)) - - out0 = paddle.slice(x, axes=[1], starts=[0], ends=[3]) - self.assertEqual(out0.shape, (3, -1, 5)) - def test_pir(self): with paddle.pir_utils.IrGuard(): x = paddle.static.data('x', shape=[3, -1, 5]) diff --git a/test/deprecated/legacy_test/test_slice_op_deprecated.py b/test/deprecated/legacy_test/test_slice_op_deprecated.py new file mode 100644 index 0000000000000..a9ba98f3dba72 --- /dev/null +++ b/test/deprecated/legacy_test/test_slice_op_deprecated.py @@ -0,0 +1,37 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +from op_test import paddle_static_guard + +import paddle + +paddle.enable_static() + + +class TestInferShape(unittest.TestCase): + def test(self): + with paddle_static_guard(): + x = paddle.ones(shape=[3, 4, 5]) + x.desc.set_shape([3, -1, 5]) + self.assertEqual(x.shape, (3, -1, 5)) + + out0 = paddle.slice(x, axes=[1], starts=[0], ends=[3]) + self.assertEqual(out0.shape, (3, -1, 5)) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() diff --git a/test/deprecated/legacy_test/test_squared_l2_norm_op.py b/test/deprecated/legacy_test/test_squared_l2_norm_op.py deleted file mode 100755 index df36c81097051..0000000000000 --- a/test/deprecated/legacy_test/test_squared_l2_norm_op.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np -from numpy import linalg as LA -from op_test import OpTest - -import paddle -import paddle.distributed as dist -from paddle import _C_ops, _legacy_C_ops -from paddle.framework import in_dynamic_mode - - -def test_squared_l2_norm(x): - if in_dynamic_mode(): - return _C_ops.squared_l2_norm(x) - else: - return _legacy_C_ops.squared_l2_norm(x) - - -class TestSquaredL2NormF16Op(unittest.TestCase): - def init_test_case(self): - X = np.random.uniform(-0.1, 0.1, (8, 5, 10)).astype('float32') - return X - - def check_main(self, x_np, dtype): - paddle.disable_static() - x = paddle.to_tensor(x_np) - - x.stop_gradient = False - y = test_squared_l2_norm(x) - x_g = paddle.grad(y, [x]) - - paddle.enable_static() - return y, x_g - - def test_main(self): - x_np = self.init_test_case() - y_np_1, x_g_np_1 = self.check_main(x_np, 'float32') - y_np_2, x_g_np_2 = self.check_main(x_np, 'float16') - - def assert_equal(x, y): - np.testing.assert_allclose(x, y, rtol=1e-05, atol=0.0) - - assert_equal(y_np_1, y_np_2) - assert_equal(x_g_np_1, x_g_np_2) - - -class TestSquaredL2NormF16Op1(TestSquaredL2NormF16Op): - def init_test_case(self): - X = np.random.uniform(-2.0, 2.0, (30, 10)).astype('float32') - return X - - -class TestSquaredL2NormF16Op2(TestSquaredL2NormF16Op): - def init_test_case(self): - X = np.random.uniform(-5.0, 5.0, (20, 10, 20)).astype('float32') - return X - - -class TestL2LossOp(OpTest): - """Test squared_l2_norm""" - - def config(self): - self.x_shape = (13, 19) - self.check_auto_parallel = False - - def setUp(self): - self.config() - self.python_api = test_squared_l2_norm - self.op_type = "squared_l2_norm" - self.max_relative_error = 0.05 - - X = np.random.uniform(-1, 1, self.x_shape).astype("float32") - X[np.abs(X) < self.max_relative_error] = 0.1 - self.inputs = {'X': X} - self.outputs = {'Out': np.array([np.square(LA.norm(X))])} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - 
self.check_grad( - ['X'], - 'Out', - max_relative_error=self.max_relative_error, - check_auto_parallel=self.check_auto_parallel, - ) - - -class TestSquaredL2NormAutoParallel_1(TestL2LossOp): - def config(self): - self.x_shape = (14, 18) - self.check_auto_parallel = True - self.placements = { - 'X': [dist.Replicate()], - } - - -class TestSquaredL2NormAutoParallel_2(TestL2LossOp): - def config(self): - self.x_shape = (14, 18) - self.check_auto_parallel = True - self.placements = { - 'X': [dist.Shard(0)], - } - - -class TestSquaredL2NormAutoParallel_3(TestL2LossOp): - def config(self): - self.x_shape = (14, 18) - self.check_auto_parallel = True - self.placements = { - 'X': [dist.Shard(1)], - } - - -class TestL2LossDeterministic(unittest.TestCase): - def check_place(self, place): - with paddle.base.dygraph.guard(place): - x_np = np.random.rand(5, 11, 13).astype('float32') - x = paddle.to_tensor(x_np) - y1 = _legacy_C_ops.squared_l2_norm(x) - y2 = _legacy_C_ops.squared_l2_norm(x) - np.testing.assert_array_equal(y1.numpy(), y2.numpy()) - - def test_main(self): - self.check_place(paddle.CPUPlace()) - if paddle.is_compiled_with_cuda(): - self.check_place(paddle.CUDAPlace(0)) - - -if __name__ == "__main__": - paddle.enable_static() - unittest.main() diff --git a/test/deprecated/legacy_test/test_squeeze2_op_rename.py b/test/deprecated/legacy_test/test_squeeze2_op_rename.py index ed347eda7350b..02e63c0cb2459 100644 --- a/test/deprecated/legacy_test/test_squeeze2_op_rename.py +++ b/test/deprecated/legacy_test/test_squeeze2_op_rename.py @@ -15,7 +15,7 @@ import os import unittest -from test_attribute_var import UnittestBase +from test_attribute_var_deprecated import UnittestBase import paddle from paddle.base.framework import Program, program_guard diff --git a/test/deprecated/legacy_test/test_static_pylayer_block.py b/test/deprecated/legacy_test/test_static_pylayer_block_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_static_pylayer_block.py 
rename to test/deprecated/legacy_test/test_static_pylayer_block_deprecated.py diff --git a/test/deprecated/legacy_test/test_static_pylayer.py b/test/deprecated/legacy_test/test_static_pylayer_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_static_pylayer.py rename to test/deprecated/legacy_test/test_static_pylayer_deprecated.py index ec0e655d3b6a9..e15ba4ee363be 100644 --- a/test/deprecated/legacy_test/test_static_pylayer.py +++ b/test/deprecated/legacy_test/test_static_pylayer_deprecated.py @@ -13,10 +13,15 @@ # limitations under the License. import functools +import sys import unittest +sys.path.append(".") import numpy as np -from test_prune import TestExecutorRunAutoPrune, TestPruneBase +from test_prune_deprecated import ( + TestExecutorRunAutoPrune, + TestPruneBase, +) import paddle from paddle import base diff --git a/test/deprecated/legacy_test/test_switch.py b/test/deprecated/legacy_test/test_switch_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_switch.py rename to test/deprecated/legacy_test/test_switch_deprecated.py index 3c90ba5260542..d8b2e2fd061ad 100644 --- a/test/deprecated/legacy_test/test_switch.py +++ b/test/deprecated/legacy_test/test_switch_deprecated.py @@ -19,6 +19,8 @@ from paddle.base.executor import Executor from paddle.base.framework import default_startup_program +paddle.enable_static() + class TestSwitch(unittest.TestCase): def check_switch(self, value): diff --git a/test/deprecated/legacy_test/test_tensor_array_to_tensor.py b/test/deprecated/legacy_test/test_tensor_array_to_tensor.py index 91310fc2880fb..7d043ad52dac7 100644 --- a/test/deprecated/legacy_test/test_tensor_array_to_tensor.py +++ b/test/deprecated/legacy_test/test_tensor_array_to_tensor.py @@ -43,122 +43,6 @@ def test_list_Variable(): self.assertRaises(TypeError, test_list_Variable) -class TestLoDTensorArrayConcat(unittest.TestCase): - """Test case for concat mode of tensor_array_to_tensor.""" - - def 
setUp(self): - self.op_type = "tensor_array_to_tensor" - self.attrs = {"axis": 0} - self.outputs = ["Out"] - - def test_get_set(self): - scope = core.Scope() - program = base.Program() - block = program.global_block() - - input_arr = block.create_var( - name="tmp_lod_tensor_array", - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - ) - input_arr.persistable = True - input_arr_var = scope.var('tmp_lod_tensor_array') - input_tensor_array = input_arr_var.get_lod_tensor_array() - self.assertEqual(0, len(input_tensor_array)) - - cpu = core.CPUPlace() - for i in range(10): - t = core.LoDTensor() - if i == 0: - t.set(np.array([[i], [i]], dtype='float32'), cpu) - else: - t.set(np.array([[i]], dtype='float32'), cpu) - input_tensor_array.append(t) - - self.assertEqual(10, len(input_tensor_array)) - - random_grad = np.random.random_sample([11]).astype(np.float32) - - y_out = block.create_var(name="Out") - y_out.persistable = True - y_out_index = block.create_var(name="OutIndex") - y_out_index.persistable = True - - y_grad_arr = block.create_var( - name='Out@GRAD', dtype='float32', shape=[11] - ) - y_grad_arr.persistable = True - y_grad = scope.var('Out@GRAD') - y_grad_tensor = y_grad.get_tensor() - y_grad_tensor.set(random_grad, cpu) - - op = block.append_op( - type=self.op_type, - inputs={"X": input_arr}, - outputs={"Out": y_out, "OutIndex": y_out_index}, - attrs=self.attrs, - ) - - out_grad = block.create_var( - name="tmp_lod_tensor_array@GRAD", - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - ) - out_grad.persistable = True - - grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( - op.desc, set(), [] - ) - grad_op_desc = grad_op_desc_list[0] - new_op_desc = block.desc.append_op() - new_op_desc.copy_from(grad_op_desc) - for var_name in grad_op_desc.output_arg_names(): - block.desc.var(var_name.encode("ascii")) - - grad_op_desc.infer_var_type(block.desc) - grad_op_desc.infer_shape(block.desc) - for arg in grad_op_desc.output_arg_names(): - grad_var = 
block.desc.find_var(arg.encode("ascii")) - grad_var.set_dtype(core.VarDesc.VarType.FP32) - - fetch_list = [] - fetch_list.append(block.var('Out')) - fetch_list.append(block.var('OutIndex')) - - exe = base.Executor(base.CPUPlace()) - out = exe.run(program, fetch_list=fetch_list, scope=scope) - # print ("index: ", np.array(out[1])) - - # test forward - tensor_res = np.array(out[0]) - tensor_gt = np.array( - [0] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='float32' - ) - - self.assertEqual(len(tensor_res), len(tensor_gt)) - - for i in range(len(tensor_res)): - self.assertEqual(tensor_res[i], tensor_gt[i]) - - # test backward - grad_tensor = scope.var('tmp_lod_tensor_array@GRAD') - grad_tensor_array = grad_tensor.get_lod_tensor_array() - - self.assertEqual(10, len(grad_tensor_array)) - - for i in range(len(grad_tensor_array)): - if i == 0: - self.assertEqual( - np.array(grad_tensor_array[i])[0], np.array(random_grad[i]) - ) - self.assertEqual( - np.array(grad_tensor_array[i])[1], - np.array(random_grad[i + 1]), - ) - if i == 1: - self.assertEqual( - np.array(grad_tensor_array[i]), np.array(random_grad[i + 1]) - ) - - class TestLoDTensorArrayStack(unittest.TestCase): """Test case for stack mode of tensor_array_to_tensor.""" diff --git a/test/deprecated/legacy_test/test_tensor_array_to_tensor_deprecated.py b/test/deprecated/legacy_test/test_tensor_array_to_tensor_deprecated.py new file mode 100644 index 0000000000000..959eddee79fe6 --- /dev/null +++ b/test/deprecated/legacy_test/test_tensor_array_to_tensor_deprecated.py @@ -0,0 +1,143 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import core + +paddle.enable_static() + + +class TestLoDTensorArrayConcat(unittest.TestCase): + """Test case for concat mode of tensor_array_to_tensor.""" + + def setUp(self): + self.op_type = "tensor_array_to_tensor" + self.attrs = {"axis": 0} + self.outputs = ["Out"] + + def test_get_set(self): + scope = core.Scope() + program = base.Program() + block = program.global_block() + + input_arr = block.create_var( + name="tmp_lod_tensor_array", + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, + ) + input_arr.persistable = True + input_arr_var = scope.var('tmp_lod_tensor_array') + input_tensor_array = input_arr_var.get_lod_tensor_array() + self.assertEqual(0, len(input_tensor_array)) + + cpu = core.CPUPlace() + for i in range(10): + t = core.LoDTensor() + if i == 0: + t.set(np.array([[i], [i]], dtype='float32'), cpu) + else: + t.set(np.array([[i]], dtype='float32'), cpu) + input_tensor_array.append(t) + + self.assertEqual(10, len(input_tensor_array)) + + random_grad = np.random.random_sample([11]).astype(np.float32) + + y_out = block.create_var(name="Out") + y_out.persistable = True + y_out_index = block.create_var(name="OutIndex") + y_out_index.persistable = True + + y_grad_arr = block.create_var( + name='Out@GRAD', dtype='float32', shape=[11] + ) + y_grad_arr.persistable = True + y_grad = scope.var('Out@GRAD') + y_grad_tensor = y_grad.get_tensor() + y_grad_tensor.set(random_grad, cpu) + + op = block.append_op( + type=self.op_type, + inputs={"X": input_arr}, + 
outputs={"Out": y_out, "OutIndex": y_out_index}, + attrs=self.attrs, + ) + + out_grad = block.create_var( + name="tmp_lod_tensor_array@GRAD", + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, + ) + out_grad.persistable = True + + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + op.desc, set(), [] + ) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + fetch_list = [] + fetch_list.append(block.var('Out')) + fetch_list.append(block.var('OutIndex')) + + exe = base.Executor(base.CPUPlace()) + out = exe.run(program, fetch_list=fetch_list, scope=scope) + # print ("index: ", np.array(out[1])) + + # test forward + tensor_res = np.array(out[0]) + tensor_gt = np.array( + [0] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='float32' + ) + + self.assertEqual(len(tensor_res), len(tensor_gt)) + + for i in range(len(tensor_res)): + self.assertEqual(tensor_res[i], tensor_gt[i]) + + # test backward + grad_tensor = scope.var('tmp_lod_tensor_array@GRAD') + grad_tensor_array = grad_tensor.get_lod_tensor_array() + + self.assertEqual(10, len(grad_tensor_array)) + + for i in range(len(grad_tensor_array)): + if i == 0: + self.assertEqual( + np.array(grad_tensor_array[i])[0], np.array(random_grad[i]) + ) + self.assertEqual( + np.array(grad_tensor_array[i])[1], + np.array(random_grad[i + 1]), + ) + if i == 1: + self.assertEqual( + np.array(grad_tensor_array[i]), np.array(random_grad[i + 1]) + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_trainable.py b/test/deprecated/legacy_test/test_trainable_deprecated.py similarity index 100% rename from 
test/deprecated/legacy_test/test_trainable.py rename to test/deprecated/legacy_test/test_trainable_deprecated.py diff --git a/test/deprecated/legacy_test/test_truncated_gaussian_random_op.py b/test/deprecated/legacy_test/test_truncated_gaussian_random_op_deprecated.py similarity index 100% rename from test/deprecated/legacy_test/test_truncated_gaussian_random_op.py rename to test/deprecated/legacy_test/test_truncated_gaussian_random_op_deprecated.py diff --git a/test/deprecated/legacy_test/test_variable.py b/test/deprecated/legacy_test/test_variable.py index cbe52f6ef103e..5b33f4c6a4cc8 100644 --- a/test/deprecated/legacy_test/test_variable.py +++ b/test/deprecated/legacy_test/test_variable.py @@ -112,125 +112,6 @@ def test_step_scopes(self): ) self.assertEqual(core.VarDesc.VarType.STEP_SCOPES, var.type) - def _test_slice(self, place): - b = default_main_program().current_block() - w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0) - - for i in range(3): - nw = w[i] - self.assertEqual((100, 100), nw.shape) - - nw = w[:] - self.assertEqual((784, 100, 100), nw.shape) - - nw = w[:, :] - self.assertEqual((784, 100, 100), nw.shape) - - nw = w[:, :, -1] - self.assertEqual((784, 100), nw.shape) - - nw = w[1, 1, 1] - - self.assertEqual(len(nw.shape), 0) - - nw = w[:, :, :-1] - self.assertEqual((784, 100, 99), nw.shape) - - self.assertEqual(0, nw.lod_level) - - main = base.Program() - with base.program_guard(main): - exe = base.Executor(place) - tensor_array = np.array( - [ - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - [[10, 11, 12], [13, 14, 15], [16, 17, 18]], - [[19, 20, 21], [22, 23, 24], [25, 26, 27]], - ] - ).astype('float32') - var = paddle.assign(tensor_array) - var1 = var[0, 1, 1] - var2 = var[1:] - var3 = var[0:1] - var4 = var[::-1] - var5 = var[1, 1:, 1:] - var_reshape = paddle.reshape(var, [3, -1, 3]) - var6 = var_reshape[:, :, -1] - var7 = var[:, :, :-1] - var8 = var[:1, :1, :1] - var9 = var[:-1, :-1, :-1] - var10 = var[::-1, :1, :-1] - var11 = 
var[:-1, ::-1, -1:] - var12 = var[1:2, 2:, ::-1] - var13 = var[2:10, 2:, -2:-1] - var14 = var[1:-1, 0:2, ::-1] - var15 = var[::-1, ::-1, ::-1] - - x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') - y = paddle.static.nn.fc(x, size=1, activation=None) - y_1 = y[:, 0] - feeder = base.DataFeeder(place=place, feed_list=[x]) - data = [] - data.append(np.random.randint(10, size=[13]).astype('float32')) - exe.run(base.default_startup_program()) - - local_out = exe.run( - main, - feed=feeder.feed([data]), - fetch_list=[ - var, - var1, - var2, - var3, - var4, - var5, - var6, - var7, - var8, - var9, - var10, - var11, - var12, - var13, - var14, - var15, - ], - ) - - np.testing.assert_array_equal(local_out[1], tensor_array[0, 1, 1:2]) - np.testing.assert_array_equal(local_out[2], tensor_array[1:]) - np.testing.assert_array_equal(local_out[3], tensor_array[0:1]) - np.testing.assert_array_equal(local_out[4], tensor_array[::-1]) - np.testing.assert_array_equal(local_out[5], tensor_array[1, 1:, 1:]) - np.testing.assert_array_equal( - local_out[6], tensor_array.reshape((3, -1, 3))[:, :, -1] - ) - np.testing.assert_array_equal(local_out[7], tensor_array[:, :, :-1]) - np.testing.assert_array_equal( - local_out[8], tensor_array[:1, :1, :1] - ) - np.testing.assert_array_equal( - local_out[9], tensor_array[:-1, :-1, :-1] - ) - np.testing.assert_array_equal( - local_out[10], tensor_array[::-1, :1, :-1] - ) - np.testing.assert_array_equal( - local_out[11], tensor_array[:-1, ::-1, -1:] - ) - np.testing.assert_array_equal( - local_out[12], tensor_array[1:2, 2:, ::-1] - ) - np.testing.assert_array_equal( - local_out[13], tensor_array[2:10, 2:, -2:-1] - ) - np.testing.assert_array_equal( - local_out[14], tensor_array[1:-1, 0:2, ::-1] - ) - np.testing.assert_array_equal( - local_out[15], tensor_array[::-1, ::-1, ::-1] - ) - def _test_slice_index_tensor(self, place): data = np.random.rand(2, 3).astype("float32") prog = paddle.static.Program() @@ -391,7 +272,6 @@ def 
test_slice(self): places.append(core.CUDAPlace(0)) for place in places: - self._test_slice(place) self._test_slice_index_tensor(place) self._test_slice_index_list(place) self._test_slice_index_ellipsis(place) diff --git a/test/deprecated/legacy_test/test_variable_deprecated.py b/test/deprecated/legacy_test/test_variable_deprecated.py new file mode 100644 index 0000000000000..6d416d1a20344 --- /dev/null +++ b/test/deprecated/legacy_test/test_variable_deprecated.py @@ -0,0 +1,162 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import base +from paddle.base import core +from paddle.base.framework import ( + default_main_program, +) + +paddle.enable_static() + + +class TestVariable(unittest.TestCase): + def setUp(self): + np.random.seed(2022) + + def _test_slice(self, place): + b = default_main_program().current_block() + w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0) + + for i in range(3): + nw = w[i] + self.assertEqual((100, 100), nw.shape) + + nw = w[:] + self.assertEqual((784, 100, 100), nw.shape) + + nw = w[:, :] + self.assertEqual((784, 100, 100), nw.shape) + + nw = w[:, :, -1] + self.assertEqual((784, 100), nw.shape) + + nw = w[1, 1, 1] + + self.assertEqual(len(nw.shape), 0) + + nw = w[:, :, :-1] + self.assertEqual((784, 100, 99), nw.shape) + + self.assertEqual(0, nw.lod_level) + + main = base.Program() + with base.program_guard(main): + exe = base.Executor(place) + tensor_array = np.array( + [ + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[10, 11, 12], [13, 14, 15], [16, 17, 18]], + [[19, 20, 21], [22, 23, 24], [25, 26, 27]], + ] + ).astype('float32') + var = paddle.assign(tensor_array) + var1 = var[0, 1, 1] + var2 = var[1:] + var3 = var[0:1] + var4 = var[::-1] + var5 = var[1, 1:, 1:] + var_reshape = paddle.reshape(var, [3, -1, 3]) + var6 = var_reshape[:, :, -1] + var7 = var[:, :, :-1] + var8 = var[:1, :1, :1] + var9 = var[:-1, :-1, :-1] + var10 = var[::-1, :1, :-1] + var11 = var[:-1, ::-1, -1:] + var12 = var[1:2, 2:, ::-1] + var13 = var[2:10, 2:, -2:-1] + var14 = var[1:-1, 0:2, ::-1] + var15 = var[::-1, ::-1, ::-1] + + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.nn.fc(x, size=1, activation=None) + y_1 = y[:, 0] + feeder = base.DataFeeder(place=place, feed_list=[x]) + data = [] + data.append(np.random.randint(10, size=[13]).astype('float32')) + exe.run(base.default_startup_program()) + + local_out = exe.run( + main, + feed=feeder.feed([data]), + 
fetch_list=[ + var, + var1, + var2, + var3, + var4, + var5, + var6, + var7, + var8, + var9, + var10, + var11, + var12, + var13, + var14, + var15, + ], + ) + + np.testing.assert_array_equal(local_out[1], tensor_array[0, 1, 1:2]) + np.testing.assert_array_equal(local_out[2], tensor_array[1:]) + np.testing.assert_array_equal(local_out[3], tensor_array[0:1]) + np.testing.assert_array_equal(local_out[4], tensor_array[::-1]) + np.testing.assert_array_equal(local_out[5], tensor_array[1, 1:, 1:]) + np.testing.assert_array_equal( + local_out[6], tensor_array.reshape((3, -1, 3))[:, :, -1] + ) + np.testing.assert_array_equal(local_out[7], tensor_array[:, :, :-1]) + np.testing.assert_array_equal( + local_out[8], tensor_array[:1, :1, :1] + ) + np.testing.assert_array_equal( + local_out[9], tensor_array[:-1, :-1, :-1] + ) + np.testing.assert_array_equal( + local_out[10], tensor_array[::-1, :1, :-1] + ) + np.testing.assert_array_equal( + local_out[11], tensor_array[:-1, ::-1, -1:] + ) + np.testing.assert_array_equal( + local_out[12], tensor_array[1:2, 2:, ::-1] + ) + np.testing.assert_array_equal( + local_out[13], tensor_array[2:10, 2:, -2:-1] + ) + np.testing.assert_array_equal( + local_out[14], tensor_array[1:-1, 0:2, ::-1] + ) + np.testing.assert_array_equal( + local_out[15], tensor_array[::-1, ::-1, ::-1] + ) + + def test_slice(self): + places = [base.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + + for place in places: + self._test_slice(place) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_weight_normalization.py b/test/deprecated/legacy_test/test_weight_normalization_deprecated.py similarity index 99% rename from test/deprecated/legacy_test/test_weight_normalization.py rename to test/deprecated/legacy_test/test_weight_normalization_deprecated.py index f8793aef3052e..6e799166b4d42 100644 --- a/test/deprecated/legacy_test/test_weight_normalization.py +++ 
b/test/deprecated/legacy_test/test_weight_normalization_deprecated.py @@ -22,6 +22,8 @@ from paddle.base import core from paddle.base.param_attr import WeightNormParamAttr +paddle.enable_static() + class TestWeightNormalization(unittest.TestCase): batch_size = 3 diff --git a/test/deprecated/prim/composite_ops/test_composite_layer_norm.py b/test/deprecated/prim/composite_ops/test_composite_layer_norm_deprecated.py similarity index 100% rename from test/deprecated/prim/composite_ops/test_composite_layer_norm.py rename to test/deprecated/prim/composite_ops/test_composite_layer_norm_deprecated.py diff --git a/test/deprecated/prim/pir_prim/CMakeLists.txt b/test/deprecated/prim/pir_prim/CMakeLists.txt index 340b94fc53c95..15d788ccff424 100644 --- a/test/deprecated/prim/pir_prim/CMakeLists.txt +++ b/test/deprecated/prim/pir_prim/CMakeLists.txt @@ -1,5 +1,6 @@ -set(TEST_PRIM_TRANS_PIR_CASES test_custom_vjp_trait test_decomp_op - test_decompose_op test_vjp_prim) +set(TEST_PRIM_TRANS_PIR_CASES + test_custom_vjp_trait test_decomp_op test_decompose_op test_vjp_prim + test_batch_norm_shape_check) foreach(target ${TEST_PRIM_TRANS_PIR_CASES}) py_test_modules(${target} MODULES ${target} ENVS GLOG_v=1 diff --git a/test/deprecated/prim/pir_prim/test_batch_norm_shape_check.py b/test/deprecated/prim/pir_prim/test_batch_norm_shape_check.py new file mode 100644 index 0000000000000..045a88695d9e3 --- /dev/null +++ b/test/deprecated/prim/pir_prim/test_batch_norm_shape_check.py @@ -0,0 +1,85 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle import pir +from paddle.decomposition import decompose +from paddle.framework import core + +paddle.enable_static() + + +def batch_norm_net1(x, r_m, r_v, w, b): + return paddle.nn.functional.batch_norm(x, r_m, r_v, w, b, training=False) + + +class TestBuildOp(unittest.TestCase): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [1, 64, 512, 1024] + self.c_shape = [64] + self.dtype_x = "float32" + self.init_x_shape = [1, 64, 512, 1024] + self.x = np.random.random(self.x_shape).astype(self.dtype_x) + self.r_m = np.random.random(self.x_shape[1]).astype(self.dtype) + self.r_v = np.random.random(self.x_shape[1]).astype(self.dtype) + self.w = np.random.random(self.x_shape[1]).astype(self.dtype) + self.b = np.random.random(self.x_shape[1]).astype(self.dtype) + self.net = batch_norm_net1 + self.necessary_ops = "pd_op.batch_norm" + self.enable_cinn = False + self.tol = 5e-6 + + def get_ir_program(self): + paddle.enable_static() + x = paddle.randn([4, 4]) + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x = paddle.static.data('x', self.x_shape, x.dtype) + x.stop_gradients = False + r_m = paddle.static.data('r_m', self.c_shape, x.dtype) + r_v = paddle.static.data('r_v', self.c_shape, x.dtype) + w = paddle.static.data('w', self.c_shape, x.dtype) + b = paddle.static.data('b', self.c_shape, x.dtype) + y = batch_norm_net1(x, r_m, r_v, w, b) + res = 
paddle.tanh(y) + pir_program = pir.translate_to_pir(main_program.desc) + return pir_program + + def test_build_op(self): + pir_program = self.get_ir_program() + y = pir_program.global_block().ops[-2].results() + orig_shape = y[0].shape + with paddle.pir_utils.IrGuard(): + core._set_prim_forward_enabled(True) + y_new = decompose(pir_program, y) + core._set_prim_forward_enabled(False) + new_shape = y_new[0].shape + assert ( + orig_shape == new_shape + ), f"Original shape {orig_shape} is not equal to new shape {new_shape}" + op_name_list = [op.name() for op in pir_program.global_block().ops] + assert "pd_op.batch_norm_" not in op_name_list + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/prim/test_comp_get_grad_op_desc_prim_enabled.py b/test/deprecated/prim/test_comp_get_grad_op_desc_prim_enabled_deprecated.py similarity index 100% rename from test/deprecated/prim/test_comp_get_grad_op_desc_prim_enabled.py rename to test/deprecated/prim/test_comp_get_grad_op_desc_prim_enabled_deprecated.py diff --git a/test/deprecated/quantization/CMakeLists.txt b/test/deprecated/quantization/CMakeLists.txt index 5fc3911d0417f..0ab38193a8c09 100644 --- a/test/deprecated/quantization/CMakeLists.txt +++ b/test/deprecated/quantization/CMakeLists.txt @@ -193,7 +193,7 @@ if(WIN32) list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp) list(REMOVE_ITEM TEST_OPS test_weight_only_linear) list(REMOVE_ITEM TEST_OPS test_llm_int8_linear) - list(REMOVE_ITEM TEST_OPS test_quant_aware) + list(REMOVE_ITEM TEST_OPS test_quant_aware_deprecated) list(REMOVE_ITEM TEST_OPS test_quant_post_quant_aware) list(REMOVE_ITEM TEST_OPS test_quant_aware_user_defined) list(REMOVE_ITEM TEST_OPS test_quant_amp) @@ -236,14 +236,14 @@ list(REMOVE_ITEM TEST_OPS test_filter_pruning) # fix if(WIN32) set(SINGLE_CARD_TEST_OPS - test_user_defined_quantization - test_quantization_scale_pass - test_quantization_pass - test_moving_average_abs_max_scale_op + test_user_defined_quantization_deprecated + 
test_quantization_scale_pass_deprecated + test_quantization_pass_deprecated + test_moving_average_abs_max_scale_op_deprecated test_imperative_qat_channelwise test_imperative_qat test_imperative_out_scale - test_graph) + test_graph_deprecated) list(REMOVE_ITEM TEST_OPS ${SINGLE_CARD_TEST_OPS}) foreach(src ${SINGLE_CARD_TEST_OPS}) py_test(${src} SRCS ${src}.py ENVS CUDA_VISIBLE_DEVICES=0) @@ -260,16 +260,17 @@ if(NOT WIN32) 120) set_tests_properties(test_weight_quantization_mobilenetv1 PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant_aware PROPERTIES TIMEOUT 200) + set_tests_properties(test_quant_aware_deprecated PROPERTIES TIMEOUT 200) set_tests_properties(test_quant_post_quant_aware PROPERTIES TIMEOUT 200) set_tests_properties(test_quant_aware_user_defined PROPERTIES TIMEOUT 200) set_tests_properties(test_quant_amp PROPERTIES TIMEOUT 200) endif() -set_tests_properties(test_graph PROPERTIES TIMEOUT 120) -set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120) +set_tests_properties(test_graph_deprecated PROPERTIES TIMEOUT 120) +set_tests_properties(test_quantization_pass_deprecated PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 200) -set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 200) +set_tests_properties(test_user_defined_quantization_deprecated + PROPERTIES TIMEOUT 200) set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 200) set_tests_properties(test_imperative_qat_fuse PROPERTIES TIMEOUT 200) set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 200) @@ -279,3 +280,6 @@ if(APPLE) 300) set_tests_properties(test_imperative_skip_op PROPERTIES TIMEOUT 300) endif() + +set_tests_properties(test_quantization_scale_pass_deprecated PROPERTIES TIMEOUT + 100) diff --git a/test/deprecated/quantization/test_graph.py b/test/deprecated/quantization/test_graph_deprecated.py similarity index 100% rename from test/deprecated/quantization/test_graph.py rename to 
test/deprecated/quantization/test_graph_deprecated.py diff --git a/test/deprecated/quantization/test_moving_average_abs_max_scale_op.py b/test/deprecated/quantization/test_moving_average_abs_max_scale_op_deprecated.py similarity index 100% rename from test/deprecated/quantization/test_moving_average_abs_max_scale_op.py rename to test/deprecated/quantization/test_moving_average_abs_max_scale_op_deprecated.py diff --git a/test/deprecated/quantization/test_quant2_int8_mkldnn_pass.py b/test/deprecated/quantization/test_quant2_int8_mkldnn_pass_deprecated.py similarity index 100% rename from test/deprecated/quantization/test_quant2_int8_mkldnn_pass.py rename to test/deprecated/quantization/test_quant2_int8_mkldnn_pass_deprecated.py diff --git a/test/deprecated/quantization/test_quant_amp.py b/test/deprecated/quantization/test_quant_amp.py index 2f285dfdf07d9..b708355a54827 100644 --- a/test/deprecated/quantization/test_quant_amp.py +++ b/test/deprecated/quantization/test_quant_amp.py @@ -15,10 +15,12 @@ import logging import os +import sys import unittest +sys.path.append(".") import numpy as np -from test_quant_aware import MobileNet +from test_quant_aware_deprecated import MobileNet import paddle from paddle.static.quantization.quanter import convert, quant_aware diff --git a/test/deprecated/quantization/test_quant_aware.py b/test/deprecated/quantization/test_quant_aware_deprecated.py similarity index 100% rename from test/deprecated/quantization/test_quant_aware.py rename to test/deprecated/quantization/test_quant_aware_deprecated.py diff --git a/test/deprecated/quantization/test_quant_aware_user_defined.py b/test/deprecated/quantization/test_quant_aware_user_defined.py index 3521ecf7ddeff..124836f560e6a 100644 --- a/test/deprecated/quantization/test_quant_aware_user_defined.py +++ b/test/deprecated/quantization/test_quant_aware_user_defined.py @@ -13,10 +13,15 @@ # limitations under the License. 
import logging import os +import sys import unittest +sys.path.append(".") import numpy as np -from test_quant_aware import MobileNet, StaticCase +from test_quant_aware_deprecated import ( + MobileNet, + StaticCase, +) import paddle from paddle.static.quantization.quanter import convert, quant_aware diff --git a/test/deprecated/quantization/test_quant_post_quant_aware.py b/test/deprecated/quantization/test_quant_post_quant_aware.py index 0fe582306fbd7..db9e0a857f9d9 100644 --- a/test/deprecated/quantization/test_quant_post_quant_aware.py +++ b/test/deprecated/quantization/test_quant_post_quant_aware.py @@ -14,10 +14,12 @@ import logging import random +import sys import unittest +sys.path.append(".") import numpy as np -from test_quant_aware import StaticCase +from test_quant_aware_deprecated import StaticCase import paddle from paddle.static.quantization.quanter import convert, quant_aware diff --git a/test/deprecated/quantization/test_quantization_mkldnn_pass.py b/test/deprecated/quantization/test_quantization_mkldnn_pass_deprecated.py similarity index 100% rename from test/deprecated/quantization/test_quantization_mkldnn_pass.py rename to test/deprecated/quantization/test_quantization_mkldnn_pass_deprecated.py diff --git a/test/deprecated/quantization/test_quantization_pass.py b/test/deprecated/quantization/test_quantization_pass_deprecated.py similarity index 100% rename from test/deprecated/quantization/test_quantization_pass.py rename to test/deprecated/quantization/test_quantization_pass_deprecated.py diff --git a/test/deprecated/quantization/test_quantization_scale_pass.py b/test/deprecated/quantization/test_quantization_scale_pass_deprecated.py similarity index 100% rename from test/deprecated/quantization/test_quantization_scale_pass.py rename to test/deprecated/quantization/test_quantization_scale_pass_deprecated.py diff --git a/test/deprecated/quantization/test_user_defined_quantization.py 
b/test/deprecated/quantization/test_user_defined_quantization_deprecated.py similarity index 100% rename from test/deprecated/quantization/test_user_defined_quantization.py rename to test/deprecated/quantization/test_user_defined_quantization_deprecated.py diff --git a/test/deprecated/rnn/CMakeLists.txt b/test/deprecated/rnn/CMakeLists.txt index 04773499b3591..a06731560086d 100644 --- a/test/deprecated/rnn/CMakeLists.txt +++ b/test/deprecated/rnn/CMakeLists.txt @@ -9,5 +9,7 @@ foreach(TEST_OP ${TEST_OPS}) endforeach() if(NOT WIN32) set_tests_properties(test_rnn_nets_static PROPERTIES TIMEOUT 120) + set_tests_properties(test_rnn_nets_static_deprecated PROPERTIES TIMEOUT 120) set_tests_properties(test_rnn_nets PROPERTIES TIMEOUT 120) + set_tests_properties(test_rnn_nets_deprecated PROPERTIES TIMEOUT 120) endif() diff --git a/test/deprecated/rnn/test_rnn_nets.py b/test/deprecated/rnn/test_rnn_nets.py index f87424245ce81..6cec726472c58 100644 --- a/test/deprecated/rnn/test_rnn_nets.py +++ b/test/deprecated/rnn/test_rnn_nets.py @@ -95,37 +95,12 @@ def test_with_zero_state(self): np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) - def test_with_input_lengths(self): - rnn1 = self.rnn1 - rnn2 = self.rnn2 - - x = np.random.randn(12, 4, 16) - if not self.time_major: - x = np.transpose(x, [1, 0, 2]) - sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) - - y1, h1 = rnn1(x, sequence_length=sequence_length) - - seq_len = paddle.to_tensor(sequence_length) - mask = paddle.static.nn.sequence_lod.sequence_mask( - seq_len, dtype=paddle.get_default_dtype() - ) - if self.time_major: - mask = paddle.transpose(mask, [1, 0]) - y2, h2 = rnn2(paddle.to_tensor(x), sequence_length=seq_len) - mask = paddle.unsqueeze(mask, -1) - y2 = paddle.multiply(y2, mask) - - np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) - def 
test_predict(self): predict_test_util(self.place, "SimpleRNN") def runTest(self): self.test_with_initial_state() self.test_with_zero_state() - self.test_with_input_lengths() self.test_predict() @@ -180,37 +155,12 @@ def test_with_zero_state(self): np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) - def test_with_input_lengths(self): - rnn1 = self.rnn1 - rnn2 = self.rnn2 - - x = np.random.randn(12, 4, 16) - if not self.time_major: - x = np.transpose(x, [1, 0, 2]) - sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) - - y1, h1 = rnn1(x, sequence_length=sequence_length) - - seq_len = paddle.to_tensor(sequence_length) - mask = paddle.static.nn.sequence_lod.sequence_mask( - seq_len, dtype=paddle.get_default_dtype() - ) - if self.time_major: - mask = paddle.transpose(mask, [1, 0]) - y2, h2 = rnn2(paddle.to_tensor(x), sequence_length=seq_len) - mask = paddle.unsqueeze(mask, -1) - y2 = paddle.multiply(y2, mask) - - np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) - def test_predict(self): predict_test_util(self.place, "GRU") def runTest(self): self.test_with_initial_state() self.test_with_zero_state() - self.test_with_input_lengths() self.test_predict() @@ -273,31 +223,6 @@ def test_with_zero_state(self): np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) - def test_with_input_lengths(self): - rnn1 = self.rnn1 - rnn2 = self.rnn2 - - x = np.random.randn(12, 4, 16) - if not self.time_major: - x = np.transpose(x, [1, 0, 2]) - sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) - - y1, (h1, c1) = rnn1(x, sequence_length=sequence_length) - - seq_len = paddle.to_tensor(sequence_length) - mask = paddle.static.nn.sequence_lod.sequence_mask( - seq_len, dtype=paddle.get_default_dtype() - ) - if self.time_major: - mask = 
paddle.transpose(mask, [1, 0]) - y2, (h2, c2) = rnn2(paddle.to_tensor(x), sequence_length=seq_len) - mask = paddle.unsqueeze(mask, -1) - y2 = paddle.multiply(y2, mask) - - np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) - def test_predict(self): predict_test_util(self.place, "LSTM") predict_test_util(self.place, "LSTM", False) @@ -305,7 +230,6 @@ def test_predict(self): def runTest(self): self.test_with_initial_state() self.test_with_zero_state() - self.test_with_input_lengths() self.test_predict() diff --git a/test/deprecated/rnn/test_rnn_nets_deprecated.py b/test/deprecated/rnn/test_rnn_nets_deprecated.py new file mode 100644 index 0000000000000..ee435a1235ef7 --- /dev/null +++ b/test/deprecated/rnn/test_rnn_nets_deprecated.py @@ -0,0 +1,327 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle + +paddle.set_default_dtype("float64") +import os +import sys +import tempfile +import unittest + +import numpy as np +from convert import convert_params_for_net + +from paddle.pir_utils import test_with_dygraph_pir + +sys.path.append("../../rnn") +from rnn_numpy import GRU, LSTM, SimpleRNN + +bidirectional_list = ["bidirectional", "bidirect"] + + +class TestSimpleRNN(unittest.TestCase): + def __init__( + self, time_major=True, direction="forward", place="cpu", mode='RNN_TANH' + ): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + self.mode = mode + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + place = paddle.set_device(self.place) + paddle.disable_static(place) + rnn1 = SimpleRNN( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + nonlinearity=self.mode, + ) + rnn2 = paddle.nn.SimpleRNN( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + activation=self.mode[4:].lower(), + ) + convert_params_for_net(rnn1, rnn2) + + self.rnn1 = rnn1 + self.rnn2 = rnn2 + + def test_with_input_lengths(self): + rnn1 = self.rnn1 + rnn2 = self.rnn2 + + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) + + y1, h1 = rnn1(x, sequence_length=sequence_length) + + seq_len = paddle.to_tensor(sequence_length) + mask = paddle.static.nn.sequence_lod.sequence_mask( + seq_len, dtype=paddle.get_default_dtype() + ) + if self.time_major: + mask = paddle.transpose(mask, [1, 0]) + y2, h2 = rnn2(paddle.to_tensor(x), sequence_length=seq_len) + mask = paddle.unsqueeze(mask, -1) + y2 = paddle.multiply(y2, mask) + + np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) + 
np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_with_input_lengths() + + +class TestGRU(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + place = paddle.set_device(self.place) + paddle.disable_static(place) + rnn1 = GRU( + 16, 32, 2, time_major=self.time_major, direction=self.direction + ) + rnn2 = paddle.nn.GRU( + 16, 32, 2, time_major=self.time_major, direction=self.direction + ) + convert_params_for_net(rnn1, rnn2) + + self.rnn1 = rnn1 + self.rnn2 = rnn2 + + def test_with_input_lengths(self): + rnn1 = self.rnn1 + rnn2 = self.rnn2 + + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) + + y1, h1 = rnn1(x, sequence_length=sequence_length) + + seq_len = paddle.to_tensor(sequence_length) + mask = paddle.static.nn.sequence_lod.sequence_mask( + seq_len, dtype=paddle.get_default_dtype() + ) + if self.time_major: + mask = paddle.transpose(mask, [1, 0]) + y2, h2 = rnn2(paddle.to_tensor(x), sequence_length=seq_len) + mask = paddle.unsqueeze(mask, -1) + y2 = paddle.multiply(y2, mask) + + np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_with_input_lengths() + + +class TestLSTM(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + 
self.place = place + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + place = paddle.set_device(self.place) + paddle.disable_static(place) + rnn1 = LSTM( + 16, 32, 2, time_major=self.time_major, direction=self.direction + ) + rnn2 = paddle.nn.LSTM( + 16, 32, 2, time_major=self.time_major, direction=self.direction + ) + convert_params_for_net(rnn1, rnn2) + + self.rnn1 = rnn1 + self.rnn2 = rnn2 + + def test_with_input_lengths(self): + rnn1 = self.rnn1 + rnn2 = self.rnn2 + + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) + + y1, (h1, c1) = rnn1(x, sequence_length=sequence_length) + + seq_len = paddle.to_tensor(sequence_length) + mask = paddle.static.nn.sequence_lod.sequence_mask( + seq_len, dtype=paddle.get_default_dtype() + ) + if self.time_major: + mask = paddle.transpose(mask, [1, 0]) + y2, (h2, c2) = rnn2(paddle.to_tensor(x), sequence_length=seq_len) + mask = paddle.unsqueeze(mask, -1) + y2 = paddle.multiply(y2, mask) + + np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_with_input_lengths() + + +class TestLSTMWithProjSize(TestLSTM): + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. 
+ place = paddle.set_device(self.place) + paddle.disable_static(place) + rnn1 = LSTM( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + proj_size=8, + ) + rnn2 = paddle.nn.LSTM( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + proj_size=8, + ) + convert_params_for_net(rnn1, rnn2) + + self.rnn1 = rnn1 + self.rnn2 = rnn2 + self.proj_size = 8 + + +@test_with_dygraph_pir +def predict_test_util(place, mode, stop_gradient=True): + place = paddle.set_device(place) + paddle.seed(123) + np.random.seed(123) + + class Net(paddle.nn.Layer): + def __init__(self): + super().__init__() + self.rnn = getattr(paddle.nn, mode)( + 16, 32, 2, direction="bidirectional", dropout=0.1 + ) + + def forward(self, input): + return self.rnn(input) + + x = paddle.randn((4, 10, 16)) + x.stop_gradient = stop_gradient + seq_len = paddle.to_tensor(np.array([10, 6, 8, 5])) + mask = paddle.static.nn.sequence_lod.sequence_mask( + seq_len, maxlen=10, dtype=x.dtype + ) + mask = paddle.unsqueeze(mask, [2]) + rnn = Net() + y, _ = rnn(x) + y = y * mask + loss = paddle.mean(y) + loss.backward() + optimizer = paddle.optimizer.Adam( + learning_rate=0.1, parameters=rnn.parameters() + ) + optimizer.step() + rnn.eval() + y, _ = rnn(x) + # `jit.to_static` would include a train_program, eval mode might cause + # some errors currently, such as dropout grad op gets `is_test == True`. 
+ rnn.train() + + rnn = paddle.jit.to_static( + rnn, + [paddle.static.InputSpec(shape=[None, None, 16], dtype=x.dtype)], + full_graph=True, + ) + temp_dir = tempfile.TemporaryDirectory() + save_dirname = os.path.join(temp_dir.name, "./inference/%s_infer" % mode) + + paddle.jit.save(rnn, save_dirname) + + paddle.enable_static() + + new_scope = paddle.static.Scope() + with paddle.static.scope_guard(new_scope): + exe = paddle.static.Executor(place) + [ + inference_program, + feed_target_names, + fetch_targets, + ] = paddle.static.load_inference_model(save_dirname, exe) + results = exe.run( + inference_program, + feed={feed_target_names[0]: x.numpy()}, + fetch_list=fetch_targets, + ) + np.testing.assert_equal( + y.numpy(), results[0] + ) # eval results equal predict results + paddle.disable_static() + + temp_dir.cleanup() + + +def load_tests(loader, tests, pattern): + suite = unittest.TestSuite() + devices = ["cpu", "gpu"] if paddle.base.is_compiled_with_cuda() else ["cpu"] + for direction in ["forward", "bidirectional", "bidirect"]: + for time_major in [True, False]: + for device in devices: + for test_class in [ + TestSimpleRNN, + TestLSTM, + TestGRU, + TestLSTMWithProjSize, + ]: + suite.addTest(test_class(time_major, direction, device)) + if test_class == TestSimpleRNN: + suite.addTest( + test_class( + time_major, direction, device, mode="RNN_RELU" + ) + ) + return suite + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/rnn/test_rnn_nets_static.py b/test/deprecated/rnn/test_rnn_nets_static.py index da00c37682fae..3ccdad1dfc71e 100644 --- a/test/deprecated/rnn/test_rnn_nets_static.py +++ b/test/deprecated/rnn/test_rnn_nets_static.py @@ -150,50 +150,9 @@ def test_with_zero_state(self): np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5) - def test_with_input_lengths(self): - mp = self.mp.clone() - sp = self.sp - rnn1 = self.rnn1 - rnn2 = self.rnn2 - exe = self.executor - scope = 
self.scope - - x = np.random.randn(12, 4, 16) - if not self.time_major: - x = np.transpose(x, [1, 0, 2]) - sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) - - y1, h1 = rnn1(x, sequence_length=sequence_length) - - with paddle.base.unique_name.guard(): - with paddle.static.program_guard(mp, sp): - x_data = paddle.static.data( - "input", - [-1, -1, 16], - dtype=paddle.framework.get_default_dtype(), - ) - seq_len = paddle.static.data("seq_len", [-1], dtype="int64") - mask = paddle.static.nn.sequence_lod.sequence_mask( - seq_len, dtype=paddle.get_default_dtype() - ) - if self.time_major: - mask = paddle.transpose(mask, [1, 0]) - y, h = rnn2(x_data, sequence_length=seq_len) - mask = paddle.unsqueeze(mask, -1) - y = paddle.multiply(y, mask) - - feed_dict = {x_data.name: x, seq_len.name: sequence_length} - - with paddle.static.scope_guard(scope): - y2, h2 = exe.run(mp, feed=feed_dict, fetch_list=[y, h]) - - np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5) - def runTest(self): self.test_with_initial_state() self.test_with_zero_state() - self.test_with_input_lengths() class TestGRU(unittest.TestCase): @@ -307,46 +266,6 @@ def test_with_zero_state(self): np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5) - def test_with_input_lengths(self): - mp = self.mp.clone() - sp = self.sp - rnn1 = self.rnn1 - rnn2 = self.rnn2 - exe = self.executor - scope = self.scope - - x = np.random.randn(12, 4, 16) - if not self.time_major: - x = np.transpose(x, [1, 0, 2]) - sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) - - y1, h1 = rnn1(x, sequence_length=sequence_length) - - with paddle.base.unique_name.guard(): - with paddle.static.program_guard(mp, sp): - x_data = paddle.static.data( - "input", - [-1, -1, 16], - dtype=paddle.framework.get_default_dtype(), - ) - seq_len = paddle.static.data("seq_len", [-1], dtype="int64") - mask = 
paddle.static.nn.sequence_lod.sequence_mask( - seq_len, dtype=paddle.get_default_dtype() - ) - if self.time_major: - mask = paddle.transpose(mask, [1, 0]) - y, h = rnn2(x_data, sequence_length=seq_len) - mask = paddle.unsqueeze(mask, -1) - y = paddle.multiply(y, mask) - - feed_dict = {x_data.name: x, seq_len.name: sequence_length} - - with paddle.static.scope_guard(scope): - y2, h2 = exe.run(mp, feed=feed_dict, fetch_list=[y, h]) - - np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5) - def runTest(self): self.test_with_initial_state() self.test_with_zero_state() @@ -476,51 +395,9 @@ def test_with_zero_state(self): np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2, atol=1e-8, rtol=1e-5) - def test_with_input_lengths(self): - mp = self.mp.clone() - sp = self.sp - rnn1 = self.rnn1 - rnn2 = self.rnn2 - exe = self.executor - scope = self.scope - - x = np.random.randn(12, 4, 16) - if not self.time_major: - x = np.transpose(x, [1, 0, 2]) - sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) - - y1, (h1, c1) = rnn1(x, sequence_length=sequence_length) - - with paddle.base.unique_name.guard(): - with paddle.static.program_guard(mp, sp): - x_data = paddle.static.data( - "input", - [-1, -1, 16], - dtype=paddle.framework.get_default_dtype(), - ) - seq_len = paddle.static.data("seq_len", [-1], dtype="int64") - mask = paddle.static.nn.sequence_lod.sequence_mask( - seq_len, dtype=paddle.get_default_dtype() - ) - if self.time_major: - mask = paddle.transpose(mask, [1, 0]) - y, (h, c) = rnn2(x_data, sequence_length=seq_len) - mask = paddle.unsqueeze(mask, -1) - y = paddle.multiply(y, mask) - - feed_dict = {x_data.name: x, seq_len.name: sequence_length} - - with paddle.static.scope_guard(scope): - y2, h2, c2 = exe.run(mp, feed=feed_dict, fetch_list=[y, h, c]) - - np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(h1, h2, atol=1e-8, 
rtol=1e-5) - np.testing.assert_allclose(c1, c2, atol=1e-8, rtol=1e-5) - def runTest(self): self.test_with_initial_state() self.test_with_zero_state() - self.test_with_input_lengths() class TestLSTMWithProjSize(TestLSTM): diff --git a/test/deprecated/rnn/test_rnn_nets_static_deprecated.py b/test/deprecated/rnn/test_rnn_nets_static_deprecated.py new file mode 100644 index 0000000000000..ef58211d65d66 --- /dev/null +++ b/test/deprecated/rnn/test_rnn_nets_static_deprecated.py @@ -0,0 +1,372 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle + +paddle.set_default_dtype("float64") + + +paddle.enable_static() + +import sys +import unittest + +import numpy as np +from convert import convert_params_for_net_static + +sys.path.append("../../rnn") +from rnn_numpy import GRU, LSTM, SimpleRNN + +bidirectional_list = ["bidirectional", "bidirect"] + + +class TestSimpleRNN(unittest.TestCase): + def __init__( + self, time_major=True, direction="forward", place="cpu", mode="RNN_TANH" + ): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + self.mode = mode + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. 
+ place = paddle.set_device(self.place) + rnn1 = SimpleRNN( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + nonlinearity=self.mode, + ) + + mp = paddle.static.Program() + sp = paddle.static.Program() + with paddle.base.unique_name.guard(): + with paddle.static.program_guard(mp, sp): + rnn2 = paddle.nn.SimpleRNN( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + activation=self.mode[4:].lower(), + ) + + exe = paddle.static.Executor(place) + scope = paddle.base.Scope() + with paddle.static.scope_guard(scope): + exe.run(sp) + convert_params_for_net_static(rnn1, rnn2, place) + + self.mp = mp + self.sp = sp + self.rnn1 = rnn1 + self.rnn2 = rnn2 + + self.place = place + self.executor = exe + self.scope = scope + + def test_with_input_lengths(self): + mp = self.mp.clone() + sp = self.sp + rnn1 = self.rnn1 + rnn2 = self.rnn2 + exe = self.executor + scope = self.scope + + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) + + y1, h1 = rnn1(x, sequence_length=sequence_length) + + with paddle.base.unique_name.guard(): + with paddle.static.program_guard(mp, sp): + x_data = paddle.static.data( + "input", + [-1, -1, 16], + dtype=paddle.framework.get_default_dtype(), + ) + seq_len = paddle.static.data("seq_len", [-1], dtype="int64") + mask = paddle.static.nn.sequence_lod.sequence_mask( + seq_len, dtype=paddle.get_default_dtype() + ) + if self.time_major: + mask = paddle.transpose(mask, [1, 0]) + y, h = rnn2(x_data, sequence_length=seq_len) + mask = paddle.unsqueeze(mask, -1) + y = paddle.multiply(y, mask) + + feed_dict = {x_data.name: x, seq_len.name: sequence_length} + + with paddle.static.scope_guard(scope): + y2, h2 = exe.run(mp, feed=feed_dict, fetch_list=[y, h]) + + np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5) + + def runTest(self): + 
self.test_with_input_lengths() + + +class TestGRU(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + place = paddle.set_device(self.place) + rnn1 = GRU( + 16, 32, 2, time_major=self.time_major, direction=self.direction + ) + + mp = paddle.static.Program() + sp = paddle.static.Program() + with paddle.base.unique_name.guard(): + with paddle.static.program_guard(mp, sp): + rnn2 = paddle.nn.GRU( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + ) + + exe = paddle.static.Executor(place) + scope = paddle.base.Scope() + with paddle.static.scope_guard(scope): + exe.run(sp) + convert_params_for_net_static(rnn1, rnn2, place) + + self.mp = mp + self.sp = sp + self.rnn1 = rnn1 + self.rnn2 = rnn2 + + self.place = place + self.executor = exe + self.scope = scope + + def test_with_input_lengths(self): + mp = self.mp.clone() + sp = self.sp + rnn1 = self.rnn1 + rnn2 = self.rnn2 + exe = self.executor + scope = self.scope + + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) + + y1, h1 = rnn1(x, sequence_length=sequence_length) + + with paddle.base.unique_name.guard(): + with paddle.static.program_guard(mp, sp): + x_data = paddle.static.data( + "input", + [-1, -1, 16], + dtype=paddle.framework.get_default_dtype(), + ) + seq_len = paddle.static.data("seq_len", [-1], dtype="int64") + mask = paddle.static.nn.sequence_lod.sequence_mask( + seq_len, dtype=paddle.get_default_dtype() + ) + if self.time_major: + mask = paddle.transpose(mask, [1, 0]) + y, h = rnn2(x_data, 
sequence_length=seq_len) + mask = paddle.unsqueeze(mask, -1) + y = paddle.multiply(y, mask) + + feed_dict = {x_data.name: x, seq_len.name: sequence_length} + + with paddle.static.scope_guard(scope): + y2, h2 = exe.run(mp, feed=feed_dict, fetch_list=[y, h]) + + np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_with_input_lengths() + + +class TestLSTM(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + place = paddle.set_device(self.place) + rnn1 = LSTM( + 16, 32, 2, time_major=self.time_major, direction=self.direction + ) + + mp = paddle.static.Program() + sp = paddle.static.Program() + with paddle.base.unique_name.guard(): + with paddle.static.program_guard(mp, sp): + rnn2 = paddle.nn.LSTM( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + ) + + exe = paddle.static.Executor(place) + scope = paddle.base.Scope() + with paddle.static.scope_guard(scope): + exe.run(sp) + convert_params_for_net_static(rnn1, rnn2, place) + + self.mp = mp + self.sp = sp + self.rnn1 = rnn1 + self.rnn2 = rnn2 + + self.place = place + self.executor = exe + self.scope = scope + + def test_with_input_lengths(self): + mp = self.mp.clone() + sp = self.sp + rnn1 = self.rnn1 + rnn2 = self.rnn2 + exe = self.executor + scope = self.scope + + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + sequence_length = np.array([12, 10, 9, 8], dtype=np.int64) + + y1, (h1, c1) = rnn1(x, sequence_length=sequence_length) + + with paddle.base.unique_name.guard(): + 
with paddle.static.program_guard(mp, sp): + x_data = paddle.static.data( + "input", + [-1, -1, 16], + dtype=paddle.framework.get_default_dtype(), + ) + seq_len = paddle.static.data("seq_len", [-1], dtype="int64") + mask = paddle.static.nn.sequence_lod.sequence_mask( + seq_len, dtype=paddle.get_default_dtype() + ) + if self.time_major: + mask = paddle.transpose(mask, [1, 0]) + y, (h, c) = rnn2(x_data, sequence_length=seq_len) + mask = paddle.unsqueeze(mask, -1) + y = paddle.multiply(y, mask) + + feed_dict = {x_data.name: x, seq_len.name: sequence_length} + + with paddle.static.scope_guard(scope): + y2, h2, c2 = exe.run(mp, feed=feed_dict, fetch_list=[y, h, c]) + + np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(c1, c2, atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_with_input_lengths() + + +class TestLSTMWithProjSize(TestLSTM): + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. 
+ place = paddle.set_device(self.place) + rnn1 = LSTM( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + proj_size=8, + ) + + mp = paddle.static.Program() + sp = paddle.static.Program() + with paddle.base.unique_name.guard(): + with paddle.static.program_guard(mp, sp): + rnn2 = paddle.nn.LSTM( + 16, + 32, + 2, + time_major=self.time_major, + direction=self.direction, + proj_size=8, + ) + + exe = paddle.static.Executor(place) + scope = paddle.base.Scope() + with paddle.static.scope_guard(scope): + exe.run(sp) + convert_params_for_net_static(rnn1, rnn2, place) + + self.mp = mp + self.sp = sp + self.rnn1 = rnn1 + self.rnn2 = rnn2 + self.proj_size = 8 + + self.place = place + self.executor = exe + self.scope = scope + + +def load_tests(loader, tests, pattern): + suite = unittest.TestSuite() + devices = ["cpu", "gpu"] if paddle.base.is_compiled_with_cuda() else ["cpu"] + for direction in ["forward", "bidirectional", "bidirect"]: + for time_major in [True, False]: + for device in devices: + for test_class in [ + TestSimpleRNN, + TestLSTM, + TestGRU, + TestLSTMWithProjSize, + ]: + suite.addTest(test_class(time_major, direction, device)) + if test_class == TestSimpleRNN: + suite.addTest( + test_class( + time_major, direction, device, mode="RNN_RELU" + ) + ) + return suite + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/standalone_executor/test_standalone_dist_attr_run_time_set_get.py b/test/deprecated/standalone_executor/test_standalone_dist_attr_run_time_set_get_deprecated.py similarity index 100% rename from test/deprecated/standalone_executor/test_standalone_dist_attr_run_time_set_get.py rename to test/deprecated/standalone_executor/test_standalone_dist_attr_run_time_set_get_deprecated.py diff --git a/test/deprecated/standalone_executor/test_standalone_executor_multi_micro_batch.py b/test/deprecated/standalone_executor/test_standalone_executor_multi_micro_batch_deprecated.py similarity index 100% rename from 
test/deprecated/standalone_executor/test_standalone_executor_multi_micro_batch.py rename to test/deprecated/standalone_executor/test_standalone_executor_multi_micro_batch_deprecated.py diff --git a/test/deprecated/standalone_executor/test_standalone_executor_plan.py b/test/deprecated/standalone_executor/test_standalone_executor_plan_deprecated.py similarity index 100% rename from test/deprecated/standalone_executor/test_standalone_executor_plan.py rename to test/deprecated/standalone_executor/test_standalone_executor_plan_deprecated.py diff --git a/test/deprecated/standalone_executor/test_standalone_op_priority.py b/test/deprecated/standalone_executor/test_standalone_op_priority_deprecated.py similarity index 100% rename from test/deprecated/standalone_executor/test_standalone_op_priority.py rename to test/deprecated/standalone_executor/test_standalone_op_priority_deprecated.py diff --git a/test/deprecated/standalone_executor/test_standalone_sequentail_run.py b/test/deprecated/standalone_executor/test_standalone_sequentail_run_deprecated.py similarity index 100% rename from test/deprecated/standalone_executor/test_standalone_sequentail_run.py rename to test/deprecated/standalone_executor/test_standalone_sequentail_run_deprecated.py diff --git a/test/deprecated/tokenizer/CMakeLists.txt b/test/deprecated/tokenizer/CMakeLists.txt index 1cf384df660b3..cbab1a270c28f 100644 --- a/test/deprecated/tokenizer/CMakeLists.txt +++ b/test/deprecated/tokenizer/CMakeLists.txt @@ -8,5 +8,6 @@ foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() -set_tests_properties(test_faster_tokenizer_op PROPERTIES LABELS - "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_faster_tokenizer_op_deprecated + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_faster_tokenizer_op_deprecated PROPERTIES TIMEOUT 120) diff --git a/test/deprecated/tokenizer/test_faster_tokenizer_op.py b/test/deprecated/tokenizer/test_faster_tokenizer_op_deprecated.py similarity index 100% 
rename from test/deprecated/tokenizer/test_faster_tokenizer_op.py rename to test/deprecated/tokenizer/test_faster_tokenizer_op_deprecated.py diff --git a/test/distributed_passes/test_fuse_allreduce_split_to_reducescatter_pass.py b/test/distributed_passes/test_fuse_allreduce_split_to_reducescatter_pass.py index b36a5121d2e82..5127589c36396 100644 --- a/test/distributed_passes/test_fuse_allreduce_split_to_reducescatter_pass.py +++ b/test/distributed_passes/test_fuse_allreduce_split_to_reducescatter_pass.py @@ -22,7 +22,7 @@ (%38) = "pd_op.data" () {dtype:(pd_op.DataType)bfloat16,name:"linear_0.tmp_0",persistable:[false],place:(pd_op.Place)Place(gpu:0),shape:(pd_op.IntArray)[4096,1,28672],stop_gradient:[false]} : () -> builtin.tensor<4096x1x28672xbf16> (%48) = "pd_op.data" () {dtype:(pd_op.DataType)bfloat16,name:"input",persistable:[false],place:(pd_op.Place)Place(gpu:0),shape:(pd_op.IntArray)[4096,1,28672],stop_gradient:[false]} : () -> builtin.tensor<4096x1x28672xbf16> (%50) = "pd_op.matmul" (%48, %2) {persistable:[false],stop_gradient:[false],transpose_x:false,transpose_y:true} : (builtin.tensor<4096x1x28672xbf16>, builtin.tensor<8192x28672xbf16>) -> builtin.tensor<4096x1x8192xbf16> - (%57) = "pd_op.c_allreduce_sum_" (%50) {persistable:[false],ring_id:(Int32)36,stop_gradient:[false],use_calc_stream:true,use_model_parallel:true} : (builtin.tensor<4096x1x8192xbf16>) -> builtin.tensor<4096x1x8192xbf16> + (%57) = "pd_op.c_allreduce_sum_" (%50) {event_to_record:"event_7989",events_to_wait:[],execution_stream:"auto_parallel_mp",force_record_event:false,persistable:[false],ring_id:(Int32)36,stop_gradient:[false],use_calc_stream:true,use_model_parallel:true} : (builtin.tensor<4096x1x8192xbf16>) -> builtin.tensor<4096x1x8192xbf16> (%63) = "pd_op.assign" (%57) {persistable:[false],stop_gradient:[false]} : (builtin.tensor<4096x1x8192xbf16>) -> builtin.tensor<4096x1x8192xbf16> (%64) = "pd_op.full" () 
{dtype:(pd_op.DataType)int32,place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[1],stop_gradient:[true],value:(Float)0} : () -> builtin.tensor<1xi32> (%65) = "pd_op.split_with_num" (%63, %64) {num:(Int32)2,persistable:[false],stop_gradient:[false]} : (builtin.tensor<4096x1x8192xbf16>, builtin.tensor<1xi32>) -> vec[builtin.tensor<2048x1x8192xbf16>,builtin.tensor<2048x1x8192xbf16>] diff --git a/test/deprecated/distributed_passes/test_ps_trainer_pass.py b/test/distributed_passes/test_ps_trainer_pass.py similarity index 100% rename from test/deprecated/distributed_passes/test_ps_trainer_pass.py rename to test/distributed_passes/test_ps_trainer_pass.py diff --git a/test/distribution/test_distribution_student_t.py b/test/distribution/test_distribution_student_t.py new file mode 100644 index 0000000000000..900e47cea2428 --- /dev/null +++ b/test/distribution/test_distribution_student_t.py @@ -0,0 +1,274 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +import parameterize +import scipy.stats +from distribution import config +from parameterize import ( + TEST_CASE_NAME, + parameterize_cls, + parameterize_func, +) + +import paddle +from paddle.distribution.student_t import StudentT + + +@parameterize.place(config.DEVICES) +@parameterize.parameterize_cls( + (parameterize.TEST_CASE_NAME, 'df', 'loc', 'scale'), + [ + ( + 'one-dim', + 10.0, + 1.0, + 2.0, + ), + ( + 'multi-dim', + parameterize.xrand((2, 1), dtype='float32', min=4, max=30), + parameterize.xrand((2, 3), dtype='float32', min=1, max=10), + parameterize.xrand((2, 3), dtype='float32', min=0.1, max=3), + ), + ( + 'multi-dim2', + parameterize.xrand((2, 1), dtype='float64', min=4, max=30), + parameterize.xrand((2, 3), dtype='float64', min=-10, max=-1), + parameterize.xrand((2, 3), dtype='float64', min=0.1, max=3), + ), + ], +) +class TestStudentT(unittest.TestCase): + def setUp(self): + df = ( + self.df if isinstance(self.df, float) else paddle.to_tensor(self.df) + ) + loc = ( + self.loc + if isinstance(self.loc, float) + else paddle.to_tensor(self.loc) + ) + scale = ( + self.scale + if isinstance(self.scale, float) + else paddle.to_tensor(self.scale) + ) + self._dist = StudentT(df, loc, scale) + + def test_mean(self): + mean = self._dist.mean + target_dtype = ( + "float32" if isinstance(self.df, float) else self.df.dtype + ) + self.assertEqual(mean.numpy().dtype, target_dtype) + np.testing.assert_allclose( + mean, + self._np_mean(), + rtol=config.RTOL.get(str(target_dtype)), + atol=config.ATOL.get(str(target_dtype)), + ) + + def test_variance(self): + var = self._dist.variance + target_dtype = ( + "float32" if isinstance(self.df, float) else self.df.dtype + ) + self.assertEqual(var.numpy().dtype, target_dtype) + np.testing.assert_allclose( + var, + self._np_variance(), + rtol=config.RTOL.get(str(target_dtype)), + atol=config.ATOL.get(str(target_dtype)), + ) + + def test_entropy(self): + entropy = self._dist.entropy() + 
target_dtype = ( + "float32" if isinstance(self.df, float) else self.df.dtype + ) + self.assertEqual(entropy.numpy().dtype, target_dtype) + np.testing.assert_allclose( + entropy, + self._np_entropy(), + rtol=config.RTOL.get(str(target_dtype)), + atol=config.ATOL.get(str(target_dtype)), + ) + + def test_sample(self): + sample_shape = () + samples = self._dist.sample(sample_shape) + self.assertEqual( + tuple(samples.shape), + sample_shape + self._dist.batch_shape + self._dist.event_shape, + ) + + sample_shape = (10000,) + samples = self._dist.sample(sample_shape) + sample_mean = samples.mean(axis=0) + sample_variance = samples.var(axis=0) + + # Tolerance value 0.1 is empirical value which is consistent with + # TensorFlow + np.testing.assert_allclose( + sample_mean, self._dist.mean, atol=0, rtol=0.10 + ) + # Tolerance value 0.1 is empirical value which is consistent with + # TensorFlow + np.testing.assert_allclose( + sample_variance, self._dist.variance, atol=0, rtol=0.10 + ) + + def _np_variance(self): + if isinstance(self.df, np.ndarray) and self.df.dtype == np.float32: + df = self.df.astype("float64") + else: + df = self.df + if isinstance(self.loc, np.ndarray) and self.loc.dtype == np.float32: + loc = self.loc.astype("float64") + else: + loc = self.loc + if ( + isinstance(self.scale, np.ndarray) + and self.scale.dtype == np.float32 + ): + scale = self.scale.astype("float64") + else: + scale = self.scale + return scipy.stats.t.var(df, loc, scale) + + def _np_mean(self): + if isinstance(self.df, np.ndarray) and self.df.dtype == np.float32: + df = self.df.astype("float64") + else: + df = self.df + if isinstance(self.loc, np.ndarray) and self.loc.dtype == np.float32: + loc = self.loc.astype("float64") + else: + loc = self.loc + if ( + isinstance(self.scale, np.ndarray) + and self.scale.dtype == np.float32 + ): + scale = self.scale.astype("float64") + else: + scale = self.scale + return scipy.stats.t.mean(df, loc, scale) + + def _np_entropy(self): + if 
isinstance(self.df, np.ndarray) and self.df.dtype == np.float32: + df = self.df.astype("float64") + else: + df = self.df + if isinstance(self.loc, np.ndarray) and self.loc.dtype == np.float32: + loc = self.loc.astype("float64") + else: + loc = self.loc + if ( + isinstance(self.scale, np.ndarray) + and self.scale.dtype == np.float32 + ): + scale = self.scale.astype("float64") + else: + scale = self.scale + return scipy.stats.t.entropy(df, loc, scale) + + +@parameterize.place(config.DEVICES) +@parameterize.parameterize_cls( + (parameterize.TEST_CASE_NAME, 'df', 'loc', 'scale', 'value'), + [ + ( + 'one-dim', + 10.0, + 0.0, + 1.0, + np.array(3.3).astype("float32"), + ), + ( + 'value-broadcast-shape', + parameterize.xrand((2, 1), dtype='float64', min=4, max=30), + parameterize.xrand((2, 1), dtype='float64', min=-10, max=10), + parameterize.xrand((2, 1), dtype='float64', min=0.1, max=5), + parameterize.xrand((2, 4), dtype='float64', min=-10, max=10), + ), + ], +) +class TestStudentTProbs(unittest.TestCase): + def setUp(self): + df = ( + self.df if isinstance(self.df, float) else paddle.to_tensor(self.df) + ) + loc = ( + self.loc + if isinstance(self.loc, float) + else paddle.to_tensor(self.loc) + ) + scale = ( + self.scale + if isinstance(self.scale, float) + else paddle.to_tensor(self.scale) + ) + self._dist = StudentT(df, loc, scale) + + def test_prob(self): + target_dtype = ( + "float32" if isinstance(self.df, float) else self.df.dtype + ) + np.testing.assert_allclose( + self._dist.prob(paddle.to_tensor(self.value)), + scipy.stats.t.pdf(self.value, self.df, self.loc, self.scale), + rtol=config.RTOL.get(str(target_dtype)), + atol=config.ATOL.get(str(target_dtype)), + ) + + def test_log_prob(self): + target_dtype = ( + "float32" if isinstance(self.df, float) else self.df.dtype + ) + np.testing.assert_allclose( + self._dist.log_prob(paddle.to_tensor(self.value)), + scipy.stats.t.logpdf(self.value, self.df, self.loc, self.scale), + rtol=config.RTOL.get(str(target_dtype)), 
+ atol=config.ATOL.get(str(target_dtype)), + ) + + +@parameterize.place(config.DEVICES) +@parameterize_cls([TEST_CASE_NAME], ['StudentTTestError']) +class StudentTTestError(unittest.TestCase): + def setUp(self): + paddle.disable_static(self.place) + + @parameterize_func( + [ + (-5.0, 0.0, 1.0, ValueError), # negative df + (5.0, 0.0, -1.0, ValueError), # negative scale + ] + ) + def test_bad_parameter(self, df, loc, scale, error): + with paddle.base.dygraph.guard(self.place): + self.assertRaises(error, StudentT, df, loc, scale) + + @parameterize_func([(10,)]) # not sequence object sample shape + def test_bad_sample_shape(self, shape): + with paddle.base.dygraph.guard(self.place): + t = StudentT(5.0, 0.0, 1.0) + self.assertRaises(TypeError, t.sample, shape) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/dygraph_to_static/test_mnist.py b/test/dygraph_to_static/test_mnist.py index 4c34ae320abad..8f9abe65638c6 100644 --- a/test/dygraph_to_static/test_mnist.py +++ b/test/dygraph_to_static/test_mnist.py @@ -26,6 +26,8 @@ import paddle from paddle import base +from paddle.framework import use_pir_api +from paddle.jit.pir_translated_layer import PIR_INFER_MODEL_SUFFIX from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn import Linear from paddle.optimizer import Adam @@ -227,16 +229,15 @@ def train(self, to_static=False): prediction, acc, avg_loss = mnist(img, label) loss_data.append(float(avg_loss)) # new save load check - # TODO(@xiongkun): enable this after new save load is supported in pir. 
- if not paddle.framework.use_pir_api(): - self.check_jit_save_load( - mnist, - [dy_x_data], - [img, label], - to_static, - prediction, - [img.name], - ) + self.check_jit_save_load( + mnist, + [dy_x_data], + [img, label], + to_static, + prediction, + 0, + [img.name], + ) break return loss_data @@ -247,6 +248,7 @@ def check_jit_save_load( input_spec, to_static, gt_out, + gt_out_index, input_names_after_prune, ): if to_static: @@ -255,13 +257,16 @@ def check_jit_save_load( ) model_save_dir = os.path.join(self.temp_dir.name, 'inference') model_save_prefix = os.path.join(model_save_dir, 'mnist') - model_filename = "mnist" + INFER_MODEL_SUFFIX + MODEL_SUFFIX = ( + PIR_INFER_MODEL_SUFFIX if use_pir_api() else INFER_MODEL_SUFFIX + ) + model_filename = "mnist" + MODEL_SUFFIX params_filename = "mnist" + INFER_PARAMS_SUFFIX paddle.jit.save( layer=model, path=model_save_prefix, input_spec=input_spec, - output_spec=[gt_out], + output_spec=[gt_out_index] if use_pir_api() else [gt_out], input_names_after_prune=input_names_after_prune, ) # load in static graph mode @@ -278,15 +283,16 @@ def check_jit_save_load( np.testing.assert_allclose( gt_out.numpy(), dygraph_infer_out, rtol=1e-05 ) - # load in Paddle-Inference - predictor_infer_out = ( - self.predictor_load_and_run_inference_analysis( - model_save_dir, model_filename, params_filename, inputs + if not use_pir_api(): + # load in Paddle-Inference + predictor_infer_out = ( + self.predictor_load_and_run_inference_analysis( + model_save_dir, model_filename, params_filename, inputs + ) + ) + np.testing.assert_allclose( + gt_out.numpy(), predictor_infer_out, rtol=1e-05 ) - ) - np.testing.assert_allclose( - gt_out.numpy(), predictor_infer_out, rtol=1e-05 - ) def jit_load_and_run_inference_static( self, model_path, model_filename, params_filename, inputs diff --git a/test/dygraph_to_static/test_reinforcement_learning.py b/test/dygraph_to_static/test_reinforcement_learning.py index ade9ba14659d2..fca6e89136353 100644 --- 
a/test/dygraph_to_static/test_reinforcement_learning.py +++ b/test/dygraph_to_static/test_reinforcement_learning.py @@ -16,7 +16,7 @@ import math import unittest -import gym +import gymnasium as gym import numpy as np from dygraph_to_static_utils import ( Dy2StTestBase, diff --git a/test/dygraph_to_static/test_typehint.py b/test/dygraph_to_static/test_typehint.py index fd4dbacc6ad6d..b84ce4f332a91 100644 --- a/test/dygraph_to_static/test_typehint.py +++ b/test/dygraph_to_static/test_typehint.py @@ -35,15 +35,15 @@ def function(x: A) -> A: def fn_annotation_assign_with_value(x: paddle.Tensor): if x: - y: List["paddle.Tensor"] = [x + 1] + y: List[paddle.Tensor] = [x + 1] else: - y: List["paddle.Tensor"] = [x - 1] + y: List[paddle.Tensor] = [x - 1] return y def fn_annotation_assign_without_value(x: paddle.Tensor): if x: - y: List["paddle.Tensor"] + y: List[paddle.Tensor] y = [x + 1] else: y = [x - 1] diff --git a/test/deprecated/fft/test_spectral_op.py b/test/fft/test_spectral_op.py similarity index 99% rename from test/deprecated/fft/test_spectral_op.py rename to test/fft/test_spectral_op.py index 2596fb13eab1c..94168193f468d 100644 --- a/test/deprecated/fft/test_spectral_op.py +++ b/test/fft/test_spectral_op.py @@ -14,6 +14,7 @@ import re import sys +import unittest import numpy as np from op_test import OpTest @@ -311,3 +312,7 @@ def test_check_grad(self): ["X"], "Out", ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/ipu/distributed/run_dist_ipu.sh b/test/ipu/distributed/run_dist_ipu.sh index 1ab804e626c63..e7deb58c28750 100644 --- a/test/ipu/distributed/run_dist_ipu.sh +++ b/test/ipu/distributed/run_dist_ipu.sh @@ -1,13 +1,13 @@ #!/bin/bash - + # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/ir/inference/quant_dequant_test.py b/test/ir/inference/quant_dequant_test.py index c176e802a525c..85724a2cc7df2 100644 --- a/test/ir/inference/quant_dequant_test.py +++ b/test/ir/inference/quant_dequant_test.py @@ -22,9 +22,10 @@ import paddle from paddle import base -from paddle.base import Program, Variable, core +from paddle.base import core from paddle.base.core import AnalysisConfig, create_paddle_predictor from paddle.base.framework import IrGraph +from paddle.static import Variable from paddle.static.io import append_fetch_ops, prepend_feed_ops from paddle.static.quantization import ( AddQuantDequantPass, @@ -39,10 +40,10 @@ class QuantDequantTest(unittest.TestCase): def __init__(self, methodName='runTest'): super().__init__(methodName) paddle.enable_static() - self.main_program = base.Program() - self.startup_program = base.Program() - self.test_main_program = base.Program() - self.test_startup_program = base.Program() + self.main_program = paddle.static.Program() + self.startup_program = paddle.static.Program() + self.test_main_program = paddle.static.Program() + self.test_startup_program = paddle.static.Program() self.feeds = None self.fetch_list = None self.enable_mkldnn = False @@ -62,10 +63,9 @@ def __init__(self, methodName='runTest'): # from Paddle release2.1 def _normalize_program(self, program, feed_vars, fetch_vars): - if not isinstance(program, Program): + if not isinstance(program, paddle.static.Program): raise TypeError( - "program type must be `base.Program`, but received `%s`" - % type(program) + f"program type must be `paddle.static.Program`, but received `{type(program)}`" ) if not isinstance(feed_vars, list): 
feed_vars = [feed_vars] @@ -127,7 +127,7 @@ def _save_models( if var.name in feeded_var_names: feeded_vars.append(var) - with base.scope_guard(scope): + with paddle.static.scope_guard(scope): paddle.static.io.save_inference_model( dirname, feeded_vars, @@ -155,7 +155,7 @@ def _get_paddle_outs(self, feed, fetch_list, executor, program, scope): ''' Return PaddlePaddle outputs. ''' - with base.scope_guard(scope): + with paddle.static.scope_guard(scope): outs = executor.run( program=program, feed=feed, @@ -245,12 +245,12 @@ def check_output_with_option( or disable TensorRT, enable MKLDNN or disable MKLDNN are all the same. ''' - place = base.CUDAPlace(0) if use_gpu else base.CPUPlace() - executor = base.Executor(place) - scope = base.Scope() + place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace() + executor = paddle.static.Executor(place) + scope = paddle.static.Scope() device = "GPU" if use_gpu else "CPU" - with base.scope_guard(scope): + with paddle.static.scope_guard(scope): executor.run(self.startup_program) executor.run(self.test_startup_program) main_graph = IrGraph(core.Graph(self.main_program.desc), for_test=False) @@ -274,11 +274,11 @@ def check_output_with_option( scale_training_pass = OutScaleForTrainingPass(scope=scope, place=place) scale_training_pass.apply(main_graph) - build_strategy = base.BuildStrategy() + build_strategy = paddle.static.BuildStrategy() build_strategy.memory_optimize = False build_strategy.enable_inplace = False build_strategy.fuse_all_reduce_ops = False - binary = base.CompiledProgram(main_graph.graph) + binary = paddle.static.CompiledProgram(main_graph.graph) iters = 10 batch_size = 1 @@ -287,7 +287,7 @@ def check_output_with_option( batch_size=batch_size, ) feeder = base.DataFeeder(feed_list=[self.data, self.label], place=place) - with base.scope_guard(scope): + with paddle.static.scope_guard(scope): for _ in range(iters): data = next(train_reader()) loss_v = executor.run( @@ -307,7 +307,7 @@ def check_output_with_option( 
self.main_program = test_graph.to_program() - with base.scope_guard(scope): + with paddle.static.scope_guard(scope): self.main_program = self._normalize_program( self.main_program, self.data, self.fetch_list ) @@ -450,6 +450,6 @@ def __init__( self.disable_trt_plugin_fp16 = disable_trt_plugin_fp16 def quant_dequant(self): - place = base.CPUPlace() - exe = base.Executor(place) - scope = base.Scope() + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + scope = paddle.static.Scope() diff --git a/test/ir/pir/CMakeLists.txt b/test/ir/pir/CMakeLists.txt index e80898846c557..29df19c523d88 100644 --- a/test/ir/pir/CMakeLists.txt +++ b/test/ir/pir/CMakeLists.txt @@ -42,3 +42,4 @@ py_test_modules( FLAGS_pir_subgraph_saving_dir=${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory(fused_pass) +add_subdirectory(translator) diff --git a/test/ir/pir/cinn/CMakeLists.txt b/test/ir/pir/cinn/CMakeLists.txt index a8d99e7170654..6261e22868264 100644 --- a/test/ir/pir/cinn/CMakeLists.txt +++ b/test/ir/pir/cinn/CMakeLists.txt @@ -34,8 +34,8 @@ if(WITH_GPU) PYTHONPATH=${CMAKE_BINARY_DIR}:${CMAKE_BINARY_DIR}/python/:$ENV{PYTHONPATH} FLAGS_enable_pir_api=1 FLAGS_prim_all=True FLAGS_cinn_new_group_scheduler=1 FLAGS_cinn_bucket_compile=1 - FLAGS_support_reduce_stride_read=1 FLAGS_group_schedule_tiling_first=1 - ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_cinn_sub_graph.py + FLAGS_group_schedule_tiling_first=1 ${PYTHON_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/test_cinn_sub_graph.py WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) set_tests_properties(test_cinn_sub_graph_stride_read PROPERTIES LABELS "RUN_TYPE=CINN") diff --git a/test/ir/pir/cinn/performance/CMakeLists.txt b/test/ir/pir/cinn/performance/CMakeLists.txt index 9bbb186614eb6..a8145d0c4083d 100644 --- a/test/ir/pir/cinn/performance/CMakeLists.txt +++ b/test/ir/pir/cinn/performance/CMakeLists.txt @@ -20,21 +20,6 @@ if(WITH_GPU) WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) set_tests_properties(${cinn_pir_test_name} PROPERTIES 
LABELS "RUN_TYPE=CINN") - - add_test( - NAME ${cinn_pir_test_name}_stride_read - COMMAND - ${CMAKE_COMMAND} -E env - PYTHONPATH=${CMAKE_BINARY_DIR}:${CMAKE_BINARY_DIR}/python/:$ENV{PYTHONPATH} - FLAGS_check_infer_symbolic=1 FLAGS_enable_pir_api=1 - FLAGS_cinn_bucket_compile=True FLAGS_prim_enable_dynamic=true - FLAGS_pir_apply_shape_optimization_pass=1 - FLAGS_group_schedule_tiling_first=1 FLAGS_cinn_new_group_scheduler=1 - FLAGS_support_reduce_stride_read=1 ${PYTHON_EXECUTABLE} - ${CMAKE_CURRENT_SOURCE_DIR}/${cinn_pir_test_name}.py - WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) - set_tests_properties(${cinn_pir_test_name}_stride_read - PROPERTIES LABELS "RUN_TYPE=CINN") endforeach() endif() diff --git a/test/ir/pir/cinn/sub_graphs/base.py b/test/ir/pir/cinn/sub_graphs/base.py index a11ffe4f9e1bd..a0ceee03095db 100644 --- a/test/ir/pir/cinn/sub_graphs/base.py +++ b/test/ir/pir/cinn/sub_graphs/base.py @@ -30,7 +30,7 @@ def setUp(self): self.atol = 1e-6 self.train_atol = 1e-6 self.with_precision_compare = True - self.with_train = False # 本个pr中默认为false,下个增量pr中改为默认true + self.with_train = True # 本个pr中默认为false,下个增量pr中改为默认true # override customized settting self.init() if self.inputs: diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_0.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_0.py index e5d86d0e40f53..228465812c587 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_0.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_0.py @@ -135,7 +135,6 @@ def init(self): paddle.rand(shape=[22, 512, 7, 7], dtype=paddle.float32), ) self.net = LayerCase - self.with_train = True def set_flags(self): # NOTE(Aurelius84): cinn_op.pool2d only support pool_type='avg' under adaptive=True diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_1.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_1.py index 10ed97211646c..d40e635bca9ed 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_1.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_1.py @@ -62,6 +62,7 @@ def init(self): 
paddle.rand(shape=[10, 512, 7, 7], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_10.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_10.py index c151d478a6ac6..b871017d1e038 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_10.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_10.py @@ -75,6 +75,7 @@ def init(self): paddle.rand(shape=[10, 36, 28, 28], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_11.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_11.py index 464ab6166a0fa..83fd4bff996bc 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_11.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_11.py @@ -65,6 +65,7 @@ def init(self): paddle.rand(shape=[10, 1280, 1, 1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_13.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_13.py index 24d79ccfc8e94..dd91f88558b59 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_13.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_13.py @@ -60,6 +60,7 @@ def init(self): paddle.rand(shape=[10, 2048, 7, 7], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_14.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_14.py index 167b10dd6df2f..7708b6fb6c2bb 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_14.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_14.py @@ -72,6 +72,7 @@ def init(self): paddle.rand(shape=[22, 128, 56, 56], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_15.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_15.py index 
c5050e5cb9d55..4d1ac693615d3 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_15.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_15.py @@ -72,6 +72,7 @@ def init(self): paddle.rand(shape=[10, 122, 28, 28], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_16.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_16.py index 5fad58c5de16b..3e6696a5f23c9 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_16.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_16.py @@ -115,6 +115,7 @@ def init(self): paddle.rand(shape=[22, 28, 56, 56], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False def set_flags(self): # NOTE(Aurelius84): cinn_op.pool2d only support pool_type='avg' under adaptive=True diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_17.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_17.py index 5dc0d861cc847..62ef8a2dbe38c 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_17.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_17.py @@ -60,6 +60,7 @@ def init(self): paddle.rand(shape=[22, 2048, 7, 7], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_18.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_18.py index b4010043304be..e8f4772b757a5 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_18.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_18.py @@ -68,6 +68,7 @@ def init(self): paddle.rand(shape=[22, 1536, 8, 8], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.with_precision_compare = False # NOTE output mismatch with prim diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_2.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_2.py index d3faccc973b03..883067279e417 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_2.py +++ 
b/test/ir/pir/cinn/sub_graphs/test_sub_graph_2.py @@ -74,7 +74,6 @@ def init(self): paddle.rand(shape=[43, 256, 56, 56], dtype=paddle.float32), ) self.net = LayerCase - self.with_train = True def set_flags(self): # NOTE(Aurelius84): cinn_op.pool2d only support pool_type='avg' under adaptive=True diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_20.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_20.py index 57dcec3e56353..82523d9dd29e4 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_20.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_20.py @@ -77,6 +77,7 @@ def init(self): paddle.rand(shape=[86, 192], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_21.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_21.py index 49eea1bd4cbfd..b19151557a65a 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_21.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_21.py @@ -108,6 +108,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[86, 198, 192], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False # NOTE output mismatch with prim diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_23.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_23.py index 83ddc2b51b2b8..b37c912b61f5d 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_23.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_23.py @@ -60,6 +60,7 @@ def init(self): paddle.rand(shape=[11, 24, 56, 56], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_25.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_25.py index b434f440365f6..d6be0ea181c59 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_25.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_25.py @@ -68,6 +68,7 @@ def init(self): paddle.rand(shape=[11, 1280, 7, 7], dtype=paddle.float32), ) self.net = LayerCase + 
self.with_train = False self.with_precision_compare = False diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_28.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_28.py index 6a25c112a0b47..5387f9ee37177 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_28.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_28.py @@ -68,6 +68,7 @@ def init(self): paddle.rand(shape=[10, 320, 8, 8], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.with_precision_compare = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_29.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_29.py index 85b2207fd1ee1..9283f453e46ae 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_29.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_29.py @@ -68,6 +68,7 @@ def init(self): paddle.rand(shape=[10, 2048, 10, 10], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.with_precision_compare = False diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_3.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_3.py index 23b9ec755c7be..9c538dea0d694 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_3.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_3.py @@ -89,6 +89,7 @@ def init(self): paddle.randint(low=0, high=10, shape=[16, 49], dtype=paddle.int64), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_31.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_31.py index 81d18df09b741..eee47cf931cd9 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_31.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_31.py @@ -66,6 +66,7 @@ def init(self): paddle.rand(shape=[22, 288, 14, 14], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-8 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_32.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_32.py index 7586bd7c8cd37..2bed2bfc9a742 100644 
--- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_32.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_32.py @@ -54,6 +54,7 @@ def init(self): paddle.rand(shape=[22, 1024, 1, 1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_33.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_33.py index 0d50f420cdc22..55b168f5e2ade 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_33.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_33.py @@ -84,6 +84,7 @@ def init(self): paddle.rand(shape=[10, 256, 14, 14], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-5 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_34.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_34.py index 7466135585abd..a8d09423a95eb 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_34.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_34.py @@ -57,6 +57,7 @@ def init(self): paddle.rand(shape=[10, 32, 56, 56], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-8 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_35.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_35.py index 7eb05d010bd2f..8c70aa1f75ae2 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_35.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_35.py @@ -84,6 +84,7 @@ def init(self): paddle.rand(shape=[4, 3, 384, 384], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_36.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_36.py index 03f141b241bdc..6abd8655d98f6 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_36.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_36.py @@ -70,6 +70,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[6, 9216, 96], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False if __name__ == 
'__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_38.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_38.py index 431650d6bdbef..828f15fa32c3b 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_38.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_38.py @@ -48,6 +48,7 @@ def init(self): paddle.rand(shape=[4, 48, 96, 96], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_39.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_39.py index ddd3cdf8c3eda..44431cb437d82 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_39.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_39.py @@ -46,6 +46,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[12, 288, 192], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_4.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_4.py index 9d419dbb38959..f03c8322cce70 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_4.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_4.py @@ -51,6 +51,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[22, 196, 128], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_41.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_41.py index 352f81b791d41..d3d09e75e4f70 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_41.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_41.py @@ -66,6 +66,7 @@ def init(self): paddle.randint(low=0, high=10, shape=[2], dtype=paddle.int32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_42.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_42.py index 0e8a6574081a4..60d3846377987 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_42.py +++ 
b/test/ir/pir/cinn/sub_graphs/test_sub_graph_42.py @@ -114,6 +114,7 @@ def init(self): paddle.rand(shape=[2, 4], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False def set_flags(self): # NOTE(Aurelius84): cinn_op.pool2d only support pool_type='avg' under adaptive=True diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_43.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_43.py index 0104a18d75d60..9440b6cb9dbd5 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_43.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_43.py @@ -258,6 +258,7 @@ def init(self): paddle.rand(shape=[1, 2048, 24, 36], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-5 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_44.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_44.py index 06c021953fd1e..34416aea9ae97 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_44.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_44.py @@ -143,6 +143,7 @@ def init(self): paddle.rand(shape=[1, 100, 256], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-8 self.with_cinn = False diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_45.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_45.py index 8c9802242f436..d2f6befdc9147 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_45.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_45.py @@ -70,6 +70,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[1, 4], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_46.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_46.py index 6e45b88c332da..19ec352bcf5d4 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_46.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_46.py @@ -62,6 +62,7 @@ def init(self): paddle.rand(shape=[1, 80, 50, 50], dtype=paddle.float32), ) self.net = LayerCase + 
self.with_train = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_47.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_47.py index 72599e85f742f..5096d5f366b63 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_47.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_47.py @@ -47,6 +47,7 @@ def init(self): paddle.randint(low=0, high=10, shape=[2], dtype=paddle.int64), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_48.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_48.py index eaa9d3e6b9232..7fc4b64f1466f 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_48.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_48.py @@ -190,6 +190,7 @@ def init(self): paddle.rand(shape=[1, 625, 1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-5 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_49.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_49.py index 34ecd19552529..4367e45015b23 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_49.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_49.py @@ -66,6 +66,7 @@ def init(self): paddle.rand(shape=[1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_5.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_5.py index 7c9639d906cda..181d06fffb4c3 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_5.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_5.py @@ -46,6 +46,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[22, 16, 384], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_51.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_51.py index 10ab5da982012..152dc5b2ce483 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_51.py +++ 
b/test/ir/pir/cinn/sub_graphs/test_sub_graph_51.py @@ -90,6 +90,7 @@ def init(self): paddle.rand(shape=[1, 4, 64, 64], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_52.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_52.py index ed08605e070d1..e1a3774b1be35 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_52.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_52.py @@ -94,6 +94,7 @@ def init(self): paddle.rand(shape=[91], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_54.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_54.py index cf04f914d15a9..7bdef30c7d243 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_54.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_54.py @@ -117,6 +117,7 @@ def init(self): paddle.rand(shape=[1, 96, 128, 128], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_55.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_55.py index 7d065da0bc99b..9a623a7afa130 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_55.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_55.py @@ -78,6 +78,7 @@ def init(self): paddle.rand(shape=[1, 192, 32, 32], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_56.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_56.py index 79d9a9c15cf9e..4646923191e60 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_56.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_56.py @@ -74,6 +74,7 @@ def init(self): paddle.rand(shape=[24], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False # NOTE prim + cinn lead to error diff --git 
a/test/ir/pir/cinn/sub_graphs/test_sub_graph_57.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_57.py index a34e30dc687e2..d297a19fa0932 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_57.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_57.py @@ -42,6 +42,7 @@ def init(self): self.input_specs = [] self.inputs = () self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_59.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_59.py index 12dc85dbf3d3f..072c8077b7295 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_59.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_59.py @@ -95,6 +95,7 @@ def init(self): paddle.rand(shape=[1, 44, 32, 32], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_6.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_6.py index f51b3a846151d..89a1c19ed53a7 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_6.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_6.py @@ -47,6 +47,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[10, 196, 640], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_61.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_61.py index 21332c862ab22..41be02a221bd4 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_61.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_61.py @@ -91,6 +91,7 @@ def init(self): paddle.rand(shape=[1, 4], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-5 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_62.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_62.py index d4a2234509d1c..dd6069d9f9555 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_62.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_62.py @@ -71,6 +71,7 @@ def init(self): 
paddle.rand(shape=[1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-8 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_63.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_63.py index 5456431c96fea..6a6f430bd82be 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_63.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_63.py @@ -96,6 +96,7 @@ def init(self): paddle.rand(shape=[171888, 4], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False def set_flags(self): # NOTE(Aurelius84): cinn_op.pool2d only support pool_type='avg' under adaptive=True diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_64.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_64.py index 9ec76729c00e0..820f7af48178e 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_64.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_64.py @@ -72,6 +72,7 @@ def init(self): paddle.rand(shape=[512, 256, 7, 7], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_65.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_65.py index 18af525df5c4c..e7e636628d5f1 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_65.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_65.py @@ -55,6 +55,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[2, 2002], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_66.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_66.py index 1c3d72c455056..033202891b2ed 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_66.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_66.py @@ -64,6 +64,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[2, 1788], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False # NOTE prim + cinn lead to error diff --git 
a/test/ir/pir/cinn/sub_graphs/test_sub_graph_67.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_67.py index 75fb8ca7cfb38..74513aac91b5b 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_67.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_67.py @@ -134,6 +134,7 @@ def init(self): paddle.randint(low=0, high=10, shape=[1], dtype=paddle.int32), ) self.net = LayerCase + self.with_train = False self.with_cinn = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_68.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_68.py index d3571d898798f..67df4b8fba497 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_68.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_68.py @@ -206,6 +206,7 @@ def init(self): paddle.rand(shape=[528, 4], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_69.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_69.py index c1c4b94929310..4e64e3aea0bbc 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_69.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_69.py @@ -65,6 +65,7 @@ def init(self): paddle.rand(shape=[1, 171888, 4], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.with_precision_compare = False diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_7.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_7.py index f4236d7664c59..bdc2d7b052c77 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_7.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_7.py @@ -91,6 +91,7 @@ def init(self): paddle.randint(low=0, high=10, shape=[49, 49], dtype=paddle.int64), ) self.net = LayerCase + self.with_train = False self.with_cinn = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_70.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_70.py index 30b04988e601f..a483c47e1e05f 100644 --- 
a/test/ir/pir/cinn/sub_graphs/test_sub_graph_70.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_70.py @@ -61,6 +61,7 @@ def init(self): paddle.randint(low=0, high=10, shape=[2], dtype=paddle.int32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_72.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_72.py index ff048a21337da..489eab05cf04e 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_72.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_72.py @@ -143,6 +143,7 @@ def init(self): self.input_specs = [] self.inputs = () self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_73.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_73.py index ea4a9cd49726d..a75d51a21cd1e 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_73.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_73.py @@ -98,6 +98,7 @@ def init(self): paddle.rand(shape=[2], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.with_cinn = False diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_74.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_74.py index a069b9bc3874b..03fcab9ff9f00 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_74.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_74.py @@ -75,6 +75,7 @@ def init(self): paddle.rand(shape=[2], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.with_precision_compare = False diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_75.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_75.py index 41204b7c15d2e..a20fbaf33e4e7 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_75.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_75.py @@ -96,6 +96,7 @@ def init(self): paddle.rand(shape=[1, 3, 544, 736], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git 
a/test/ir/pir/cinn/sub_graphs/test_sub_graph_77.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_77.py index bb22fb38c693a..4ad52c6aa976c 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_77.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_77.py @@ -209,6 +209,7 @@ def init(self): paddle.rand(shape=[1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_78.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_78.py index af4320f4609ef..f987f5a334ca6 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_78.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_78.py @@ -125,6 +125,7 @@ def init(self): paddle.rand(shape=[1, 256, 13, 19], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_79.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_79.py index 96d9de9b9c2b6..1bf2af665a2e2 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_79.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_79.py @@ -134,6 +134,7 @@ def init(self): paddle.rand(shape=[1, 3, 96, 96, 1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_8.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_8.py index 6340bf5a4d451..656e522137b4b 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_8.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_8.py @@ -47,6 +47,7 @@ def init(self): paddle.rand(shape=[22, 128, 14, 14], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_80.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_80.py index 2fe8b3f007e86..4a34d06b5b4af 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_80.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_80.py @@ -125,6 +125,7 
@@ def init(self): paddle.rand(shape=[1, 3, 48, 48, 1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_81.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_81.py index dc0d1e5126259..acbe1eae0ae60 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_81.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_81.py @@ -80,6 +80,7 @@ def init(self): paddle.rand(shape=[1, 80, 44, 44], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_82.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_82.py index 65ab9b68b7b6d..9761629a802e3 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_82.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_82.py @@ -173,6 +173,7 @@ def init(self): paddle.rand(shape=[2541, 2], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.with_cinn = False # NOTE cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_83.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_83.py index 2a1a527317b91..889e5b0e9dfde 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_83.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_83.py @@ -96,6 +96,7 @@ def init(self): self.input_specs = [] self.inputs = () self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_84.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_84.py index 595163ad073e1..a20bac9133a8f 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_84.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_84.py @@ -81,6 +81,7 @@ def init(self): paddle.rand(shape=[1, 2541, 68], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_85.py 
b/test/ir/pir/cinn/sub_graphs/test_sub_graph_85.py index 9ef4bf92bc473..80137072f1c23 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_85.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_85.py @@ -61,6 +61,7 @@ def init(self): paddle.rand(shape=[16384, 5], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_86.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_86.py index 698760309d8ff..47221f58d3ca3 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_86.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_86.py @@ -247,6 +247,7 @@ def init(self): paddle.rand(shape=[1, 2048, 1, 1], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_87.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_87.py index b44fdc4c28783..4e23ab81535de 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_87.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_87.py @@ -201,6 +201,7 @@ def init(self): paddle.rand(shape=[1, 144, 21, 32], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_88.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_88.py index 425537e634f25..0ed66f4e89e8d 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_88.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_88.py @@ -79,6 +79,7 @@ def init(self): paddle.randint(low=0, high=10, shape=[1, 500], dtype=paddle.int32), ) self.net = LayerCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_89.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_89.py index ab1503ef63afa..21faaf7dcad30 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_89.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_89.py @@ -91,6 +91,7 @@ def init(self): 
paddle.rand(shape=[1, 256, 28, 40], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False # if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_9.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_9.py index e8919aec6e379..7dd68051a5efa 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_9.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_9.py @@ -90,6 +90,7 @@ def init(self): paddle.randint(low=0, high=10, shape=[49, 196], dtype=paddle.int64), ) self.net = LayerCase + self.with_train = False self.with_cinn = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_90.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_90.py index e3f28f9775a69..85f937d265d5b 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_90.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_90.py @@ -65,6 +65,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[12], dtype=paddle.float32),) self.net = LayerCase + self.with_train = False def set_flags(self): # NOTE(Aurelius84): cinn_op.pool2d only support pool_type='avg' under adaptive=True diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_adaptive_avg_pool2d.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_adaptive_avg_pool2d.py index d4d06895c49ae..1a166fad740a7 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_adaptive_avg_pool2d.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_adaptive_avg_pool2d.py @@ -48,6 +48,7 @@ def init(self): paddle.rand(shape=[22, 480, 7, 7], dtype=paddle.float32), ) self.net = AdaptiveAvgPool2dCase + self.with_train = False # NOTE prim + cinn lead to error diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_add.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_add.py index c9cf656ad4a0c..9434d1c189373 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_add.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_add.py @@ -54,6 +54,7 @@ def init(self): paddle.rand(shape=[22, 196, 128], 
dtype=paddle.float32), ) self.net = AddCase + self.with_train = False self.atol = 1e-8 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_add_n.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_add_n.py index c488de14d12be..18cf5c72f2a50 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_add_n.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_add_n.py @@ -104,6 +104,7 @@ def init(self): paddle.rand(shape=[1], dtype=paddle.float32), ) self.net = AddNCase + self.with_train = False self.atol = 1e-8 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_avg_pool2d.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_avg_pool2d.py index 0a40ca5079931..957102539eb07 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_avg_pool2d.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_avg_pool2d.py @@ -56,6 +56,7 @@ def init(self): paddle.rand(shape=[22, 128, 56, 56], dtype=paddle.float32), ) self.net = AvgPool2dCase + self.with_train = False self.atol = 1e-8 self.with_cinn = False diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_chunk.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_chunk.py index 36dae471d0d7d..35e12f767dae7 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_chunk.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_chunk.py @@ -46,6 +46,7 @@ def init(self): paddle.rand(shape=[10, 2304, 192], dtype=paddle.float32), ) self.net = ChunkCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_concat.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_concat.py index f65682e4b0ae9..b298c0870d4bc 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_concat.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_concat.py @@ -54,6 +54,7 @@ def init(self): paddle.rand(shape=[145, 12, 112, 112], dtype=paddle.float32), ) self.net = ConcatCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_conv_nd.py 
b/test/ir/pir/cinn/sub_graphs/test_sub_graph_conv_nd.py index c189750c9f040..5bdd5b1622a34 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_conv_nd.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_conv_nd.py @@ -63,6 +63,7 @@ def init(self): paddle.rand(shape=[22, 64, 56, 56], dtype=paddle.float32), ) self.net = ConvNdCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_linear.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_linear.py index 381eb461b6328..c4a358ad4b0bf 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_linear.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_linear.py @@ -54,6 +54,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[10, 64], dtype=paddle.float32),) self.net = LinearCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_max_pool2d.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_max_pool2d.py index 5cd643fc5ef4a..96d2bd54868d1 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_max_pool2d.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_max_pool2d.py @@ -55,6 +55,7 @@ def init(self): paddle.rand(shape=[22, 64, 112, 112], dtype=paddle.float32), ) self.net = MaxPool2dCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_mul_method.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_mul_method.py index 1e56b482d3736..fa389063a0513 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_mul_method.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_mul_method.py @@ -54,6 +54,7 @@ def init(self): paddle.rand(shape=[22, 1500, 14, 14], dtype=paddle.float32), ) self.net = LayerCase + self.with_train = False self.atol = 1e-8 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_relu6.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_relu6.py index f628bc19cc9aa..f267c1610f665 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_relu6.py +++ 
b/test/ir/pir/cinn/sub_graphs/test_sub_graph_relu6.py @@ -47,6 +47,7 @@ def init(self): paddle.rand(shape=[22, 144, 56, 56], dtype=paddle.float32), ) self.net = Relu6Case + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_reshape.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_reshape.py index 5abaff9157d1d..540958310b7cc 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_reshape.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_reshape.py @@ -44,6 +44,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[4312, 640], dtype=paddle.float32),) self.net = ReshapeCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_sigmoid.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_sigmoid.py index 3f77a5c68a93a..a746f3cdd41bc 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_sigmoid.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_sigmoid.py @@ -46,6 +46,7 @@ def init(self): paddle.rand(shape=[10, 512, 1, 1], dtype=paddle.float32), ) self.net = SigmoidCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_split.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_split.py index b82ec109ca724..57de6d8cb09c0 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_split.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_split.py @@ -48,6 +48,7 @@ def init(self): paddle.rand(shape=[11, 976, 7, 7], dtype=paddle.float32), ) self.net = SplitCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_squeeze_unsqueeze.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_squeeze_unsqueeze.py index 516d6c6735ff6..4f7438c8a00eb 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_squeeze_unsqueeze.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_squeeze_unsqueeze.py @@ -51,6 +51,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[1, 12, 1, 
64], dtype=paddle.float32),) self.net = SqueezeCase + self.with_train = False self.atol = 1e-8 @@ -66,6 +67,7 @@ def init(self): ] self.inputs = (paddle.rand(shape=[1, 12, 1, 64], dtype=paddle.float32),) self.net = UnsqueezeCase + self.with_train = False self.atol = 1e-8 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_swish.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_swish.py index 1f7402d0470ed..da572f47bfd94 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_swish.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_swish.py @@ -46,6 +46,7 @@ def init(self): paddle.rand(shape=[43, 32, 112, 112], dtype=paddle.float32), ) self.net = SwishCase + self.with_train = False self.atol = 1e-8 diff --git a/test/ir/pir/cinn/sub_graphs/test_sub_graph_transpose.py b/test/ir/pir/cinn/sub_graphs/test_sub_graph_transpose.py index 49a05607e3ae3..51db880532187 100644 --- a/test/ir/pir/cinn/sub_graphs/test_sub_graph_transpose.py +++ b/test/ir/pir/cinn/sub_graphs/test_sub_graph_transpose.py @@ -46,6 +46,7 @@ def init(self): paddle.rand(shape=[22, 4, 224, 224], dtype=paddle.float32), ) self.net = TransposeCase + self.with_train = False if __name__ == '__main__': diff --git a/test/ir/pir/cinn/symbolic/test_dyshape_group_norm.py b/test/ir/pir/cinn/symbolic/test_dyshape_group_norm.py new file mode 100644 index 0000000000000..a3e9b838eeae4 --- /dev/null +++ b/test/ir/pir/cinn/symbolic/test_dyshape_group_norm.py @@ -0,0 +1,91 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import unittest +from os.path import dirname + +import numpy as np + +import paddle +from paddle import nn +from paddle.static import InputSpec + +sys.path.append(dirname(dirname(__file__))) + +import utils + + +class GroupNorm(nn.Layer): + def __init__(self): + super().__init__() + self.hidden_size = 768 + self.dtype = "float32" + self.weight = paddle.randn([128], dtype=self.dtype) + self.weight.stop_gradient = False + self.bias = paddle.randn([128], dtype=self.dtype) + self.bias.stop_gradient = False + + self.data_format = "NHWC" + + def forward(self, x): + return paddle.nn.functional.group_norm( + x, + num_groups=32, + epsilon=1e-6, + weight=self.weight, + bias=self.bias, + data_format=self.data_format, + ) + + +class TestGroupNorm(unittest.TestCase): + def setUp(self): + paddle.seed(2024) + self.shape = [1, 128, 256, 128] + self.dtype = "float32" + self.data_format = "NHWC" + self.prepare_data() + + def prepare_data(self): + self.x = paddle.randn(self.shape, dtype=self.dtype) + self.x.stop_gradient = False + + def check_jit_kernel_info(self, static_fn): + utils.check_jit_kernel_number(static_fn, 2) + utils.check_jit_kernel_structure(static_fn, {utils.JIT_KERNEL_NAME: 2}) + + def eval(self, use_cinn): + paddle.seed(2024) + net = GroupNorm() + input_spec = [ + InputSpec(shape=[None, None, None, 128], dtype='float32'), + ] + net = utils.apply_to_static(net, use_cinn, input_spec) + net.eval() + out = net(self.x) + if use_cinn: + self.check_jit_kernel_info(net.forward) + return out + + def test_eval(self): + cinn_out = self.eval(use_cinn=True) + dy_out = self.eval(use_cinn=False) + np.testing.assert_allclose( + cinn_out.numpy(), dy_out.numpy(), atol=1e-6, rtol=1e-6 + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/ir/pir/fused_pass/xpu/test_group_norm_silu_xpu_fuse_pass.py 
b/test/ir/pir/fused_pass/onednn/test_placement_pass_mean_op.py similarity index 52% rename from test/ir/pir/fused_pass/xpu/test_group_norm_silu_xpu_fuse_pass.py rename to test/ir/pir/fused_pass/onednn/test_placement_pass_mean_op.py index 3a515d7d62b66..6443a60c331f9 100644 --- a/test/ir/pir/fused_pass/xpu/test_group_norm_silu_xpu_fuse_pass.py +++ b/test/ir/pir/fused_pass/onednn/test_placement_pass_mean_op.py @@ -11,34 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import unittest import numpy as np from pass_test import PassTest import paddle -from paddle.base import core paddle.enable_static() -class TestGroupNormSiluXpuFusePattern(PassTest): - r""" - X - Scale | Bias - \ | / - group norm - / | \ - / | \ - variance | mean - | - silu - | - output - """ - - def is_program_valid(self, program): +class TestMeanPlacementPass(PassTest): + def is_program_valid(self, program=None): return True def build_ir_program(self): @@ -46,40 +30,28 @@ def build_ir_program(self): main_prog = paddle.static.Program() start_prog = paddle.static.Program() with paddle.pir.core.program_guard(main_prog, start_prog): - channels = 128 - groups = 32 x = paddle.static.data( - name='X', shape=[1, channels, 64, 64], dtype='float32' + name='x', shape=[5, 2, 5, 5], dtype='float32' ) + mean = paddle.mean(x) + out = paddle.assign(mean) + self.pass_attr_list = [{'onednn_placement_pass': {}}] - group_norm = paddle.nn.GroupNorm(groups, channels) - silu = paddle.nn.Silu() - - group_norm_out = group_norm(x) - out = silu(group_norm_out) - out = paddle.assign(out) - self.pass_attr_list = [{'group_norm_silu_xpu_fuse_pass': {}}] self.feeds = { - "X": np.random.random((1, channels, 64, 64)).astype( - "float32" - ), + "x": np.random.random((5, 2, 5, 5)).astype("float32"), } self.fetch_list = [out] self.valid_op_map = { - "pd_op.group_norm": 0, - "pd_op.silu": 0, - 
"pd_op.group_norm_silu_xpu": 1, + "onednn_op.mean": 1, } return [main_prog, start_prog] - def setUp(self): - if core.is_compiled_with_xpu(): - self.places.append(paddle.XPUPlace(0)) - self.skip_accuracy_verification = True - def sample_program(self): yield self.build_ir_program(), False + def setUp(self): + self.places.append(paddle.CPUPlace()) + def test_check_output(self): self.check_pass_correct() diff --git a/test/ir/pir/fused_pass/pass_test.py b/test/ir/pir/fused_pass/pass_test.py index 3bb937ec59771..c5066bad6b34f 100644 --- a/test/ir/pir/fused_pass/pass_test.py +++ b/test/ir/pir/fused_pass/pass_test.py @@ -69,7 +69,7 @@ def run_program(self, executor, startup_program, main_program): fetches = executor.run( main_program, feed=self.feeds, - fetch_list=self.fetch_list, + fetch_list=main_program.list_vars()[-1], ) return fetches diff --git a/test/ir/pir/fused_pass/test_add_norm_fuse_pass.py b/test/ir/pir/fused_pass/test_add_norm_fuse_pass.py index fac6e62bc2278..addb443cb70f8 100644 --- a/test/ir/pir/fused_pass/test_add_norm_fuse_pass.py +++ b/test/ir/pir/fused_pass/test_add_norm_fuse_pass.py @@ -21,8 +21,6 @@ from paddle.base import core from paddle.pir.core import create_parameter -paddle.enable_static() - class TestRmsNormFusePattern(PassTest): r""" @@ -284,7 +282,7 @@ class TestAddLayerNormFusePattern(TestRmsNormFusePattern): def sample_program(self): for x_shape in [[1, 1, 4096]]: for w_shape in [[4096]]: - for w_type in ['float32']: + for x_type in ['float32', 'float16']: for epilson in [1e-6]: with paddle.pir_utils.IrGuard(): start_prog = paddle.static.Program() @@ -295,10 +293,10 @@ def sample_program(self): residual = paddle.static.data( name='residual', shape=x_shape, - dtype='float32', + dtype=x_type, ) x = paddle.static.data( - name='x', shape=x_shape, dtype='float32' + name='x', shape=x_shape, dtype=x_type ) w_attr = paddle.ParamAttr( learning_rate=0.0, @@ -306,13 +304,19 @@ def sample_program(self): mean=0.0, std=2.0 ), ) + b_attr = 
paddle.ParamAttr( + learning_rate=0.0, + initializer=paddle.nn.initializer.Normal( + mean=0.0, std=2.0 + ), + ) w1 = create_parameter( name="w1", shape=w_shape, - dtype=w_type, + dtype=x_type, initializer=paddle.nn.initializer.Assign( np.random.random([4096, 4096]).astype( - w_type + x_type ) ), ) @@ -322,6 +326,7 @@ def sample_program(self): add_out.shape[-1:], epsilon=epilson, weight_attr=w_attr, + bias_attr=b_attr, ) layer_norm_out = layer_norm(add_out) matmul_out = paddle.matmul(layer_norm_out, w1) @@ -332,11 +337,11 @@ def sample_program(self): ] self.feeds = { "x": np.random.random(x_shape).astype( - "float32" + x_type ), "residual": np.random.random( x_shape - ).astype("float32"), + ).astype(x_type), } self.fetch_list = [out] self.valid_op_map = { @@ -350,5 +355,202 @@ def test_check_output(self): self.check_pass_correct(atol=1e-3, rtol=1e-3) +class TestAddGroupNormPattern_FP16(PassTest): + r""" + x residual + | | + add + | + group_norm + """ + + def is_program_valid(self, program=None): + return True + + def sample_program(self): + for x_shape in [[2, 6, 4, 2]]: + for residual_shape in [[1, 6, 1, 1]]: + for dtype in ['float16']: + for epilson in [1e-5]: + for groups in [2]: + for data_layout in ['NCHW']: + rand_value = ( + 0.001 + * paddle.rand( + shape=[x_shape[1]], dtype=dtype + ).numpy() + ) + with paddle.pir_utils.IrGuard(): + start_prog = paddle.static.Program() + main_prog = paddle.static.Program() + with paddle.pir.core.program_guard( + main_prog, start_prog + ): + residual = paddle.static.data( + name='residual', + shape=residual_shape, + dtype=dtype, + ) + x = paddle.static.data( + name='x', shape=x_shape, dtype=dtype + ) + w = create_parameter( + shape=[x_shape[1]], + dtype=dtype, + initializer=paddle.nn.initializer.Assign( + rand_value + ), + ) + b = create_parameter( + shape=[residual_shape[1]], + dtype=dtype, + initializer=paddle.nn.initializer.Assign( + rand_value + ), + ) + add_out = paddle.add(x, residual) + + group_norm_out = ( + 
paddle.nn.functional.group_norm( + add_out, + num_groups=groups, + epsilon=epilson, + weight=w, + bias=b, + data_format=data_layout, + ) + ) + out = paddle.assign(group_norm_out) + self.pass_attr_list = [ + {'add_norm_fuse_pass': {}}, + {'transfer_layout_pass': {}}, + { + 'remove_redundant_transpose_pass': {} + }, + ] + self.feeds = { + "x": np.random.random( + x_shape + ).astype(dtype), + "residual": np.random.random( + residual_shape + ).astype(dtype), + } + self.fetch_list = [out] + self.valid_op_map = { + "pa_op.add": 0, + "pd_op.group_norm": 0, + "pd_op.add_group_norm_silu": 1, + } + yield [main_prog, start_prog], False + + def setUp(self): + if core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def test_check_output(self): + self.check_pass_correct() + + +class TestAddGroupNormPatternSilu_FP16(PassTest): + r""" + x residual + | | + add + | + group_norm + """ + + def is_program_valid(self, program=None): + return True + + def sample_program(self): + for x_shape in [[2, 6, 4, 2]]: + for residual_shape in [[1, 6, 1, 1]]: + for dtype in ['float16']: + for epilson in [1e-5]: + for groups in [2]: + for data_layout in ['NCHW']: + rand_value = ( + 0.001 + * paddle.rand( + shape=[x_shape[1]], dtype=dtype + ).numpy() + ) + with paddle.pir_utils.IrGuard(): + start_prog = paddle.static.Program() + main_prog = paddle.static.Program() + with paddle.pir.core.program_guard( + main_prog, start_prog + ): + residual = paddle.static.data( + name='residual', + shape=residual_shape, + dtype=dtype, + ) + x = paddle.static.data( + name='x', shape=x_shape, dtype=dtype + ) + w = create_parameter( + shape=[x_shape[1]], + dtype=dtype, + initializer=paddle.nn.initializer.Assign( + rand_value + ), + ) + b = create_parameter( + shape=[x_shape[1]], + dtype=dtype, + initializer=paddle.nn.initializer.Assign( + rand_value + ), + ) + add_out = paddle.add(x, residual) + group_norm_out = ( + paddle.nn.functional.group_norm( + add_out, + num_groups=groups, + 
epsilon=epilson, + weight=w, + bias=b, + data_format=data_layout, + ) + ) + out = paddle.nn.functional.silu( + group_norm_out + ) + out = paddle.assign(out) + self.pass_attr_list = [ + {'add_norm_fuse_pass': {}}, + {'transfer_layout_pass': {}}, + { + 'remove_redundant_transpose_pass': {} + }, + ] + self.feeds = { + "x": np.random.random( + x_shape + ).astype(dtype), + "residual": np.random.random( + residual_shape + ).astype(dtype), + } + self.fetch_list = [out] + self.valid_op_map = { + "pd_op.silu": 0, + "pd_op.add": 0, + "pd_op.group_norm": 0, + "pd_op.add_group_norm_silu": 1, + } + yield [main_prog, start_prog], False + + def setUp(self): + if core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def test_check_output(self): + self.check_pass_correct() + + if __name__ == "__main__": unittest.main() diff --git a/test/ir/pir/fused_pass/test_group_norm_silu_fuse_pass.py b/test/ir/pir/fused_pass/test_group_norm_silu_fuse_pass.py new file mode 100644 index 0000000000000..c6f1411d5cfcf --- /dev/null +++ b/test/ir/pir/fused_pass/test_group_norm_silu_fuse_pass.py @@ -0,0 +1,119 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from pass_test import PassTest + +import paddle +from paddle.base import core +from paddle.pir.core import create_parameter + + +class GroupNormSiluPattern(PassTest): + r""" + group_norm + | + silu + """ + + def is_program_valid(self, program=None): + return True + + def sample_program(self): + for x_shape in [[2, 6, 4, 2]]: + dtype = None + if core.is_compiled_with_xpu(): + dtype = 'float32' + elif core.is_compiled_with_cuda(): + dtype = 'float16' + for epilson in [1e-5]: + for groups in [2]: + rand_value = ( + 0.001 + * paddle.rand(shape=[x_shape[1]], dtype=dtype).numpy() + ) + with paddle.pir_utils.IrGuard(): + start_prog = paddle.static.Program() + main_prog = paddle.static.Program() + with paddle.pir.core.program_guard( + main_prog, start_prog + ): + x = paddle.static.data( + name='x', shape=x_shape, dtype=dtype + ) + w = create_parameter( + shape=[x_shape[1]], + dtype=dtype, + initializer=paddle.nn.initializer.Assign( + rand_value + ), + ) + b = create_parameter( + shape=[x_shape[1]], + dtype=dtype, + initializer=paddle.nn.initializer.Assign( + rand_value + ), + ) + group_norm_out = paddle.nn.functional.group_norm( + x, + num_groups=groups, + epsilon=epilson, + weight=w, + bias=b, + ) + out = paddle.nn.functional.silu(group_norm_out) + out = paddle.assign(out) + if core.is_compiled_with_xpu(): + self.pass_attr_list = [ + {'group_norm_silu_fuse_pass': {}}, + ] + elif core.is_compiled_with_cuda(): + self.pass_attr_list = [ + {'group_norm_silu_fuse_pass': {}}, + {'transfer_layout_pass': {}}, + ] + self.feeds = { + "x": np.random.random(x_shape).astype(dtype), + } + self.fetch_list = [out] + if core.is_compiled_with_xpu(): + self.valid_op_map = { + "pd_op.silu": 0, + "pd_op.group_norm": 0, + "pd_op.group_norm_silu_xpu": 1, + } + elif core.is_compiled_with_cuda(): + self.valid_op_map = { + "pd_op.silu": 0, + "pd_op.group_norm": 0, + "pd_op.add_group_norm_silu": 1, + } + + yield [main_prog, start_prog], False + + def 
setUp(self): + if core.is_compiled_with_xpu(): + self.places.append(paddle.XPUPlace(0)) + elif core.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def test_check_output(self): + self.check_pass_correct() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/ir/pir/test_build_op.py b/test/ir/pir/test_build_op.py similarity index 88% rename from test/deprecated/ir/pir/test_build_op.py rename to test/ir/pir/test_build_op.py index cd0ae03b33958..ac92d124a0dc5 100644 --- a/test/deprecated/ir/pir/test_build_op.py +++ b/test/ir/pir/test_build_op.py @@ -22,19 +22,20 @@ def get_ir_program(): paddle.enable_static() - x = paddle.randn([4, 4]) - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x_s = paddle.static.data('x', [4, 4], x.dtype) - x_s.stop_gradient = False - y_s = paddle.matmul(x_s, x_s) - y_s = paddle.add(x_s, y_s) - y_s = paddle.tanh(y_s) - pir_program = pir.translate_to_pir(main_program.desc) - return pir_program + with paddle.pir_utils.OldIrGuard(): + x = paddle.randn([4, 4]) + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x_s = paddle.static.data('x', [4, 4], x.dtype) + x_s.stop_gradient = False + y_s = paddle.matmul(x_s, x_s) + y_s = paddle.add(x_s, y_s) + y_s = paddle.tanh(y_s) + pir_program = pir.translate_to_pir(main_program.desc) + return pir_program class TestBuildOp(unittest.TestCase): diff --git a/test/deprecated/ir/pir/test_ir_backward.py b/test/ir/pir/test_ir_backward.py similarity index 85% rename from test/deprecated/ir/pir/test_ir_backward.py rename to test/ir/pir/test_ir_backward.py index 3f8a77eed354f..c1818aa493a37 100644 --- a/test/deprecated/ir/pir/test_ir_backward.py +++ b/test/ir/pir/test_ir_backward.py @@ -26,23 +26,21 @@ def get_ir_program_0(): paddle.enable_static() - x = 
paddle.randn([4, 4]) - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x_s = paddle.static.data('x', [4, 4], x.dtype) - x_s.stop_gradient = False - k_s = paddle.tanh(x_s) - pir_program = pir.translate_to_pir(main_program.desc) - return pir_program + with paddle.pir_utils.OldIrGuard(): + x = paddle.randn([4, 4]) + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x_s = paddle.static.data('x', [4, 4], x.dtype) + x_s.stop_gradient = False + k_s = paddle.tanh(x_s) + pir_program = pir.translate_to_pir(main_program.desc) + return pir_program class TesBackward_1(unittest.TestCase): - def tearDown(self) -> None: - paddle.framework.set_flags({"FLAGS_enable_pir_api": False}) - def test_grad(self): pir_program = get_ir_program_0() input = pir_program.global_block().ops[-1].operand(0).source() @@ -138,26 +136,24 @@ def test_split(self): def get_ir_program_1(): paddle.enable_static() - x = paddle.randn([2, 2]) - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x_s = paddle.static.data('x', [4, 4], x.dtype) - x_s.stop_gradient = False - - k_s = paddle.tanh(x_s) - z_x = paddle.tanh(x_s) - out = paddle.add(z_x, k_s) - pir_program = pir.translate_to_pir(main_program.desc) - return pir_program + with paddle.pir_utils.OldIrGuard(): + x = paddle.randn([2, 2]) + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x_s = paddle.static.data('x', [4, 4], x.dtype) + x_s.stop_gradient = False + + k_s = paddle.tanh(x_s) + z_x = paddle.tanh(x_s) + out = paddle.add(z_x, k_s) + pir_program = pir.translate_to_pir(main_program.desc) + return pir_program class 
TesBackward_2(unittest.TestCase): - def tearDown(self) -> None: - paddle.framework.set_flags({"FLAGS_enable_pir_api": False}) - def test_add_n(self): pir_program = get_ir_program_1() input_x = pir_program.global_block().ops[-3].operand(0).source() @@ -216,23 +212,21 @@ def test_concat(self): def get_ir_program_2(): paddle.enable_static() - x = paddle.randn([2, 2]) - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x_s = paddle.static.data('x', [4, 4], x.dtype) - x_s.stop_gradient = False - k_s = paddle.sum(x_s, axis=(-1,), keepdim=False) - pir_program = pir.translate_to_pir(main_program.desc) - return pir_program + with paddle.pir_utils.OldIrGuard(): + x = paddle.randn([2, 2]) + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x_s = paddle.static.data('x', [4, 4], x.dtype) + x_s.stop_gradient = False + k_s = paddle.sum(x_s, axis=(-1,), keepdim=False) + pir_program = pir.translate_to_pir(main_program.desc) + return pir_program class TestBackward_3(unittest.TestCase): - def tearDown(self) -> None: - paddle.framework.set_flags({"FLAGS_enable_pir_api": False}) - def test_basic_network(self): pir_program = get_ir_program_2() x = pir_program.global_block().ops[-1].operand(0).source() @@ -250,9 +244,6 @@ def test_basic_network(self): class TestBackward_4(unittest.TestCase): - def tearDown(self) -> None: - paddle.framework.set_flags({"FLAGS_enable_pir_api": False}) - def test_basic_network(self): if not paddle.framework.in_pir_mode(): return @@ -293,9 +284,6 @@ def false_func(): class TestBackward_5(unittest.TestCase): - def tearDown(self) -> None: - paddle.framework.set_flags({"FLAGS_enable_pir_api": False}) - def test_skip_vjp(self): if not paddle.framework.in_pir_mode(): return diff --git a/test/deprecated/ir/pir/test_ir_pybind.py 
b/test/ir/pir/test_ir_pybind.py similarity index 80% rename from test/deprecated/ir/pir/test_ir_pybind.py rename to test/ir/pir/test_ir_pybind.py index afe8b57385379..62f9066c58c37 100644 --- a/test/deprecated/ir/pir/test_ir_pybind.py +++ b/test/ir/pir/test_ir_pybind.py @@ -22,21 +22,22 @@ def get_ir_program(): - x = paddle.randn([4, 4]) - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x_s = paddle.static.data('x', [4, 4], x.dtype) - x_s.stop_gradient = False - y_s = paddle.matmul(x_s, x_s) - z_s = paddle.add(y_s, y_s) - k_s = paddle.tanh(z_s) - q_s = paddle.unsqueeze(k_s, [2]) - - pir_program = pir.translate_to_pir(main_program.desc) - return pir_program + with paddle.pir_utils.OldIrGuard(): + x = paddle.randn([4, 4]) + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x_s = paddle.static.data('x', [4, 4], x.dtype) + x_s.stop_gradient = False + y_s = paddle.matmul(x_s, x_s) + z_s = paddle.add(y_s, y_s) + k_s = paddle.tanh(z_s) + q_s = paddle.unsqueeze(k_s, [2]) + + pir_program = pir.translate_to_pir(main_program.desc) + return pir_program class TestPybind(unittest.TestCase): @@ -165,38 +166,43 @@ def test_type(self): self.assertEqual(add_op.result(0).is_selected_row_type(), True) def test_attr(self): - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - conv_data = paddle.static.data( - 'conv_data', [None, 3, 32, 32], dtype='float32' + with paddle.pir_utils.OldIrGuard(): + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), ) - conv2d_out = paddle.static.nn.conv2d( - input=conv_data, - num_filters=2, - filter_size=3, - stride=3, - act="relu", + with paddle.static.program_guard(main_program, start_program): + 
conv_data = paddle.static.data( + 'conv_data', [None, 3, 32, 32], dtype='float32' + ) + conv2d_out = paddle.static.nn.conv2d( + input=conv_data, + num_filters=2, + filter_size=3, + stride=3, + act="relu", + ) + full_out = paddle.tensor.fill_constant( + shape=[4, 4], dtype="float32", value=2 + ) + + pir_program = pir.translate_to_pir(main_program.desc) + conv_attr = pir_program.global_block().ops[3].attrs() + full_attr = pir_program.global_block().ops[8].attrs() + self.assertEqual(conv_attr["stop_gradient"], [False]) + self.assertEqual(conv_attr["dilations"], [1, 1]) + self.assertEqual(conv_attr["data_format"], "NCHW") + self.assertEqual(conv_attr["strides"], [3, 3]) + self.assertEqual(conv_attr["paddings"], [0, 0]) + self.assertEqual(conv_attr["padding_algorithm"], "EXPLICIT") + self.assertEqual(conv_attr["groups"], 1) + self.assertEqual( + full_attr["dtype"], paddle.base.core.DataType.FLOAT32 ) - full_out = paddle.tensor.fill_constant( - shape=[4, 4], dtype="float32", value=2 + self.assertTrue( + isinstance(full_attr["place"], paddle.base.core.Place) ) - pir_program = pir.translate_to_pir(main_program.desc) - conv_attr = pir_program.global_block().ops[3].attrs() - full_attr = pir_program.global_block().ops[8].attrs() - self.assertEqual(conv_attr["stop_gradient"], [False]) - self.assertEqual(conv_attr["dilations"], [1, 1]) - self.assertEqual(conv_attr["data_format"], "NCHW") - self.assertEqual(conv_attr["strides"], [3, 3]) - self.assertEqual(conv_attr["paddings"], [0, 0]) - self.assertEqual(conv_attr["padding_algorithm"], "EXPLICIT") - self.assertEqual(conv_attr["groups"], 1) - self.assertEqual(full_attr["dtype"], paddle.base.core.DataType.FLOAT32) - self.assertTrue(isinstance(full_attr["place"], paddle.base.core.Place)) - def test_operands(self): pir_program = get_ir_program() matmul_op = pir_program.global_block().ops[1] diff --git a/test/deprecated/ir/pir/test_ir_vjp.py b/test/ir/pir/test_ir_vjp.py similarity index 62% rename from 
test/deprecated/ir/pir/test_ir_vjp.py rename to test/ir/pir/test_ir_vjp.py index 8401761ba3a05..53268e7026422 100644 --- a/test/deprecated/ir/pir/test_ir_vjp.py +++ b/test/ir/pir/test_ir_vjp.py @@ -22,17 +22,20 @@ def get_ir_program(): - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x = paddle.static.data('x', [4, 4], 'float32') - x.stop_gradient = False - paddle.tanh(x) - paddle.tensor.fill_constant(shape=[4, 4], dtype='float32', value=2.0) - pir_program = pir.translate_to_pir(main_program.desc) - return pir_program + with paddle.pir_utils.OldIrGuard(): + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x = paddle.static.data('x', [4, 4], 'float32') + x.stop_gradient = False + paddle.tanh(x) + paddle.tensor.fill_constant( + shape=[4, 4], dtype='float32', value=2.0 + ) + pir_program = pir.translate_to_pir(main_program.desc) + return pir_program class TestTanhVjp(unittest.TestCase): @@ -92,20 +95,23 @@ def test_tanh_vjp2(self): class TestMeanVjp(unittest.TestCase): def test_mean_vjp1(self): - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x = paddle.static.data('x', [4, 4], 'float32') - x.stop_gradient = False - paddle.mean(x, axis=[0, 1]) - paddle.tensor.fill_constant(shape=[1], dtype='float32', value=2.0) - pir_program = pir.translate_to_pir(main_program.desc) - fill_constant_op = pir_program.global_block().ops[-1] - mean_op = pir_program.global_block().ops[-2] - out_grads = [[fill_constant_op.result(0)]] - stop_gradients = [[False]] + with paddle.pir_utils.OldIrGuard(): + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x = 
paddle.static.data('x', [4, 4], 'float32') + x.stop_gradient = False + paddle.mean(x, axis=[0, 1]) + paddle.tensor.fill_constant( + shape=[1], dtype='float32', value=2.0 + ) + pir_program = pir.translate_to_pir(main_program.desc) + fill_constant_op = pir_program.global_block().ops[-1] + mean_op = pir_program.global_block().ops[-2] + out_grads = [[fill_constant_op.result(0)]] + stop_gradients = [[False]] with paddle.pir.core.program_guard(pir_program): grad_outs = call_vjp( mean_op, @@ -138,20 +144,23 @@ def test_mean_vjp1(self): self.assertEqual(len(pir_program.global_block().ops), 4) def test_mean_vjp2(self): - main_program, start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x = paddle.static.data('x', [4, 4], 'float32') - x.stop_gradient = False - paddle.mean(x, axis=[0, 1]) - paddle.tensor.fill_constant(shape=[1], dtype='float32', value=2.0) - pir_program = pir.translate_to_pir(main_program.desc) - fill_constant_op = pir_program.global_block().ops[-1] - mean_op = pir_program.global_block().ops[-2] - out_grads = [[fill_constant_op.result(0)]] - stop_gradients = [[True]] + with paddle.pir_utils.OldIrGuard(): + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x = paddle.static.data('x', [4, 4], 'float32') + x.stop_gradient = False + paddle.mean(x, axis=[0, 1]) + paddle.tensor.fill_constant( + shape=[1], dtype='float32', value=2.0 + ) + pir_program = pir.translate_to_pir(main_program.desc) + fill_constant_op = pir_program.global_block().ops[-1] + mean_op = pir_program.global_block().ops[-2] + out_grads = [[fill_constant_op.result(0)]] + stop_gradients = [[True]] with paddle.pir.core.program_guard(pir_program): grad_outs = call_vjp( mean_op, @@ -165,20 +174,23 @@ def test_mean_vjp2(self): class TesthasVjp(unittest.TestCase): def test_has_vjp(self): - main_program, 
start_program = ( - paddle.static.Program(), - paddle.static.Program(), - ) - with paddle.static.program_guard(main_program, start_program): - x = paddle.static.data('x', [4, 4], 'float32') - x.stop_gradient = False - paddle.mean(x, axis=[0, 1]) - paddle.tensor.fill_constant(shape=[1], dtype='float32', value=2.0) - pir_program = pir.translate_to_pir(main_program.desc) - fill_constant_op = pir_program.global_block().ops[-1] - mean_op = pir_program.global_block().ops[-2] - self.assertEqual(has_vjp(fill_constant_op), False) - self.assertEqual(has_vjp(mean_op), True) + with paddle.pir_utils.OldIrGuard(): + main_program, start_program = ( + paddle.static.Program(), + paddle.static.Program(), + ) + with paddle.static.program_guard(main_program, start_program): + x = paddle.static.data('x', [4, 4], 'float32') + x.stop_gradient = False + paddle.mean(x, axis=[0, 1]) + paddle.tensor.fill_constant( + shape=[1], dtype='float32', value=2.0 + ) + pir_program = pir.translate_to_pir(main_program.desc) + fill_constant_op = pir_program.global_block().ops[-1] + mean_op = pir_program.global_block().ops[-2] + self.assertEqual(has_vjp(fill_constant_op), False) + self.assertEqual(has_vjp(mean_op), True) if __name__ == "__main__": diff --git a/test/ir/pir/test_pass_manager.py b/test/ir/pir/test_pass_manager.py new file mode 100644 index 0000000000000..92113fabd5842 --- /dev/null +++ b/test/ir/pir/test_pass_manager.py @@ -0,0 +1,66 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import paddle +from paddle import pir +from paddle.base import core +from paddle.framework import LayerHelper + +paddle.enable_static() + + +class TestShadowOutputSlice(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.ones([3, 9, 5], dtype='float32') + y = paddle.static.data( + name="y", shape=[3, 9, 5], dtype="float32" + ) + z = x * y # will be eliminated + + _, out, _ = paddle.split(x, num_or_sections=3, axis=1) + helper = LayerHelper('shadow_output') + helper.append_op( + type="shadow_output", + inputs={"x": [out.name]}, + outputs={"out": [y.name]}, + attrs={"name": out.name}, + ) + + new_program = pir.translate_to_pir(main_program.desc) + op_names = [op.name() for op in new_program.global_block().ops] + self.assertTrue('pd_op.multiply' in op_names) + pm = pir.PassManager() + pm.add_pass( + 'dead_code_elimination_pass', {} + ) # apply pass to eliminate dead code + pm.run(new_program) + op_names = [op.name() for op in new_program.global_block().ops] + self.assertEqual(pm.passes(), ['dead_code_elimination_pass']) + self.assertFalse(pm.empty()) + self.assertTrue( + 'pd_op.multiply' not in op_names + ) # multiply is eliminated because its output is not used + + +if __name__ == "__main__": + unittest.main() diff --git a/test/ir/pir/test_special_op_translator.py b/test/ir/pir/test_special_op_translator.py new file mode 100644 index 0000000000000..09440f2fc48bd --- /dev/null +++ b/test/ir/pir/test_special_op_translator.py @@ -0,0 +1,586 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle import pir +from paddle.base import core +from paddle.framework import LayerHelper + +paddle.enable_static() + + +class TestCastOpTranscriber(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.to_tensor([2, 3, 4], 'float64') + y = paddle.cast(x, 'uint8') + + _, mappings = pir.translate_to_pir_with_param_map(main_program.desc) + assert len(str(mappings)) > 0, "no mapping found" + + +class TestCondWithInplace(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + + def cond_with_inplace(): + x = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") + y = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") + running_mean = paddle.to_tensor([0], dtype="float32") + running_variance = paddle.to_tensor([1], dtype="float32") + weight = paddle.to_tensor([2], dtype="float32") + bias = paddle.to_tensor([1], dtype="float32") + if x > y: + y = paddle.nn.functional.batch_norm( + x, running_mean, running_variance, weight, bias + ) + else: + y = paddle.nn.functional.batch_norm( + x, running_mean, running_variance, weight, bias + ) + + legacy_program = paddle.jit.to_static( + 
cond_with_inplace, + input_spec=[], + full_graph=True, + ) + + l = pir.translate_to_pir(legacy_program.main_program.desc) + assert l is not None + + def test_nested_op(self): + with paddle.pir_utils.OldIrGuard(): + + def cond_with_inplace(): + x = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") + y = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") + z = paddle.ones(shape=[2, 1, 2, 3], dtype="float32") + running_mean = paddle.to_tensor([0], dtype="float32") + running_variance = paddle.to_tensor([1], dtype="float32") + weight = paddle.to_tensor([2], dtype="float32") + bias = paddle.to_tensor([1], dtype="float32") + if x > y: + if y > z: + z = paddle.nn.functional.batch_norm( + z, running_mean, running_variance, weight, bias + ) + else: + y = paddle.nn.functional.batch_norm( + x, running_mean, running_variance, weight, bias + ) + else: + if y > z: + z = paddle.nn.functional.batch_norm( + z, running_mean, running_variance, weight, bias + ) + else: + y = paddle.nn.functional.batch_norm( + x, running_mean, running_variance, weight, bias + ) + + legacy_program = paddle.jit.to_static( + cond_with_inplace, + input_spec=[], + full_graph=True, + ) + + l = pir.translate_to_pir(legacy_program.main_program.desc) + assert l is not None + + +class TestElementwiseOpTranscriber(unittest.TestCase): + def test_elementwise_without_y_grad(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + exe = paddle.static.Executor(place) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x_data = np.random.rand(100, 2, 3) + y_data = np.random.rand(100) + x = paddle.to_tensor(x_data, dtype='float32') + x.stop_gradient = False + y = paddle.to_tensor(y_data, dtype='float32') + + out1 = paddle.tensor.math._elementwise_op( + LayerHelper('elementwise_add', x=x, y=y, axis=0) + ) + out1.stop_gradient = False + mean = 
paddle.mean(out1) + paddle.static.append_backward(mean) + + out = exe.run(main_program, {}, fetch_list=[out1]) + np.testing.assert_allclose( + out[0], + x_data + y_data.reshape(100, 1, 1), + rtol=1e-6, + atol=1e-6, + ) + + def test_elementwise_with_y_grad(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + exe = paddle.static.Executor(place) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x_data = np.random.rand(100, 2, 3) + y_data = np.random.rand(100) + x = paddle.to_tensor(x_data, dtype='float32') + x.stop_gradient = False + y = paddle.to_tensor(y_data, dtype='float32') + y.stop_gradient = False + + out1 = paddle.tensor.math._elementwise_op( + LayerHelper('elementwise_add', x=x, y=y, axis=0) + ) + out1.stop_gradient = False + mean = paddle.mean(out1) + paddle.static.append_backward(mean) + + out = exe.run(main_program, {}, fetch_list=[out1]) + np.testing.assert_allclose( + out[0], + x_data + y_data.reshape(100, 1, 1), + rtol=1e-6, + atol=1e-6, + ) + + def test_add_inplace(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + exe = paddle.static.Executor(place) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.ones(shape=(100, 2, 3), dtype='float32') + y = paddle.ones(shape=(100, 2, 3), dtype='float32') + + helper = LayerHelper('elementwise_add') + helper.append_op( + type="elementwise_add", + inputs={"X": x, "Y": y}, + outputs={"Out": y}, + attrs={"axis": -1}, + ) + _ = pir.translate_to_pir(main_program.desc) + + +class TestEmbeddingOpTranscriber(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = 
paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.static.data( + name="x", shape=[2, 4], dtype=np.int64 + ) + embedding = paddle.nn.Embedding( + 10, + 3, + weight_attr=paddle.nn.initializer.Constant(value=1.0), + ) + output = embedding(x) + + _ = pir.translate_to_pir(main_program.desc) + + +class TestIncrementOpTranscriber(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + data = paddle.zeros(shape=[1], dtype='float32') + counter = paddle.increment(data) + + _ = pir.translate_to_pir(main_program.desc) + + +class TestAssignValueOpTranscriber(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.to_tensor( + [[0.1, 0.2], [0.3, 0.4]], + place=paddle.CPUPlace(), + stop_gradient=False, + ) + + _ = pir.translate_to_pir(main_program.desc) + + +class TestRnnOpTranscriber(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.randn((4, 16)) + prev_h = paddle.randn((4, 32)) + + cell = paddle.nn.SimpleRNNCell(16, 32) + y, h = cell(x, prev_h) + + _ = pir.translate_to_pir(main_program.desc) + + +class TestEmptyVarTranslate(unittest.TestCase): + def test_op(self): + with 
paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x1 = paddle.rand(shape=[3, 3], dtype="float32") + x1.stop_gradient = False + weight = paddle.full( + shape=[3, 3], fill_value="0.5", dtype="float32" + ) + y = paddle.nn.functional.linear(x1, weight) + y.stop_gradient = True + out1 = paddle.concat(x=[x1, y], axis=1) + out2 = paddle.mean(out1) + sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.1) + sgd_optimizer.minimize(out2) + _ = pir.translate_to_pir(main_program.desc) + + +class TestOneHotOpTranscriber(unittest.TestCase): + def test_mutable_attribute(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + depth = paddle.assign(np.array([10], dtype=np.int32)) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" + ) + one_hot_label = paddle.nn.functional.one_hot( + x=label, num_classes=depth + ) + + _ = pir.translate_to_pir(main_program.desc) + + def test_normal_attribute(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + depth = 10 + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" + ) + one_hot_label = paddle.nn.functional.one_hot( + x=label, num_classes=depth + ) + + _ = pir.translate_to_pir(main_program.desc) + + +class TestReduceOpTranscriber(unittest.TestCase): + def test_reduce_all(self): + place = core.Place() + place.set_place(paddle.CPUPlace()) + exe = 
paddle.static.Executor(place) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + arr = np.ones([2, 2], dtype="float32") + x = paddle.to_tensor(arr, dtype='int32') + out1 = paddle.all(x) + + out = exe.run(main_program, {}, fetch_list=[out1]) + np.testing.assert_array_equal(out[0], np.all(arr)) + + def test_with_axis(self): + place = core.Place() + place.set_place(paddle.CPUPlace()) + exe = paddle.static.Executor(place) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + arr = np.ones([2, 2], dtype="float32") + x = paddle.to_tensor(arr, dtype='int32') + out1 = paddle.all(x, axis=0) + + out = exe.run(main_program, {}, fetch_list=[out1]) + np.testing.assert_array_equal(out[0], np.all(arr, axis=0)) + + +class TestIndexPutOpTranscriber(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.randn([2, 3]) + indices = [ + paddle.randint(0, 2, [2]), + paddle.randint(0, 1, [2]), + ] + value = paddle.randn([2]) + y = paddle.index_put(x, indices, value, False) + + _ = pir.translate_to_pir(main_program.desc) + + +class TestGradAddOpTranscriber(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x_data = np.random.rand(100, 2, 3) + y_data = np.random.rand(100, 1, 1) + x = paddle.to_tensor(x_data, dtype='float32') + 
x.stop_gradient = False + y = paddle.to_tensor(y_data, dtype='float32') + + helper = LayerHelper('grad_add') + out = helper.create_variable_for_type_inference("float") + helper.append_op( + type="grad_add", + inputs={"X": x, "Y": y}, + outputs={"Out": out}, + attrs={"axis": -1}, + ) + + _ = pir.translate_to_pir(main_program.desc) + + +class TestShadowOutputSlice(unittest.TestCase): + def test_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.rand([3, 9, 5]) + y = paddle.static.data( + name="y", shape=[3, 9, 5], dtype="float32" + ) + + _, out, _ = paddle.split(x, num_or_sections=3, axis=1) + helper = LayerHelper('shadow_output') + helper.append_op( + type="shadow_output", + inputs={"x": [out.name]}, + outputs={"out": [y.name]}, + attrs={"name": out.name}, + ) + + l = pir.translate_to_pir(main_program.desc) + + +class TestSetValueOp(unittest.TestCase): + def test_no_mutable_attribute(self): + place = core.Place() + place.set_place(paddle.CPUPlace()) + exe = paddle.static.Executor(place) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.ones(shape=[2, 3, 4], dtype="float32") + x = paddle.static.setitem(x, (0, 0), 6) + ret = exe.run(main_program, fetch_list=[x]) + + x_data = np.ones([2, 3, 4]).astype("float32") + x_data[0, 0] = 6 + np.testing.assert_array_equal(ret[0], x_data) + + def test_with_mutable_attribute(self): + place = core.Place() + place.set_place(paddle.CPUPlace()) + exe = paddle.static.Executor(place) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = 
paddle.ones(shape=[2, 3, 4], dtype="float32") + zero = paddle.full([], 0, dtype="int32") + x = paddle.static.setitem(x, zero, 6) + ret = exe.run(main_program, fetch_list=[x]) + + x_data = np.ones([2, 3, 4]).astype("float32") + x_data[0] = 6 + np.testing.assert_array_equal(ret[0], x_data) + + def test_grad(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + exe = paddle.static.Executor(place) + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + input_shape = [7, 6, 5, 4, 3, 2] + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.ones(shape=input_shape, dtype="float32") + value = paddle.tensor.fill_constant([1, 3, 2], "float32", 1) + # test stop_gradient + value.stop_gradient = False + x.stop_gradient = False + attrs = { + 'axes': [0], + 'starts': [6], + 'ends': [0], + 'steps': [-4], + 'decrease_axes': [], + 'none_axes': [], + 'dtype': paddle.float32, + } + inputs = {'Input': x, 'ValueTensor': value} + + helper = LayerHelper("set_value") + y = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs=attrs, + ) + y2 = y + 1 + loss = paddle.sum(y2) + opt = paddle.optimizer.Adam() + opt.minimize(loss) + + x_data = np.arange( + 0, np.prod(input_shape), dtype="float32" + ).reshape(input_shape) + fetch_list = [x.grad_name, value.grad_name] + ret = exe.run(main_program, fetch_list=fetch_list) + self.assertTrue((ret[0][6:0:-4] == 0).all()) + + +class TestShareBufferOpTranscriber(unittest.TestCase): + def test_program(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + x = paddle.ones(shape=(100, 2, 3), dtype='float32') + y = 
paddle.ones(shape=(100, 2, 3), dtype='float32') + + helper = LayerHelper('share_buffer') + helper.append_op( + type="share_buffer", + inputs={"X": x}, + outputs={"Out": y, "XOut": x}, + ) + l = pir.translate_to_pir(main_program.desc) + assert ( + l.global_block().ops[2].name() == "pd_op.share_data_" + ), "share_buffer should be translated to share_data_" + + +class TestDataOp(unittest.TestCase): + def test_data_op(self): + with paddle.pir_utils.OldIrGuard(): + place = core.Place() + place.set_place(paddle.CPUPlace()) + + new_scope = paddle.static.Scope() + main_program = paddle.static.Program() + with paddle.static.scope_guard(new_scope): + with paddle.static.program_guard(main_program): + _ = paddle.static.data( + name="y", shape=[3, 9, 5], dtype="int64" + ) + l = pir.translate_to_pir(main_program.desc) + self.assertTrue(len(l.global_block().ops) > 0) + self.assertTrue(l.global_block().ops[0].name() == "pd_op.data") + data_op = l.global_block().ops[0] + self.assertIn("dtype", data_op.attrs()) + self.assertEqual(str(data_op.attrs()["dtype"]), "paddle.int64") + + +class TestCheckUnregisteredOp(unittest.TestCase): + def test_program(self): + with paddle.pir_utils.OldIrGuard(): + main_program = paddle.static.Program() + with paddle.static.program_guard(main_program): + x = paddle.randn((4, 16)) + prev_h = paddle.randn((4, 32)) + + cell = paddle.nn.SimpleRNNCell(16, 32) + y, h = cell(x, prev_h) + + ops = pir.check_unregistered_ops(main_program.desc) + assert len(ops) == 0 + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/ir/pir/test_standalone_pir.py b/test/ir/pir/test_standalone_pir.py similarity index 91% rename from test/deprecated/ir/pir/test_standalone_pir.py rename to test/ir/pir/test_standalone_pir.py index 6104cf533baa8..73c36afedb548 100644 --- a/test/deprecated/ir/pir/test_standalone_pir.py +++ b/test/ir/pir/test_standalone_pir.py @@ -296,36 +296,36 @@ def tearDown(self): self.temp_dir.cleanup() def test_with_pir(self): - 
paddle.disable_static() - - linear = paddle.nn.Linear(10, 10) - path = os.path.join(self.model_path, "linear") - - paddle.jit.save( - linear, - path, - input_spec=[paddle.static.InputSpec([10, 10], 'float32', 'x')], - ) - - paddle.enable_static() - place = ( - paddle.CUDAPlace(0) - if paddle.is_compiled_with_cuda() - else paddle.CPUPlace() - ) + with paddle.pir_utils.OldIrGuard(): + paddle.disable_static() + linear = paddle.nn.Linear(10, 10) + path = os.path.join(self.model_path, "linear") + + paddle.jit.save( + linear, + path, + input_spec=[paddle.static.InputSpec([10, 10], 'float32', 'x')], + ) - exe = paddle.static.Executor(place) + paddle.enable_static() + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) - [ - inference_program, - feed_target_names, - fetch_targets, - ] = paddle.static.io.load_inference_model( - self.model_path, - executor=exe, - model_filename="linear.pdmodel", - params_filename="linear.pdiparams", - ) + exe = paddle.static.Executor(place) + + [ + inference_program, + feed_target_names, + fetch_targets, + ] = paddle.static.io.load_inference_model( + self.model_path, + executor=exe, + model_filename="linear.pdmodel", + params_filename="linear.pdiparams", + ) class TestPirConcatDygraph(unittest.TestCase): diff --git a/test/deprecated/ir/pir/translator/CMakeLists.txt b/test/ir/pir/translator/CMakeLists.txt similarity index 100% rename from test/deprecated/ir/pir/translator/CMakeLists.txt rename to test/ir/pir/translator/CMakeLists.txt diff --git a/test/deprecated/ir/pir/translator/test_all_reduce_translator.py b/test/ir/pir/translator/test_all_reduce_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_all_reduce_translator.py rename to test/ir/pir/translator/test_all_reduce_translator.py index 3bef81873428a..017c8b4c90e50 100644 --- a/test/deprecated/ir/pir/translator/test_all_reduce_translator.py +++ b/test/ir/pir/translator/test_all_reduce_translator.py @@ -19,6 
+19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCAllReduceMinOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_barrier_translator.py b/test/ir/pir/translator/test_barrier_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_barrier_translator.py rename to test/ir/pir/translator/test_barrier_translator.py index 7d570df843081..60bf1ed57da17 100644 --- a/test/deprecated/ir/pir/translator/test_barrier_translator.py +++ b/test/ir/pir/translator/test_barrier_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestBarrierOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_c_allreduce_min_translator.py b/test/ir/pir/translator/test_c_allreduce_min_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_c_allreduce_min_translator.py rename to test/ir/pir/translator/test_c_allreduce_min_translator.py index 60549a63ec6e4..dfb628936001e 100644 --- a/test/deprecated/ir/pir/translator/test_c_allreduce_min_translator.py +++ b/test/ir/pir/translator/test_c_allreduce_min_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCAllReduceMinOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_c_allreduce_prod_translator.py b/test/ir/pir/translator/test_c_allreduce_prod_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_c_allreduce_prod_translator.py rename to test/ir/pir/translator/test_c_allreduce_prod_translator.py index 855f2e5f7293b..f803b7cabaf51 100644 --- 
a/test/deprecated/ir/pir/translator/test_c_allreduce_prod_translator.py +++ b/test/ir/pir/translator/test_c_allreduce_prod_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCAllReduceProdOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_c_reduce_max_translator.py b/test/ir/pir/translator/test_c_reduce_max_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_c_reduce_max_translator.py rename to test/ir/pir/translator/test_c_reduce_max_translator.py index c40624ad74fbb..700135b619b6a 100644 --- a/test/deprecated/ir/pir/translator/test_c_reduce_max_translator.py +++ b/test/ir/pir/translator/test_c_reduce_max_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCReduceMaxOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_c_reduce_min_translator.py b/test/ir/pir/translator/test_c_reduce_min_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_c_reduce_min_translator.py rename to test/ir/pir/translator/test_c_reduce_min_translator.py index 71610cf9a3e43..bb77cd649b16b 100644 --- a/test/deprecated/ir/pir/translator/test_c_reduce_min_translator.py +++ b/test/ir/pir/translator/test_c_reduce_min_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCReduceMinOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_c_reduce_prod_translator.py b/test/ir/pir/translator/test_c_reduce_prod_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_c_reduce_prod_translator.py rename to 
test/ir/pir/translator/test_c_reduce_prod_translator.py index 34caa22d77b9f..ac1553bf92b7c 100644 --- a/test/deprecated/ir/pir/translator/test_c_reduce_prod_translator.py +++ b/test/ir/pir/translator/test_c_reduce_prod_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCReduceProdOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_c_scatter_translator.py b/test/ir/pir/translator/test_c_scatter_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_c_scatter_translator.py rename to test/ir/pir/translator/test_c_scatter_translator.py index 66dbb3320ab43..79b8c24eb7911 100644 --- a/test/deprecated/ir/pir/translator/test_c_scatter_translator.py +++ b/test/ir/pir/translator/test_c_scatter_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCScatterOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_c_split_translator.py b/test/ir/pir/translator/test_c_split_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_c_split_translator.py rename to test/ir/pir/translator/test_c_split_translator.py index e09194e9ca019..9fe3df6d3560d 100644 --- a/test/deprecated/ir/pir/translator/test_c_split_translator.py +++ b/test/ir/pir/translator/test_c_split_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCSplitOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_dgc_momentum_translator.py b/test/ir/pir/translator/test_dgc_momentum_translator.py similarity index 98% rename from 
test/deprecated/ir/pir/translator/test_dgc_momentum_translator.py rename to test/ir/pir/translator/test_dgc_momentum_translator.py index b44b981ddc6cb..75a62c22e7f57 100644 --- a/test/deprecated/ir/pir/translator/test_dgc_momentum_translator.py +++ b/test/ir/pir/translator/test_dgc_momentum_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestDgcMomemtumOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_dgc_translator.py b/test/ir/pir/translator/test_dgc_translator.py similarity index 98% rename from test/deprecated/ir/pir/translator/test_dgc_translator.py rename to test/ir/pir/translator/test_dgc_translator.py index 6f2fe03137eb9..87d72c7afafcb 100644 --- a/test/deprecated/ir/pir/translator/test_dgc_translator.py +++ b/test/ir/pir/translator/test_dgc_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestDgcOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_distributed_fused_lamb.py b/test/ir/pir/translator/test_distributed_fused_lamb.py similarity index 99% rename from test/deprecated/ir/pir/translator/test_distributed_fused_lamb.py rename to test/ir/pir/translator/test_distributed_fused_lamb.py index 9493772d63799..4e03fd93082b3 100644 --- a/test/deprecated/ir/pir/translator/test_distributed_fused_lamb.py +++ b/test/ir/pir/translator/test_distributed_fused_lamb.py @@ -20,6 +20,8 @@ from paddle.base import core, unique_name from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestDistributedFusedLambOpTranslator(test_op_translator.TestOpTranslator): def setUp(self): diff --git a/test/deprecated/ir/pir/translator/test_distributed_fused_lamb_init.py 
b/test/ir/pir/translator/test_distributed_fused_lamb_init.py similarity index 99% rename from test/deprecated/ir/pir/translator/test_distributed_fused_lamb_init.py rename to test/ir/pir/translator/test_distributed_fused_lamb_init.py index 618c526830d5b..8faa4a33209c9 100644 --- a/test/deprecated/ir/pir/translator/test_distributed_fused_lamb_init.py +++ b/test/ir/pir/translator/test_distributed_fused_lamb_init.py @@ -20,6 +20,8 @@ from paddle.base import unique_name from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestDistributedFusedLambInitOpTranslator( test_op_translator.TestOpTranslator diff --git a/test/deprecated/ir/pir/translator/test_distributed_lookup_table_translate.py b/test/ir/pir/translator/test_distributed_lookup_table_translate.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_distributed_lookup_table_translate.py rename to test/ir/pir/translator/test_distributed_lookup_table_translate.py index ead69d9dcbbf0..e596432748779 100644 --- a/test/deprecated/ir/pir/translator/test_distributed_lookup_table_translate.py +++ b/test/ir/pir/translator/test_distributed_lookup_table_translate.py @@ -23,6 +23,8 @@ ) from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestDistributedLookupTableOpTranslator( test_op_translator.TestOpTranslator diff --git a/test/deprecated/ir/pir/translator/test_distributed_push_sparse_translator.py b/test/ir/pir/translator/test_distributed_push_sparse_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_distributed_push_sparse_translator.py rename to test/ir/pir/translator/test_distributed_push_sparse_translator.py index 996a48f99ec4d..d9bada6c0baeb 100644 --- a/test/deprecated/ir/pir/translator/test_distributed_push_sparse_translator.py +++ b/test/ir/pir/translator/test_distributed_push_sparse_translator.py @@ -23,6 +23,8 @@ ) from paddle.base.layer_helper import LayerHelper 
+paddle.pir_utils._switch_to_old_ir_() + class TestDistributedPushSparseOpTranslator( test_op_translator.TestOpTranslator diff --git a/test/deprecated/ir/pir/translator/test_global_gather_translator.py b/test/ir/pir/translator/test_global_gather_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_global_gather_translator.py rename to test/ir/pir/translator/test_global_gather_translator.py index cbd883aaf6500..83afd6b103442 100644 --- a/test/deprecated/ir/pir/translator/test_global_gather_translator.py +++ b/test/ir/pir/translator/test_global_gather_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestGlobalGatherOpTranslator( test_op_translator.TestOpWithBackwardTranslator diff --git a/test/deprecated/ir/pir/translator/test_global_scatter_translator.py b/test/ir/pir/translator/test_global_scatter_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_global_scatter_translator.py rename to test/ir/pir/translator/test_global_scatter_translator.py index fb349a30b95e2..3ea1c4fdc87ea 100644 --- a/test/deprecated/ir/pir/translator/test_global_scatter_translator.py +++ b/test/ir/pir/translator/test_global_scatter_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestGlobalScatterOpTranslator( test_op_translator.TestOpWithBackwardTranslator diff --git a/test/deprecated/ir/pir/translator/test_limit_by_capacity_translator.py b/test/ir/pir/translator/test_limit_by_capacity_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_limit_by_capacity_translator.py rename to test/ir/pir/translator/test_limit_by_capacity_translator.py index 82739201c3dd9..25a375a297709 100644 --- a/test/deprecated/ir/pir/translator/test_limit_by_capacity_translator.py +++ 
b/test/ir/pir/translator/test_limit_by_capacity_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestDistributedLookupTableOpTranslator( test_op_translator.TestOpTranslator diff --git a/test/deprecated/ir/pir/translator/test_nop_translator.py b/test/ir/pir/translator/test_nop_translator.py similarity index 96% rename from test/deprecated/ir/pir/translator/test_nop_translator.py rename to test/ir/pir/translator/test_nop_translator.py index e3a7722cd8354..f45ada523ec1a 100644 --- a/test/deprecated/ir/pir/translator/test_nop_translator.py +++ b/test/ir/pir/translator/test_nop_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestNopTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_op_translator.py b/test/ir/pir/translator/test_op_translator.py similarity index 98% rename from test/deprecated/ir/pir/translator/test_op_translator.py rename to test/ir/pir/translator/test_op_translator.py index 7ec1f0dd8d380..775677ea397ff 100644 --- a/test/deprecated/ir/pir/translator/test_op_translator.py +++ b/test/ir/pir/translator/test_op_translator.py @@ -21,6 +21,8 @@ paddle.enable_static() +paddle.pir_utils._switch_to_old_ir_() + class TestOpTranslator(unittest.TestCase): def setUp(self): diff --git a/test/deprecated/ir/pir/translator/test_partial_allgather_translator.py b/test/ir/pir/translator/test_partial_allgather_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_partial_allgather_translator.py rename to test/ir/pir/translator/test_partial_allgather_translator.py index 37c19e2105066..e4a1653137fb7 100644 --- a/test/deprecated/ir/pir/translator/test_partial_allgather_translator.py +++ b/test/ir/pir/translator/test_partial_allgather_translator.py @@ -19,6 +19,8 @@ import paddle from 
paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestPartialAllgetherOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_partial_recv_translator.py b/test/ir/pir/translator/test_partial_recv_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_partial_recv_translator.py rename to test/ir/pir/translator/test_partial_recv_translator.py index 6f06ec4fad073..953e6d9ed2f13 100644 --- a/test/deprecated/ir/pir/translator/test_partial_recv_translator.py +++ b/test/ir/pir/translator/test_partial_recv_translator.py @@ -23,6 +23,8 @@ ) from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestPartialRecvOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_partial_send_translator.py b/test/ir/pir/translator/test_partial_send_translator.py similarity index 96% rename from test/deprecated/ir/pir/translator/test_partial_send_translator.py rename to test/ir/pir/translator/test_partial_send_translator.py index 9f133f5274969..36c275480c2bc 100644 --- a/test/deprecated/ir/pir/translator/test_partial_send_translator.py +++ b/test/ir/pir/translator/test_partial_send_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestPartialSendTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_prune_gate_by_capacity_translator.py b/test/ir/pir/translator/test_prune_gate_by_capacity_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_prune_gate_by_capacity_translator.py rename to test/ir/pir/translator/test_prune_gate_by_capacity_translator.py index 637429bfa70b7..0ce278f77b90e 100644 --- 
a/test/deprecated/ir/pir/translator/test_prune_gate_by_capacity_translator.py +++ b/test/ir/pir/translator/test_prune_gate_by_capacity_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestPruneGateByCapacityOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/ir/pir/translator/test_pull_box_sparse_translator.py b/test/ir/pir/translator/test_pull_box_sparse_translator.py index f691892adc4f4..85fcfcb909567 100644 --- a/test/ir/pir/translator/test_pull_box_sparse_translator.py +++ b/test/ir/pir/translator/test_pull_box_sparse_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestPullBoxSparseOpTranslator( test_op_translator.TestOpWithBackwardTranslator diff --git a/test/deprecated/ir/pir/translator/test_pull_gpups_sparse_translator.py b/test/ir/pir/translator/test_pull_gpups_sparse_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_pull_gpups_sparse_translator.py rename to test/ir/pir/translator/test_pull_gpups_sparse_translator.py index abc695a0573a2..c55a9b6eb6f3a 100644 --- a/test/deprecated/ir/pir/translator/test_pull_gpups_sparse_translator.py +++ b/test/ir/pir/translator/test_pull_gpups_sparse_translator.py @@ -20,6 +20,8 @@ from paddle.base import core from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestPullGpupsSparseOpTranslator( test_op_translator.TestOpWithBackwardTranslator diff --git a/test/deprecated/ir/pir/translator/test_pull_sparse_v2_translator.py b/test/ir/pir/translator/test_pull_sparse_v2_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_pull_sparse_v2_translator.py rename to test/ir/pir/translator/test_pull_sparse_v2_translator.py index 374c7f5ee2e61..f91bb3ccc2f90 100644 --- 
a/test/deprecated/ir/pir/translator/test_pull_sparse_v2_translator.py +++ b/test/ir/pir/translator/test_pull_sparse_v2_translator.py @@ -20,6 +20,8 @@ from paddle.base import core from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestPullSparseV2OpTranslator( test_op_translator.TestOpWithBackwardTranslator diff --git a/test/deprecated/ir/pir/translator/test_push_dense_translator.py b/test/ir/pir/translator/test_push_dense_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_push_dense_translator.py rename to test/ir/pir/translator/test_push_dense_translator.py index cdd87ba72d3ed..26191a0b2d048 100644 --- a/test/deprecated/ir/pir/translator/test_push_dense_translator.py +++ b/test/ir/pir/translator/test_push_dense_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestPushDenseOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_random_routing_translator.py b/test/ir/pir/translator/test_random_routing_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_random_routing_translator.py rename to test/ir/pir/translator/test_random_routing_translator.py index 86d047930f8b7..c8b353fd7e71f 100644 --- a/test/deprecated/ir/pir/translator/test_random_routing_translator.py +++ b/test/ir/pir/translator/test_random_routing_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestRandomRoutingOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/deprecated/ir/pir/translator/test_send_and_recv_translator.py b/test/ir/pir/translator/test_send_and_recv_translator.py similarity index 97% rename from test/deprecated/ir/pir/translator/test_send_and_recv_translator.py rename to 
test/ir/pir/translator/test_send_and_recv_translator.py index c452ae34eb7c7..e71d43c524ba9 100644 --- a/test/deprecated/ir/pir/translator/test_send_and_recv_translator.py +++ b/test/ir/pir/translator/test_send_and_recv_translator.py @@ -19,6 +19,8 @@ import paddle from paddle.base.layer_helper import LayerHelper +paddle.pir_utils._switch_to_old_ir_() + class TestCReduceMinOpTranslator(test_op_translator.TestOpTranslator): def append_op(self): diff --git a/test/ir/test_ir_fusion_group_pass.py b/test/ir/test_ir_fusion_group_pass.py index 0637efb067f7e..56c723613e939 100644 --- a/test/ir/test_ir_fusion_group_pass.py +++ b/test/ir/test_ir_fusion_group_pass.py @@ -72,7 +72,7 @@ def _feed_random_data(self, feed_vars): elif var.dtype == paddle.float16: dtype = "float16" else: - raise ValueError("Unsupported dtype %s" % var.dtype) + raise ValueError(f"Unsupported dtype {var.dtype}") feeds[var.name] = np.random.random(shape).astype(dtype) return feeds diff --git a/test/legacy_test/CMakeLists.txt b/test/legacy_test/CMakeLists.txt index 8c4cfe9113ab3..4b390ca18a3f1 100644 --- a/test/legacy_test/CMakeLists.txt +++ b/test/legacy_test/CMakeLists.txt @@ -155,6 +155,7 @@ if(WIN32) list(REMOVE_ITEM TEST_OPS test_fused_layernorm_op) list(REMOVE_ITEM TEST_OPS test_matmul_int8_op) list(REMOVE_ITEM TEST_OPS test_variable_length_memory_efficient_attention) + list(REMOVE_ITEM TEST_OPS test_ops_nms) endif() list(REMOVE_ITEM TEST_OPS test_checkpoint_saver) @@ -421,14 +422,11 @@ function(parallel_bash_test_modules TARGET_NAME) endif() endfunction() +list(REMOVE_ITEM TEST_OPS test_data_norm_op) list(REMOVE_ITEM TEST_OPS test_warpctc_op) list(REMOVE_ITEM TEST_OPS test_imperative_resnet) list(REMOVE_ITEM TEST_OPS test_imperative_resnet_sorted_gradient) list(REMOVE_ITEM TEST_OPS test_imperative_se_resnext) -list(REMOVE_ITEM TEST_OPS test_parallel_executor_seresnext_base_cpu) -list(REMOVE_ITEM TEST_OPS test_parallel_executor_seresnext_with_reduce_cpu) -list(REMOVE_ITEM TEST_OPS - 
test_parallel_executor_seresnext_with_fuse_all_reduce_cpu) list(REMOVE_ITEM TEST_OPS test_async_ssa_graph_executor_mnist) list(REMOVE_ITEM TEST_OPS test_basic_gru_api) list(REMOVE_ITEM TEST_OPS test_basic_gru_unit_op) @@ -437,6 +435,8 @@ list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op) list(REMOVE_ITEM TEST_OPS test_fuse_bn_add_act_pass) list(REMOVE_ITEM TEST_OPS test_conv3d_transpose_op) list(REMOVE_ITEM TEST_OPS test_layers) +list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) +list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) # disable this unittest temporarily list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) @@ -485,6 +485,8 @@ endif() # Some ops need to check results when gc is enabled # Currently, only ops that register NoNeedBufferVarsInference need to do this test set(TEST_OPS_WITH_GC + test_affine_channel_op + test_scatter_op test_concat_op test_elementwise_add_op test_lookup_table_op @@ -571,6 +573,11 @@ if((WITH_GPU) AND (WITH_CUDNN_FRONTEND)) test_fused_dot_product_attention_op) endif() +py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS + ${GC_ENVS}) +py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS + ${GC_ENVS}) + set_tests_properties(test_conv2d_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_norm_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") @@ -681,7 +688,7 @@ if(WITH_DISTRIBUTE) endif() endif() endif() - +py_test_modules(test_data_norm_op MODULES test_data_norm_op) py_test_modules( test_fuse_bn_add_act_pass MODULES @@ -753,6 +760,7 @@ if(WITH_DISTRIBUTE) endif() # setting timeout value as 15S +set_tests_properties(test_isin PROPERTIES TIMEOUT 30) set_tests_properties(test_binomial_op PROPERTIES TIMEOUT 30) set_tests_properties(test_run PROPERTIES TIMEOUT 120) set_tests_properties(test_sync_batch_norm_op PROPERTIES TIMEOUT 180) @@ -788,12 +796,18 @@ if(WITH_NV_JETSON) 
set_tests_properties(test_norm_op PROPERTIES TIMEOUT 1200) set_tests_properties(test_batch_norm_op_prim_nchw PROPERTIES TIMEOUT 1500) set_tests_properties(test_batch_norm_op_prim_nhwc PROPERTIES TIMEOUT 1500) + set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 1500) + set_tests_properties(test_conv3d_transpose_part2_op PROPERTIES TIMEOUT 1200) + set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 1500) else() set_tests_properties(test_concat_op PROPERTIES TIMEOUT 400) set_tests_properties(test_conv3d_op PROPERTIES TIMEOUT 120) set_tests_properties(test_norm_op PROPERTIES TIMEOUT 150) set_tests_properties(test_batch_norm_op_prim_nchw PROPERTIES TIMEOUT 250) set_tests_properties(test_batch_norm_op_prim_nhwc PROPERTIES TIMEOUT 250) + set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 150) + set_tests_properties(test_conv3d_transpose_part2_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 250) endif() if((WITH_GPU OR WITH_ROCM) AND (LINUX)) py_test_modules(test_conv3d_transpose_op MODULES test_conv3d_transpose_op @@ -946,6 +960,10 @@ if(WITH_CUDNN_FRONTEND) endif() set(TEST_CINN_OPS + test_assign_op + test_layer_norm_op + test_atan2_op + test_cast_op test_stack_op test_activation_op test_fill_any_like_op @@ -954,6 +972,22 @@ set(TEST_CINN_OPS test_elementwise_sub_op test_elementwise_div_op test_elementwise_max_op + test_elementwise_mul_op + test_elementwise_pow_op + test_expand_v2_op + test_flatten_contiguous_range_op + test_flip + test_full_like_op + test_top_k_op + test_top_k_v2_op + test_reshape_op + test_triangular_solve_op + test_split_op + test_scatter_op + test_reverse_op + test_roll_op + test_meshgrid_op + test_index_select_op test_mean_op test_clip_op test_gather_op @@ -997,6 +1031,13 @@ set_tests_properties( # These UTs are to temporarily test static build for standalone_executor, will be removed after static build is enabled by default. 
set(STATIC_BUILD_TESTS test_adagrad_op + test_batch_norm_op + test_nce + test_layer_norm_op + test_eigh_op + test_matmul_op + test_matmul_v2_op + test_paddle_save_load_binary test_assign_pos_op test_bucketize_api test_c_embedding_op @@ -1099,3 +1140,54 @@ set_pir_tests_properties() set_tests_properties(test_nadam_op PROPERTIES TIMEOUT 100) set_tests_properties(test_radam_op PROPERTIES TIMEOUT 100) set_tests_properties(test_nan_inf PROPERTIES TIMEOUT 120) +set_tests_properties(test_sparse_mask_as_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_bicubic_interp_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_conv2d_op_depthwise_conv + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_conv2d_op_depthwise_conv PROPERTIES TIMEOUT 120) +set_tests_properties(test_crop_tensor_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_cross_entropy2_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 300) +set_tests_properties(test_deformable_conv_v1_op PROPERTIES TIMEOUT 300) +set_tests_properties(test_elementwise_mul_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_fractional_max_pool2d_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_graph_send_ue_recv_op PROPERTIES TIMEOUT 60) +set_tests_properties(test_graph_send_uv_op PROPERTIES TIMEOUT 60) +set_tests_properties(test_gru_op PROPERTIES TIMEOUT 200) +set_tests_properties(test_gru_unit_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_imperative_star_gan_with_gradient_penalty + PROPERTIES TIMEOUT 120) +set_tests_properties(test_index_add_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_index_select_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_lstm_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_matmul_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_matmul_v2_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_nearest_interp_op PROPERTIES TIMEOUT 120) 
+set_tests_properties(test_nearest_interp_v2_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_pad3d_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_paddle_save_load_binary PROPERTIES TIMEOUT 120) +set_tests_properties(test_partial_concat_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_partial_sum_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_qr_op PROPERTIES TIMEOUT 60) +set_tests_properties(test_row_conv_op PROPERTIES TIMEOUT 120) +if(WIN32) + set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 900) +else() + set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 600) +endif() +set_tests_properties(test_svd_op PROPERTIES TIMEOUT 80) +set_tests_properties(test_trilinear_interp_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_matmul_op_static_build PROPERTIES TIMEOUT 120) +set_tests_properties(test_matmul_v2_op_static_build PROPERTIES TIMEOUT 120) +set_tests_properties(test_paddle_save_load_binary_static_build + PROPERTIES TIMEOUT 120) +set_tests_properties(test_argsort_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_conv_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_conv_nn_grad PROPERTIES TIMEOUT 220) +set_tests_properties(test_data_norm_op PROPERTIES LABELS "RUN_TYPE=DIST") +set_tests_properties(test_deformable_conv_op PROPERTIES TIMEOUT 200) +set_tests_properties(test_regularizer PROPERTIES TIMEOUT 150) +set_tests_properties(test_regularizer_api PROPERTIES TIMEOUT 150) +set_tests_properties(test_sgd_op PROPERTIES TIMEOUT 250) diff --git a/test/legacy_test/dist_ctr_reader.py b/test/legacy_test/dist_ctr_reader.py index 23f4daf2a5d8f..039d2c8aaf178 100644 --- a/test/legacy_test/dist_ctr_reader.py +++ b/test/legacy_test/dist_ctr_reader.py @@ -114,7 +114,7 @@ def train(self): Load trainset. 
''' file_name = "train.txt" - logger.info("load trainset from %s" % file_name) + logger.info(f"load trainset from {file_name}") mode = TaskMode.create_train() return self._parse_creator(file_name, mode) @@ -123,7 +123,7 @@ def test(self): Load testset. ''' file_name = "test.txt" - logger.info("load testset from %s" % file_name) + logger.info(f"load testset from {file_name}") mode = TaskMode.create_test() return self._parse_creator(file_name, mode) @@ -132,7 +132,7 @@ def infer(self): Load infer set. ''' file_name = "infer.txt" - logger.info("load inferset from %s" % file_name) + logger.info(f"load inferset from {file_name}") mode = TaskMode.create_infer() return self._parse_creator(file_name, mode) diff --git a/test/legacy_test/dist_test.sh b/test/legacy_test/dist_test.sh index 69a893a7ddc13..3ae7b209f4a00 100644 --- a/test/legacy_test/dist_test.sh +++ b/test/legacy_test/dist_test.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -82,7 +82,7 @@ if [[ $exit_code -ne 0 ]]; then fi #display system context -for i in {1..2}; do +for i in {1..2}; do sleep 3 ps -aux netstat -anlp diff --git a/test/legacy_test/gradient_checker.py b/test/legacy_test/gradient_checker.py index 210db283b979a..41c668043e3f8 100644 --- a/test/legacy_test/gradient_checker.py +++ b/test/legacy_test/gradient_checker.py @@ -324,7 +324,7 @@ def _compute_analytical_jacobian_pir( filted_idx, filted_dx = zip(*filted) # get the name in feeds of dyi - name = 'dys_%s' % i + name = f'dys_{i}' np_t = np.array(feeds[name]).astype(np_type) shape = np_t.shape np_t = np_t.flatten() @@ -392,7 +392,7 @@ def fail_test(msg): if in_pir_mode(): analytical = [] for i in range(len(y)): - name = 'dys_%s' % i + name = f'dys_{i}' feeds.update( { name: np.zeros( @@ -780,7 +780,7 @@ def get_pir_static_double_grad( yi.persistable = True np_type = dtype_to_np_dtype(yi.dtype) dy = paddle.static.data( - name='Dgrad_%s' % i, + name=f'Dgrad_{i}', shape=yi.shape, dtype=np_type, ) @@ -797,7 +797,7 @@ def get_pir_static_double_grad( yi.persistable = True np_type = dtype_to_np_dtype(yi.dtype) dy = paddle.static.data( - name='Dgrad_%s' % i, + name=f'Dgrad_{i}', shape=yi.shape, dtype=np_type, ) @@ -851,12 +851,12 @@ def get_pir_static_double_grad( yi = y[i] np_type = dtype_to_np_dtype(yi.dtype) dy = paddle.static.data( - name='dys_%s' % i, + name=f'dys_{i}', shape=yi.shape, dtype=np_type, ) value = np.ones(yi.shape, dtype=np_type) - feeds.update({'dys_%s' % i: value}) + feeds.update({f'dys_{i}': value}) dys.append(dy) # append second order backward @@ -1130,7 +1130,7 @@ def get_pir_static_triple_grad( yi.persistable = True np_type = dtype_to_np_dtype(yi.dtype) dy = paddle.static.data( - name='Tgrad_%s' % i, + name=f'Tgrad_{i}', shape=yi.shape, dtype=np_type, ) @@ -1147,7 +1147,7 @@ def get_pir_static_triple_grad( yi.persistable = True np_type = dtype_to_np_dtype(yi.dtype) dy = paddle.static.data( - name='Tgrad_%s' % i, + name=f'Tgrad_{i}', shape=yi.shape, 
dtype=np_type, ) diff --git a/test/legacy_test/op.py b/test/legacy_test/op.py index 0dec2f001188e..e60a0e63ae8dd 100644 --- a/test/legacy_test/op.py +++ b/test/legacy_test/op.py @@ -163,7 +163,7 @@ def __call__(self, *args, **kwargs): new_attr.scalars.MergeFrom(item) else: raise NotImplementedError( - "A not supported attribute type: %s." % (str(attr.type)) + f"A not supported attribute type: {str(attr.type)}." ) for attr_name, defalut_val in self.__extra_attrs__.items(): user_defined_attr = kwargs.get(attr_name, None) @@ -212,7 +212,7 @@ def __call__(self, *args, **kwargs): new_attr.scalars.MergeFrom(item) else: raise NotImplementedError( - "A not supported attribute type: %s." % (str(attr_type)) + f"A not supported attribute type: {str(attr_type)}." ) return op_desc @@ -292,7 +292,7 @@ def types(self): def get_op_info(self, t): if t not in self.op_methods: - raise ValueError("The operator: %s is not registered." % t) + raise ValueError(f"The operator: {t} is not registered.") return self.op_methods.get(t) def get_op_input_names(self, type): diff --git a/test/legacy_test/op_test.py b/test/legacy_test/op_test.py index ed4e0f478ed38..eec710f01cf8e 100644 --- a/test/legacy_test/op_test.py +++ b/test/legacy_test/op_test.py @@ -114,7 +114,7 @@ def check_out_dtype(api_fn, in_specs, expect_dtypes, target_index=0, **configs): ) input_t.append( paddle.static.data( - name='data_%s' % index, shape=shape, dtype=dtype + name=f'data_{index}', shape=shape, dtype=dtype ) ) @@ -223,7 +223,7 @@ def __get_elem__(tensor, i): return tensor._get_complex128_element(i) else: raise TypeError( - "Unsupported test data type %s." % tensor_to_check_dtype + f"Unsupported test data type {tensor_to_check_dtype}." ) def __set_elem__(tensor, i, e): @@ -251,7 +251,7 @@ def __set_elem__(tensor, i, e): return tensor._set_complex128_element(i, e) else: raise TypeError( - "Unsupported test data type %s." % tensor_to_check_dtype + f"Unsupported test data type {tensor_to_check_dtype}." 
) # we only compute gradient of one element each time. @@ -501,7 +501,7 @@ def is_complex_test(): and not hasattr(cls, "exist_check_grad") ): raise AssertionError( - "This test of %s op needs check_grad." % cls.op_type + f"This test of {cls.op_type} op needs check_grad." ) # check for op test with fp64 precision, but not check onednn op test for now @@ -518,8 +518,7 @@ def is_complex_test(): and not cls.check_prim_pir ): raise AssertionError( - "This test of %s op needs check_grad with fp64 precision." - % cls.op_type + f"This test of {cls.op_type} op needs check_grad with fp64 precision." ) if ( @@ -1061,7 +1060,7 @@ def create_var( name_temp = name else: nplist_value_temp = np_list[name] - name_temp = unique_name.generate("%s_out" % (name)) + name_temp = unique_name.generate(f"{name}_out") v = create_var( nplist_value_temp, name_temp, @@ -1184,10 +1183,9 @@ def cal_python_api(python_api, args, kernel_sig): return None if not hasattr(self, "python_api"): print(kernel_sig) - assert hasattr(self, "python_api"), ( - "Detect there is KernelSignature for `%s` op, please set the `self.python_api` if you set check_dygraph = True" - % self.op_type - ) + assert hasattr( + self, "python_api" + ), f"Detect there is KernelSignature for `{self.op_type}` op, please set the `self.python_api` if you set check_dygraph = True" args = OpTestUtils.prepare_python_api_arguments( self.python_api, dygraph_tensor_inputs, @@ -1288,10 +1286,9 @@ def get_kernel_signature(self, place, egr_inps=None, egr_oups=None): return None if not hasattr(self, "python_api"): print(kernel_sig) - assert hasattr(self, "python_api"), ( - "Detect there is KernelSignature for `%s` op, please set the `self.python_api` if you set check_dygraph = True" - % self.op_type - ) + assert hasattr( + self, "python_api" + ), f"Detect there is KernelSignature for `{self.op_type}` op, please set the `self.python_api` if you set check_dygraph = True" return kernel_sig def get_ir_input_attr_dict_and_feed(self, stop_gradient): 
@@ -2573,7 +2570,7 @@ def _is_skip_name(self, name): not in no_check_set_white_list.no_check_set_white_list ): raise AssertionError( - "no_check_set of op %s must be set to None." % self.op_type + f"no_check_set of op {self.op_type} must be set to None." ) if check_prim: @@ -3091,7 +3088,7 @@ def check_grad_with_place_for_static( analytic_grads, inputs_to_check, max_relative_error, - "Gradient Check On %s" % str(place), + f"Gradient Check On {str(place)}", atol=atol, ) @@ -3366,7 +3363,7 @@ def check_grad_with_place( dygraph_dygraph_grad, inputs_to_check, max_relative_error, - "Gradient Check On %s" % str(place), + f"Gradient Check On {str(place)}", atol=atol, ) @@ -3406,7 +3403,7 @@ def check_grad_with_place( pir_grad, inputs_to_check, max_relative_error, - "Gradient Check On %s" % str(place), + f"Gradient Check On {str(place)}", atol=atol, ) @@ -3484,7 +3481,7 @@ def _get_dygraph_grad( ) else: raise TypeError( - "Unsupported test data type %s." % type(cast_input) + f"Unsupported test data type {type(cast_input)}." ) outputs = {} @@ -3850,12 +3847,12 @@ def construct_output_dict_by_kernel_sig(ret_tuple, output_sig): range(len(user_defined_grad_outputs)), ): grad_val = paddle.static.data( - name='val_grad_%s' % idx, + name=f'val_grad_{idx}', shape=grad_out_value.shape, dtype=grad_out_value.dtype, ) grad_outputs.append(grad_val) - feed.update({'val_grad_%s' % idx: grad_out_value}) + feed.update({f'val_grad_{idx}': grad_out_value}) # delete the inputs which no need to calculate grad for no_grad_val in no_grad_set: del static_inputs[no_grad_val] @@ -3894,8 +3891,7 @@ def construct_output_dict_by_kernel_sig(ret_tuple, output_sig): ) else: raise TypeError( - "Unsupported test data type %s." - % type(cast_input) + f"Unsupported test data type {type(cast_input)}." 
) outputs = {} diff --git a/test/legacy_test/parallel_test.sh b/test/legacy_test/parallel_test.sh index 551b7cdb7a43c..893163700a55d 100644 --- a/test/legacy_test/parallel_test.sh +++ b/test/legacy_test/parallel_test.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/legacy_test/prim_op_test.py b/test/legacy_test/prim_op_test.py index 6894d37a2839a..c059499f43e16 100644 --- a/test/legacy_test/prim_op_test.py +++ b/test/legacy_test/prim_op_test.py @@ -100,8 +100,7 @@ def _get_kernel_signature( """we think the kernel_sig is missing.""" kernel_sig = None print( - "[Warning: op_test.py] Kernel Signature is not found for %s, fall back to intermediate state." - % op_type + f"[Warning: op_test.py] Kernel Signature is not found for {op_type}, fall back to intermediate state." 
) return kernel_sig @@ -677,9 +676,9 @@ def check_static_comp(self): # ensure the operator not in program if check_prim is True if not in_pir_mode(): forward_ops = [op.type for op in main_program.blocks[0].ops] - assert self.op_type not in forward_ops, ( - "%s shouldn't appear in program when check_prim is True" - ) % (self.op_type) + assert ( + self.op_type not in forward_ops + ), f"{self.op_type} shouldn't appear in program when check_prim is True" exe = paddle.static.Executor(self.place) exe.run(startup_program) ret = exe.run(main_program, feed=feed, fetch_list=ret) @@ -761,9 +760,9 @@ def check_jit_comp(self): .forward_program.block(0) .ops ] - assert self.op_type not in forward_ops, ( - "%s shouldn't appear in program when check_prim is True" - ) % (self.op_type) + assert ( + self.op_type not in forward_ops + ), f"{self.op_type} shouldn't appear in program when check_prim is True" ret = flatten(_as_list(net(args))) ret = paddle.utils.map_structure(lambda x: x.numpy(), ret) if OpTestUtils.is_bfloat16_type(self.dtype): @@ -854,9 +853,9 @@ def check_jit_comp_with_cinn(self): .forward_program.block(0) .ops ] - assert self.op_type not in forward_ops, ( - "%s shouldn't appear in program when check_prim is True" - ) % (self.op_type) + assert ( + self.op_type not in forward_ops + ), f"{self.op_type} shouldn't appear in program when check_prim is True" ret = flatten(_as_list(net(args))) ret = paddle.utils.map_structure(lambda x: x.numpy(), ret) if OpTestUtils.is_bfloat16_type(self.dtype): @@ -1160,9 +1159,9 @@ def check_static_comp(self): if not in_pir_mode(): ops = [op.type for op in main_program.blocks[0].ops] backward_op_type = self.op_type + "_grad" - assert backward_op_type not in ops, ( - "%s shouldn't appear in program when check_prim is True" - ) % (backward_op_type) + assert ( + backward_op_type not in ops + ), f"{backward_op_type} shouldn't appear in program when check_prim is True" elif self.prim_op_type == "prim": grad_ops = [] for op in 
main_program.global_block().ops: @@ -1261,9 +1260,9 @@ def check_jit_comp(self): .ops ] backward_op_type = self.op_type + "_grad" - assert backward_op_type not in ops, ( - "%s shouldn't appear in program when check_prim is True" - ) % (backward_op_type) + assert ( + backward_op_type not in ops + ), f"{backward_op_type} shouldn't appear in program when check_prim is True" out = _as_list(net(args)) if hasattr(self.op_test, "python_out_sig"): outputs_sig = self.op_test.python_out_sig @@ -1387,9 +1386,9 @@ def check_jit_comp_with_cinn(self): .ops ] backward_op_type = self.op_type + "_grad" - assert backward_op_type not in ops, ( - "%s shouldn't appear in program when check_prim is True" - ) % (backward_op_type) + assert ( + backward_op_type not in ops + ), f"{backward_op_type} shouldn't appear in program when check_prim is True" out = _as_list(net(args)) if hasattr(self.op_test, "python_out_sig"): diff --git a/test/legacy_test/run_server_for_communicator_geo.py b/test/legacy_test/run_server_for_communicator_geo.py index 4f4173e5a2d0f..31bdddda31a15 100644 --- a/test/legacy_test/run_server_for_communicator_geo.py +++ b/test/legacy_test/run_server_for_communicator_geo.py @@ -16,7 +16,7 @@ import sys sys.path.append("../deprecated/legacy_test") -from test_communicator_geo import TestCommunicatorGeoEnd2End +from test_communicator_geo_deprecated import TestCommunicatorGeoEnd2End import paddle diff --git a/test/legacy_test/test_ZeroPad1d.py b/test/legacy_test/test_ZeroPad1d.py new file mode 100644 index 0000000000000..31baf6a7cf246 --- /dev/null +++ b/test/legacy_test/test_ZeroPad1d.py @@ -0,0 +1,90 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle import to_tensor +from paddle.nn import ZeroPad1D + + +class TestZeroPad1dAPI(unittest.TestCase): + def setUp(self): + if paddle.is_compiled_with_cuda(): + paddle.device.set_device('gpu:0') + else: + paddle.device.set_device('cpu') + self.shape = [4, 6, 6] + self.support_dtypes = ['float32', 'float64', 'int32', 'int64'] + + def test_support_dtypes(self): + for dtype in self.support_dtypes: + pad = 2 + x = np.random.randint(-255, 255, size=self.shape).astype(dtype) + expect_res = np.pad( + x, + [[0, 0], [0, 0], [pad, pad]], + mode='constant', + constant_values=0, + ) + + x_tensor = to_tensor(x).astype(dtype) + zeropad1d = ZeroPad1D(padding=pad) + ret_res = zeropad1d(x_tensor).numpy() + np.testing.assert_allclose(expect_res, ret_res, rtol=1e-05) + + def test_support_pad2(self): + pad = [1, 2] + x = np.random.randint(-255, 255, size=self.shape) + expect_res = np.pad( + x, [[0, 0], [0, 0], pad], mode='constant', constant_values=0 + ) + + x_tensor = to_tensor(x) + zeropad1d = ZeroPad1D(padding=pad) + ret_res = zeropad1d(x_tensor).numpy() + np.testing.assert_allclose(expect_res, ret_res, rtol=1e-05) + + def test_support_pad3(self): + pad = (1, 2) + x = np.random.randint(-255, 255, size=self.shape) + expect_res = np.pad(x, [[0, 0], [0, 0], [pad[0], pad[1]]]) + + x_tensor = to_tensor(x) + zeropad1d = ZeroPad1D(padding=pad) + ret_res = zeropad1d(x_tensor).numpy() + np.testing.assert_allclose(expect_res, ret_res, rtol=1e-05) + + def test_support_pad4(self): + pad = [1, 2] + x = 
np.random.randint(-255, 255, size=self.shape) + expect_res = np.pad(x, [[0, 0], [0, 0], [pad[0], pad[1]]]) + + x_tensor = to_tensor(x) + pad_tensor = to_tensor(pad, dtype='int32') + zeropad1d = ZeroPad1D(padding=pad_tensor) + ret_res = zeropad1d(x_tensor).numpy() + np.testing.assert_allclose(expect_res, ret_res, rtol=1e-05) + + def test_repr(self): + pad = [1, 2] + zeropad1d = ZeroPad1D(padding=pad) + name_str = zeropad1d.extra_repr() + assert name_str == 'padding=[1, 2], data_format=NCL' + + +if __name__ == '__main__': + unittest.main() diff --git a/test/legacy_test/test_ZeroPad3d.py b/test/legacy_test/test_ZeroPad3d.py new file mode 100644 index 0000000000000..8cc7a45c959df --- /dev/null +++ b/test/legacy_test/test_ZeroPad3d.py @@ -0,0 +1,117 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle import to_tensor +from paddle.nn import ZeroPad3D + + +class TestZeroPad3DAPI(unittest.TestCase): + def setUp(self): + if paddle.is_compiled_with_cuda(): + paddle.device.set_device('gpu:0') + else: + paddle.device.set_device('cpu') + self.shape = [4, 3, 6, 6, 6] + self.support_dtypes = ['float32', 'float64', 'int32', 'int64'] + + def test_support_dtypes(self): + for dtype in self.support_dtypes: + pad = 2 + x = np.random.randint(-255, 255, size=self.shape).astype(dtype) + expect_res = np.pad( + x, + [[0, 0], [0, 0], [pad, pad], [pad, pad], [pad, pad]], + mode='constant', + constant_values=0, + ) + + x_tensor = to_tensor(x).astype(dtype) + zeropad3d = ZeroPad3D(padding=pad) + ret_res = zeropad3d(x_tensor).numpy() + np.testing.assert_allclose(expect_res, ret_res, rtol=1e-05) + + def test_support_pad2(self): + pad = [1, 2, 3, 4, 5, 6] + x = np.random.randint(-255, 255, size=self.shape) + expect_res = np.pad( + x, + [ + [0, 0], + [0, 0], + [pad[4], pad[5]], + [pad[2], pad[3]], + [pad[0], pad[1]], + ], + mode='constant', + constant_values=0, + ) + + x_tensor = to_tensor(x) + zeropad3d = ZeroPad3D(padding=pad) + ret_res = zeropad3d(x_tensor).numpy() + np.testing.assert_allclose(expect_res, ret_res, rtol=1e-05) + + def test_support_pad3(self): + pad = (1, 2, 3, 4, 5, 6) + x = np.random.randint(-255, 255, size=self.shape) + expect_res = np.pad( + x, + [ + [0, 0], + [0, 0], + [pad[4], pad[5]], + [pad[2], pad[3]], + [pad[0], pad[1]], + ], + ) + + x_tensor = to_tensor(x) + zeropad3d = ZeroPad3D(padding=pad) + ret_res = zeropad3d(x_tensor).numpy() + np.testing.assert_allclose(expect_res, ret_res, rtol=1e-05) + + def test_support_pad4(self): + pad = [1, 2, 3, 4, 5, 6] + x = np.random.randint(-255, 255, size=self.shape) + expect_res = np.pad( + x, + [ + [0, 0], + [0, 0], + [pad[4], pad[5]], + [pad[2], pad[3]], + [pad[0], pad[1]], + ], + ) + + x_tensor = to_tensor(x) + pad_tensor = to_tensor(pad, 
dtype='int32') + zeropad3d = ZeroPad3D(padding=pad_tensor) + ret_res = zeropad3d(x_tensor).numpy() + np.testing.assert_allclose(expect_res, ret_res, rtol=1e-05) + + def test_repr(self): + pad = pad = [1, 2, 3, 4, 5, 6] + zeropad3d = ZeroPad3D(padding=pad) + name_str = zeropad3d.extra_repr() + assert name_str == 'padding=[1, 2, 3, 4, 5, 6], data_format=NCDHW' + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_accuracy_op.py b/test/legacy_test/test_accuracy_op.py similarity index 99% rename from test/deprecated/legacy_test/test_accuracy_op.py rename to test/legacy_test/test_accuracy_op.py index 44c4cfa7c49ac..bf6d86d10da9e 100755 --- a/test/deprecated/legacy_test/test_accuracy_op.py +++ b/test/legacy_test/test_accuracy_op.py @@ -126,7 +126,7 @@ def test_type_errors(self): self.assertRaises(TypeError, paddle.metric.accuracy, x2, label) x3 = paddle.static.data( - name='input', shape=[-1, 2], dtype="float16" + name='input', shape=[-1, 2], dtype="float32" ) paddle.static.accuracy(input=x3, label=label) paddle.metric.accuracy(input=x3, label=label) diff --git a/test/legacy_test/test_activation_op.py b/test/legacy_test/test_activation_op.py index 7806017bbfeed..4de793c943265 100644 --- a/test/legacy_test/test_activation_op.py +++ b/test/legacy_test/test_activation_op.py @@ -365,19 +365,19 @@ def test_out_name(self): data = paddle.static.data( name="X", shape=[-1, 1], dtype="float32" ) - out = eval("paddle.%s(data, name='Y')" % self.op_type) + out = eval(f"paddle.{self.op_type}(data, name='Y')") place = base.CPUPlace() exe = base.Executor(place) (result,) = exe.run(feed={"X": np_x}, fetch_list=[out]) - expected = eval("np.%s(np_x)" % self.op_type) + expected = eval(f"np.{self.op_type}(np_x)") np.testing.assert_allclose(result, expected, rtol=1e-05) def test_dygraph(self): with base.dygraph.guard(): np_x = np.array([0.1]) x = paddle.to_tensor(np_x) - z = eval("paddle.%s(x).numpy()" % self.op_type) - z_expected = eval("np.%s(np_x)" 
% self.op_type) + z = eval(f"paddle.{self.op_type}(x).numpy()") + z_expected = eval(f"np.{self.op_type}(np_x)") np.testing.assert_allclose(z, z_expected, rtol=1e-05) @@ -3287,26 +3287,34 @@ def test_check_grad(self): self.check_grad( ['X'], 'Out', - check_prim=True - if self.dtype not in [np.complex64, np.complex128] - else False, + check_prim=( + True + if self.dtype not in [np.complex64, np.complex128] + else False + ), only_check_prim=self.if_only_check_prim(), check_pir=True, - check_prim_pir=True - if self.dtype not in [np.complex64, np.complex128] - else False, + check_prim_pir=( + True + if self.dtype not in [np.complex64, np.complex128] + else False + ), check_pir_onednn=self.check_pir_onednn, ) def test_check_output(self): self.check_output( - check_prim=True - if self.dtype not in [np.complex64, np.complex128] - else False, + check_prim=( + True + if self.dtype not in [np.complex64, np.complex128] + else False + ), check_pir=True, - check_prim_pir=True - if self.dtype not in [np.complex64, np.complex128] - else False, + check_prim_pir=( + True + if self.dtype not in [np.complex64, np.complex128] + else False + ), check_pir_onednn=self.check_pir_onednn, ) @@ -4880,8 +4888,8 @@ def test_errors(self): F.softsign(x_fp16) -def ref_thresholded_relu(x, threshold=1.0): - out = (x > threshold) * x +def ref_thresholded_relu(x, threshold=1.0, value=0.0): + out = (x > threshold) * x + (x <= threshold) * value return out @@ -4893,15 +4901,16 @@ def setUp(self): self.python_api = paddle.nn.functional.thresholded_relu threshold = 15 + value = 5 np.random.seed(1024) x = np.random.uniform(-20, 20, self.shape).astype(self.dtype) x[np.abs(x) < 0.005] = 0.02 - out = ref_thresholded_relu(x, threshold) + out = ref_thresholded_relu(x, threshold, value) self.inputs = {'X': OpTest.np_dtype_to_base_dtype(x)} self.outputs = {'Out': out} - self.attrs = {"threshold": threshold} + self.attrs = {"threshold": threshold, "value": value} self.convert_input_output() def init_shape(self): 
@@ -4929,6 +4938,7 @@ class TestThresholdedReluAPI(unittest.TestCase): # test paddle.nn.ThresholdedReLU, paddle.nn.functional.thresholded_relu def setUp(self): self.threshold = 15 + self.value = 5 np.random.seed(1024) self.x_np = np.random.uniform(-20, 20, [10, 12]).astype(np.float64) self.x_np[np.abs(self.x_np) < 0.005] = 0.02 @@ -4943,22 +4953,30 @@ def test_static_api(self): with static_guard(): with paddle.static.program_guard(paddle.static.Program()): x = paddle.static.data('X', self.x_np.shape, self.x_np.dtype) - out1 = F.thresholded_relu(x, self.threshold) - thresholded_relu = paddle.nn.ThresholdedReLU(self.threshold) + out1 = F.thresholded_relu(x, self.threshold, self.value) + thresholded_relu = paddle.nn.ThresholdedReLU( + self.threshold, self.value + ) out2 = thresholded_relu(x) exe = paddle.static.Executor(self.place) res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) - out_ref = ref_thresholded_relu(self.x_np, self.threshold) + out_ref = ref_thresholded_relu( + self.x_np, self.threshold, self.value + ) for r in res: np.testing.assert_allclose(out_ref, r, rtol=1e-05) def test_dygraph_api(self): with dynamic_guard(): x = paddle.to_tensor(self.x_np) - out1 = F.thresholded_relu(x, self.threshold) - thresholded_relu = paddle.nn.ThresholdedReLU(self.threshold) + out1 = F.thresholded_relu(x, self.threshold, self.value) + thresholded_relu = paddle.nn.ThresholdedReLU( + self.threshold, self.value + ) out2 = thresholded_relu(x) - out_ref = ref_thresholded_relu(self.x_np, self.threshold) + out_ref = ref_thresholded_relu( + self.x_np, self.threshold, self.value + ) for r in [out1, out2]: np.testing.assert_allclose(out_ref, r.numpy(), rtol=1e-05) @@ -5359,7 +5377,7 @@ def create_test_act_fp16_class( enable_cinn=False, check_pir=False, grad_atol=1e-2, - **kwargs + **kwargs, ): @unittest.skipIf( not paddle.is_compiled_with_cuda(), "core is not compiled with CUDA" @@ -5556,7 +5574,7 @@ def create_test_act_bf16_class( check_pir=False, check_prim_pir=False, 
grad_atol=1e-2, - **kwargs + **kwargs, ): @unittest.skipIf( not core.is_compiled_with_cuda() diff --git a/test/deprecated/legacy_test/test_adamax_api.py b/test/legacy_test/test_adamax_api.py similarity index 66% rename from test/deprecated/legacy_test/test_adamax_api.py rename to test/legacy_test/test_adamax_api.py index 1fc1878d81995..a995659df4c10 100644 --- a/test/deprecated/legacy_test/test_adamax_api.py +++ b/test/legacy_test/test_adamax_api.py @@ -17,7 +17,6 @@ import numpy as np import paddle -from paddle import base class TestAdamaxAPI(unittest.TestCase): @@ -36,34 +35,6 @@ def test_adamax_api_dygraph(self): adam.step() adam.clear_gradients() - def test_adamax_api(self): - paddle.enable_static() - place = base.CPUPlace() - shape = [2, 3, 8, 8] - exe = base.Executor(place) - train_prog = base.Program() - startup = base.Program() - with base.program_guard(train_prog, startup): - with base.unique_name.guard(): - data = paddle.static.data(name="data", shape=shape) - conv = paddle.static.nn.conv2d(data, 8, 3) - loss = paddle.mean(conv) - beta1 = 0.85 - beta2 = 0.95 - opt = paddle.optimizer.Adamax( - learning_rate=1e-5, - beta1=beta1, - beta2=beta2, - weight_decay=0.01, - epsilon=1e-8, - ) - opt.minimize(loss) - - exe.run(startup) - data_np = np.random.random(shape).astype('float32') - rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) - assert rets[0] is not None - class TestAdamaxAPIGroup(TestAdamaxAPI): def test_adamax_api_dygraph(self): diff --git a/test/deprecated/legacy_test/test_add_position_encoding_op.py b/test/legacy_test/test_add_position_encoding_op.py similarity index 100% rename from test/deprecated/legacy_test/test_add_position_encoding_op.py rename to test/legacy_test/test_add_position_encoding_op.py diff --git a/test/deprecated/legacy_test/test_addmm_op.py b/test/legacy_test/test_addmm_op.py similarity index 100% rename from test/deprecated/legacy_test/test_addmm_op.py rename to test/legacy_test/test_addmm_op.py diff --git 
a/test/deprecated/legacy_test/test_affine_channel_op.py b/test/legacy_test/test_affine_channel_op.py similarity index 100% rename from test/deprecated/legacy_test/test_affine_channel_op.py rename to test/legacy_test/test_affine_channel_op.py diff --git a/test/deprecated/legacy_test/test_affine_grid_op.py b/test/legacy_test/test_affine_grid_op.py similarity index 100% rename from test/deprecated/legacy_test/test_affine_grid_op.py rename to test/legacy_test/test_affine_grid_op.py diff --git a/test/deprecated/legacy_test/test_argsort_op.py b/test/legacy_test/test_argsort_op.py similarity index 68% rename from test/deprecated/legacy_test/test_argsort_op.py rename to test/legacy_test/test_argsort_op.py index 58597766644f5..e1786ada841bc 100644 --- a/test/deprecated/legacy_test/test_argsort_op.py +++ b/test/legacy_test/test_argsort_op.py @@ -20,9 +20,6 @@ import paddle from paddle import base from paddle.base import core -from paddle.base.backward import append_backward -from paddle.base.executor import Executor -from paddle.base.framework import Program, grad_var_name from paddle.pir_utils import test_with_pir_api np.random.seed(123) @@ -66,293 +63,6 @@ def create_tensor(np_data, place): return tensor -class TestArgsortOpCPU(unittest.TestCase): - def setup_program(self): - self.main_program = Program() - self.startup_program = Program() - self.init_place() - - def setUp(self): - paddle.enable_static() - self.init_axis() - self.init_datatype() - self.init_direction() - self.init_inputshape() - - self.setup_program() - self.feed_data_field = {"x", "label"} - self.grad_data_field = {"x"} - - self.py_argsort = PyArgsort( - self.input_shape, self.axis, self.descending, self.dtype - ) - - with base.program_guard(self.main_program, self.startup_program): - x = paddle.static.data( - name="x", shape=[-1] + list(self.input_shape), dtype=self.dtype - ) - x.stop_gradient = False - x.desc.set_need_check_feed(False) - label = paddle.static.data( - name="label", - shape=[-1] + 
list(self.input_shape), - dtype=self.dtype, - ) - label.desc.set_need_check_feed(False) - self.index = paddle.argsort( - x=x, axis=self.axis, descending=self.descending - ) - self.sorted_x = paddle.sort( - x=x, axis=self.axis, descending=self.descending - ) - self.sorted_x.stop_gradient = False - loss = paddle.multiply(self.sorted_x, label) - self.loss = paddle.sum(loss) - - def forward(self): - self.feed_map = { - x: create_tensor(getattr(self.py_argsort, x), self.place) - for x in self.feed_data_field - } - exe = Executor(self.place) - out = exe.run( - self.main_program, - feed=self.feed_map, - fetch_list=[self.index, self.sorted_x, self.loss], - ) - return out - - def backward(self): - self.feed_map = { - x: create_tensor(getattr(self.py_argsort, x), self.place) - for x in self.feed_data_field - } - fetch_list = [ - self.main_program.global_block().var(grad_var_name(x)) - for x in self.grad_data_field - ] - exe = Executor(self.place) - out = exe.run( - self.main_program, - feed=self.feed_map, - fetch_list=fetch_list, - return_numpy=False, - ) - return out - - def test_backward(self, numeric_grad_delta=1e-5, max_relative_error=1e-7): - self.check_forward() - - with base.program_guard(self.main_program, self.startup_program): - append_backward(self.loss) - - ana_grad = [np.array(x) for x in self.backward()] - - num_grad = self.get_numerical_gradient(delta=numeric_grad_delta) - self.assert_is_close( - num_grad, - ana_grad, - 'x', - max_relative_error=max_relative_error, - msg_prefix="Gradient Check On %s" % str(self.place), - ) - - def check_forward(self): - pd_outputs = self.forward() - py_outputs = self.py_argsort.forward() - for pd_output, py_output in zip(pd_outputs, py_outputs): - self.assertEqual(pd_output.shape, py_output.shape) - np.testing.assert_allclose( - pd_output, py_output, rtol=1e-05, atol=0, equal_nan=False - ) - - def get_numerical_gradient(self, delta=1e-7): - if self.dtype == 'float16': - delta = np.array(delta).astype(np.float16) - feed_list = 
[getattr(self.py_argsort, x) for x in self.grad_data_field] - grad_list = [np.zeros_like(x) for x in feed_list] - for feed, grad in zip(feed_list, grad_list): - for f, g in np.nditer([feed, grad], op_flags=['readwrite']): - o = float(f) - f[...] = o + delta - y_pos = self.forward()[2] - - f[...] = o - delta - y_neg = self.forward()[2] - - f[...] = o - dout_dfeed = (y_pos - y_neg) / (delta * 2) - g[...] = dout_dfeed - - return grad_list - - def assert_is_close( - self, - numeric_grads, - analytic_grads, - names, - max_relative_error, - msg_prefix, - ): - for a, b, name in zip(numeric_grads, analytic_grads, names): - abs_a = np.abs(a) - abs_a[abs_a < 1e-3] = 1 - - diff_mat = np.abs(a - b) / abs_a - max_diff = np.max(diff_mat) - - def err_msg(): - offset = np.argmax(diff_mat > max_relative_error) - return ( - "%s error, %s variable %s max gradient diff %f over limit %f, " - "the first error element is %d, expected %f, but got %f." - ) % ( - 'argsort', - msg_prefix, - name, - max_diff, - max_relative_error, - offset, - a.flatten()[offset], - b.flatten()[offset], - ) - - self.assertLessEqual(max_diff, max_relative_error, err_msg()) - - def init_axis(self): - self.axis = -1 - - def init_datatype(self): - self.dtype = "float64" - - def init_direction(self): - self.descending = False - - def init_inputshape(self): - self.input_shape = (2, 2, 2, 2, 3) - - def init_place(self): - self.place = core.CPUPlace() - - -class TestArgsortOpGPU(TestArgsortOpCPU): - def init_place(self): - if core.is_compiled_with_cuda(): - self.place = core.CUDAPlace(0) - else: - self.place = core.CPUPlace() - - -class TestArgsortOpAxis0CPU(TestArgsortOpCPU): - def init_axis(self): - self.axis = 0 - - -class TestArgsortOpAxis0GPU(TestArgsortOpGPU): - def init_axis(self): - self.axis = 0 - - -class TestArgsortOpAxis1CPU(TestArgsortOpCPU): - def init_axis(self): - self.axis = 1 - - -class TestArgsortOpAxis1GPU(TestArgsortOpGPU): - def init_axis(self): - self.axis = 1 - - -class 
TestArgsortOpAxis2CPU(TestArgsortOpCPU): - def init_axis(self): - self.axis = 2 - - -class TestArgsortOpAxis2GPU(TestArgsortOpGPU): - def init_axis(self): - self.axis = 2 - - -class TestArgsortOpAxisNeg1CPU(TestArgsortOpCPU): - def init_axis(self): - self.axis = -1 - - -class TestArgsortOpAxisNeg1GPU(TestArgsortOpGPU): - def init_axis(self): - self.axis = -1 - - -class TestArgsortOpAxisNeg2CPU(TestArgsortOpCPU): - def init_axis(self): - self.axis = -2 - - -class TestArgsortOpAxisNeg2GPU(TestArgsortOpGPU): - def init_axis(self): - self.axis = -2 - - -class TestArgsortOpDescendingAxisCPU(TestArgsortOpCPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxisGPU(TestArgsortOpGPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxis0CPU(TestArgsortOpAxis0CPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxis0GPU(TestArgsortOpAxis0GPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxis1CPU(TestArgsortOpAxis1CPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxis1GPU(TestArgsortOpAxis1GPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxis2CPU(TestArgsortOpAxis2CPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxis2GPU(TestArgsortOpAxis2GPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxisNeg1CPU(TestArgsortOpAxisNeg1CPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxisNeg1GPU(TestArgsortOpAxisNeg1GPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxisNeg2CPU(TestArgsortOpAxisNeg2CPU): - def init_direction(self): - self.descending = True - - -class TestArgsortOpDescendingAxisNeg2GPU(TestArgsortOpAxisNeg2GPU): - def init_direction(self): - self.descending = 
True - - class TestArgsortErrorOnCPU(unittest.TestCase): def setUp(self): self.place = core.CPUPlace() diff --git a/test/deprecated/legacy_test/test_assign_op.py b/test/legacy_test/test_assign_op.py similarity index 100% rename from test/deprecated/legacy_test/test_assign_op.py rename to test/legacy_test/test_assign_op.py diff --git a/test/deprecated/legacy_test/test_atan2_op.py b/test/legacy_test/test_atan2_op.py similarity index 100% rename from test/deprecated/legacy_test/test_atan2_op.py rename to test/legacy_test/test_atan2_op.py diff --git a/test/deprecated/legacy_test/test_attribute_var.py b/test/legacy_test/test_attribute_var.py similarity index 82% rename from test/deprecated/legacy_test/test_attribute_var.py rename to test/legacy_test/test_attribute_var.py index e06e8a3d80d50..cdae49ba0741a 100644 --- a/test/deprecated/legacy_test/test_attribute_var.py +++ b/test/legacy_test/test_attribute_var.py @@ -66,43 +66,6 @@ def infer_prog(self): return res -class TestDropout(UnittestBase): - def init_info(self): - self.shapes = [[10, 10]] - self.save_path = os.path.join(self.temp_dir.name, 'dropout') - - def test_static(self): - main_prog = Program() - startup_prog = Program() - with program_guard(main_prog, startup_prog): - fc = paddle.nn.Linear(10, 10) - x = paddle.randn(self.shapes[0]) - x.stop_gradient = False - feat = fc(x) - # p is a Variable - p = paddle.randn([1]) - out = paddle.nn.functional.dropout(feat, p=p) - sgd = paddle.optimizer.SGD() - sgd.minimize(paddle.mean(out)) - # test _to_string - self.assertTrue("Var[" in str(main_prog)) - - exe = paddle.static.Executor() - exe.run(startup_prog) - res = exe.run(fetch_list=[x, out]) - # export model - paddle.static.save_inference_model(self.save_path, [x], [out], exe) - - # Test for Inference Predictor - infer_out = self.infer_prog() - self.assertEqual(infer_out.shape, (10, 10)) - - self.assertEqual( - main_prog.block(0).ops[4].all_attrs()['dropout_prob'].name, - p.name, - ) - - class 
TestTileTensorList(UnittestBase): def init_info(self): self.shapes = [[2, 3, 4]] diff --git a/test/legacy_test/test_backward.py b/test/legacy_test/test_backward.py new file mode 100644 index 0000000000000..05fdb572c79de --- /dev/null +++ b/test/legacy_test/test_backward.py @@ -0,0 +1,106 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle.base import backward + + +class BackwardNet: + """ + Abstract Base Class. + All Net inherited this Class should implement two functions: + build_model: build net to test the logic of backward + init_data: fake input data to test all programs. + """ + + def __init__(self): + self.stop_gradient_grad_vars = set() + self.no_grad_vars = set() + self.params_names = set() + self.op_path = [] + + def build_model(self): + """ + Build net to test the logic of backward. + :return: loss + """ + raise NotImplementedError + + def init_data(self): + """ + Fake input data to test all programs. + :return: dict, {'var_name': var_data} + """ + raise NotImplementedError + + +# TODO(Aurelius84): add conditional network test +class ConditionalNet(BackwardNet): + def __init__(self): + super().__init__() + + +class TestBackwardUninitializedVariable(unittest.TestCase): + """this case is found in yolov5 while to_static. + gradient aggregation may cause sum a invalid variable. 
+ """ + + def test(self): + paddle.enable_static() + main_prg, startup_prg = paddle.static.Program(), paddle.static.Program() + with paddle.static.program_guard(main_prg, startup_prg): + gt = paddle.static.data(name='gt', shape=[4], dtype='float32') + x = paddle.static.data(name='x', shape=[2], dtype='float32') + gt.stop_gradient = True + x.stop_gradient = False + gt = gt.reshape([4, 1]).reshape([4]) + loss = ( + paddle.nn.functional.binary_cross_entropy(x, gt[:2]) + + (gt[2:4] * x).sum() + ) + exe = paddle.static.Executor() + paddle.base.backward.gradients(loss, []) + exe.run(startup_prg) + # Optimizer + out = exe.run( + main_prg, + feed={ + 'gt': np.array([1.0, 1.0, 0.0, 0.0], dtype='float32'), + 'x': np.array([0.5, 0.5], dtype='float32'), + }, + fetch_list=[loss], + ) + print(out) + + +class TestStripGradSuffix(unittest.TestCase): + def test_strip_grad_suffix(self): + cases = ( + ('x@GRAD', 'x'), + ('x@GRAD@GRAD', 'x'), + ('x@GRAD@RENAME@1', 'x'), + ('x@GRAD_slice_0@GRAD', 'x@GRAD_slice_0'), + ('grad/grad/x@GRAD@RENAME@block0@1@GRAD', 'x'), + ) + for input_, desired in cases: + self.assertEqual(backward._strip_grad_suffix_(input_), desired) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() diff --git a/test/deprecated/legacy_test/test_batch_norm_op.py b/test/legacy_test/test_batch_norm_op.py similarity index 63% rename from test/deprecated/legacy_test/test_batch_norm_op.py rename to test/legacy_test/test_batch_norm_op.py index 63893a4353a3c..445c2082d13cd 100644 --- a/test/deprecated/legacy_test/test_batch_norm_op.py +++ b/test/legacy_test/test_batch_norm_op.py @@ -26,7 +26,7 @@ import paddle from paddle import base -from paddle.base import Program, core, program_guard +from paddle.base import core from paddle.base.framework import grad_var_name from paddle.pir_utils import test_with_pir_api @@ -545,402 +545,6 @@ def test_check_output(self): ) -class TestBatchNormOpTraining(unittest.TestCase): - def setUp(self): - self.use_mkldnn = False 
- self.fuse_with_relu = False - self.data_formats = ["NCHW", "NHWC"] - self.momentum = 0.9 - self.use_momentum_variable = False - self.epsilon = 0.00001 - self.init_kernel_type() - self.init_test_case() - - def init_test_case(self): - self.use_global_stats = False - self.no_grad_set = set() - self.fetch_list = [ - 'y', - 'mean', - 'variance', - 'saved_mean', - 'saved_variance', - 'x@GRAD', - 'scale@GRAD', - 'bias@GRAD', - ] - - def __assert_close(self, tensor, np_array, msg, atol=1e-4): - np.allclose(np.array(tensor), np_array, atol=atol) - - def ref_forward_backward( - self, - x, - y_grad, - scale, - bias, - mean, - variance, - epsilon, - momentum, - shape, - data_layout, - ): - # run forward - y, saved_mean, var_ref = _reference_training( - x, scale, bias, epsilon, data_layout - ) - mean_out = saved_mean * (1.0 - momentum) + momentum * mean - variance_out = var_ref * (1.0 - momentum) + momentum * variance - saved_variance = 1.0 / np.sqrt(var_ref + epsilon) - # run backward - x_grad, scale_grad, bias_grad = _reference_grad( - x, y_grad, scale, saved_mean, var_ref, epsilon, data_layout - ) - - return ( - y, - mean_out, - variance_out, - saved_mean, - saved_variance, - x_grad, - scale_grad, - bias_grad, - ) - - def set_mean_variance(self, scale_shape, x, data_layout): - mean, variance = _cal_mean_variance(x, self.epsilon, data_layout) - mean_pre = np.zeros(scale_shape).astype(np.float32) - variance_pre = np.ones(scale_shape).astype(np.float32) - # computing global mean/variance for one step - if self.use_global_stats: - mom = self.momentum - mean = mean * (1.0 - mom) + mom * mean_pre - variance = variance * (1.0 - mom) + mom * variance_pre - return mean, variance - - def test_forward_backward(self): - def test_with_place(place, data_layout, shape): - # attr - epsilon = self.epsilon - momentum = self.momentum - if data_layout == "NCHW": - n, c, h, w = shape[0], shape[1], shape[2], shape[3] - else: - n, h, w, c = shape[0], shape[1], shape[2], shape[3] - scale_shape = 
[c] - - np.random.seed(123) - x = np.random.random_sample(shape).astype(np.float32) - scale = np.random.random_sample(scale_shape).astype(np.float32) - bias = np.random.random_sample(scale_shape).astype(np.float32) - mean, variance = self.set_mean_variance(scale_shape, x, data_layout) - y_grad = np.random.random_sample(shape).astype(np.float32) - momentum_var = np.array([momentum]).astype(np.float32) - - ( - y, - mean_out, - variance_out, - saved_mean, - saved_variance, - x_grad, - scale_grad, - bias_grad, - ) = self.ref_forward_backward( - x, - y_grad, - scale, - bias, - mean, - variance, - epsilon, - momentum, - shape, - data_layout, - ) - - var_dict = locals() - var_dict['y@GRAD'] = y_grad - var_dict['x@GRAD'] = x_grad - var_dict['scale@GRAD'] = scale_grad - var_dict['bias@GRAD'] = bias_grad - - var_names = [ - 'x', - 'scale', - 'bias', - 'mean', - 'variance', - 'y', - 'saved_mean', - 'saved_variance', - 'momentum_var', - ] - ground_truth = {name: var_dict[name] for name in var_names} - - program = base.Program() - with base.program_guard(program): - block = program.global_block() - for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape, - ) - inputs = { - "X": block.var('x'), - "Scale": block.var('scale'), - "Bias": block.var('bias'), - "Mean": block.var('mean'), - "Variance": block.var('variance'), - } - attrs = { - "epsilon": epsilon, - "is_test": False, - "data_layout": data_layout, - "use_mkldnn": self.use_mkldnn, - "fuse_with_relu": self.fuse_with_relu, - "use_global_stats": self.use_global_stats, - } - if self.use_momentum_variable: - inputs['MomentumTensor'] = block.var('momentum_var') - else: - attrs['momentum'] = momentum - - outputs = { - "Y": block.var('y'), - "MeanOut": block.var('mean'), # share memory - "VarianceOut": block.var('variance'), # share memory - "SavedMean": block.var('saved_mean'), - "SavedVariance": block.var('saved_variance'), - } - block.create_var(name="reserve_space", 
dtype='float32') - outputs["ReserveSpace"] = block.var('reserve_space') - bn_op = block.append_op( - type="batch_norm", - inputs=inputs, - outputs=outputs, - attrs=attrs, - ) - block.create_var(name='y@GRAD', dtype='float32', shape=y.shape) - - # generate backward op_desc - grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( - bn_op.desc, self.no_grad_set, [] - ) - grad_op_desc = grad_op_desc_list[0] - new_op_desc = block.desc.append_op() - new_op_desc.copy_from(grad_op_desc) - for var_name in grad_op_desc.output_arg_names(): - block.desc.var(var_name.encode("ascii")) - grad_op_desc.infer_var_type(block.desc) - grad_op_desc.infer_shape(block.desc) - for arg in grad_op_desc.output_arg_names(): - grad_var = block.desc.find_var(arg.encode("ascii")) - grad_var.set_dtype(core.VarDesc.VarType.FP32) - - program._sync_with_cpp() - - exe = base.Executor(place) - out = exe.run( - program, - feed={ - name: var_dict[name] - for name in [ - 'x', - 'scale', - 'bias', - 'mean', - 'variance', - 'y@GRAD', - 'momentum_var', - ] - }, - fetch_list=self.fetch_list, - ) - - for id, name in enumerate(self.fetch_list): - if name == 'variance': - self.__assert_close( - var_dict[name], out[id], name, atol=1e-3 - ) - continue - self.__assert_close(var_dict[name], out[id], name) - print("op test forward passed: ", str(place), data_layout) - - places = [core.CPUPlace()] - - if core.is_compiled_with_cuda(): - places.append(core.CUDAPlace(0)) - - for place in places: - for data_format in self.data_formats: - test_with_place(place, data_format, [2, 3, 4, 5]) - - def init_kernel_type(self): - pass - - -class TestBatchNormOpTrainingCase1(TestBatchNormOpTraining): - def init_test_case(self): - self.use_global_stats = False - self.no_grad_set = {'scale@GRAD', 'bias@GRAD'} - self.fetch_list = ['y', 'mean', 'variance', 'x@GRAD'] - - -class TestBatchNormOpTrainingCase2(TestBatchNormOpTraining): - def init_test_case(self): - self.use_global_stats = False - self.no_grad_set = set() - 
self.fetch_list = [ - 'y', - 'mean', - 'variance', - 'saved_mean', - 'saved_variance', - 'x@GRAD', - 'scale@GRAD', - 'bias@GRAD', - ] - os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = "1" - - -class TestBatchNormOpTrainingCase3(TestBatchNormOpTraining): - def init_test_case(self): - self.use_global_stats = False - self.no_grad_set = {'x@GRAD'} - self.fetch_list = ['y', 'mean', 'variance', 'scale@GRAD', 'bias@GRAD'] - - -class TestBatchNormOpTrainingMomentumVariable(TestBatchNormOpTraining): - def init_test_case(self): - self.use_momentum_variable = True - self.use_global_stats = False - self.no_grad_set = set() - self.fetch_list = [ - 'y', - 'mean', - 'variance', - 'saved_mean', - 'saved_variance', - 'x@GRAD', - 'scale@GRAD', - 'bias@GRAD', - ] - - -class TestBatchNormOpFreezeStatsTraining(TestBatchNormOpTraining): - def init_test_case(self): - self.use_global_stats = True - self.no_grad_set = set() - self.fetch_list = [ - 'y', - 'mean', - 'variance', - 'x@GRAD', - 'scale@GRAD', - 'bias@GRAD', - ] - - def reference_grad(self, x, y_grad, scale, mean, var, epsilon, data_format): - if data_format == "NCHW": - x = np.transpose(x, (0, 2, 3, 1)) - y_grad = np.transpose(y_grad, (0, 2, 3, 1)) - - x_grad = scale * y_grad / np.sqrt(var + epsilon) - grad_scale = np.sum( - y_grad * (x - mean) / np.sqrt(var + epsilon), axis=(0, 1, 2) - ) - grad_offset = np.sum(y_grad, axis=(0, 1, 2)) - - # transfer back to N, C, H, W - if data_format == "NCHW": - x_grad = np.transpose(x_grad, (0, 3, 1, 2)) - x = np.transpose(x, (0, 3, 1, 2)) - y_grad = np.transpose(y_grad, (0, 3, 1, 2)) - - return x_grad, grad_scale, grad_offset - - def ref_forward_backward( - self, - x, - y_grad, - scale, - bias, - mean, - variance, - epsilon, - momentum, - shape, - data_layout, - ): - if data_layout != "NCHW" and data_layout != "NHWC": - raise ValueError("Unknown data order.") - - if data_layout == "NCHW": - x = np.transpose(x, (0, 2, 3, 1)) - - # run normalizaton - normalized = (x - mean) / 
np.sqrt(variance + epsilon) - y = normalized * scale + bias - - # transfer back to N, C, H, W - if data_layout == "NCHW": - x = np.transpose(x, (0, 3, 1, 2)) - y = np.transpose(y, (0, 3, 1, 2)) - - mean_out = mean - variance_out = variance - saved_variance = 1.0 / np.sqrt(variance + epsilon) - # run backward - x_grad, scale_grad, bias_grad = self.reference_grad( - x, y_grad, scale, mean, variance, epsilon, data_layout - ) - - return ( - y, - mean_out, - variance_out, - mean, - saved_variance, - x_grad, - scale_grad, - bias_grad, - ) - - -class TestBatchNormOpFreezeStatsAndScaleBiasTraining( - TestBatchNormOpFreezeStatsTraining -): - def init_test_case(self): - self.use_global_stats = True - self.no_grad_set = {'scale@GRAD', 'bias@GRAD'} - self.fetch_list = ['y', 'mean', 'variance', 'x@GRAD'] - - -class TestBatchNormOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - # the input of batch_norm must be Variable. - x1 = base.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], base.CPUPlace() - ) - self.assertRaises(TypeError, paddle.static.nn.batch_norm, x1) - - # the input dtype of batch_norm must be float16 or float32 or float64 - # float16 only can be set on GPU place - x2 = paddle.static.data( - name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" - ) - self.assertRaises(TypeError, paddle.static.nn.batch_norm, x2) - - # the first dimension of input for batch_norm must between [2d, 5d]. 
- x3 = paddle.static.data("", shape=[0], dtype="float32") - self.assertRaises(ValueError, paddle.static.nn.batch_norm, x3) - - class TestDygraphBatchNormAPIError(unittest.TestCase): @test_with_pir_api def test_errors(self): diff --git a/test/deprecated/legacy_test/test_bce_loss.py b/test/legacy_test/test_bce_loss.py similarity index 100% rename from test/deprecated/legacy_test/test_bce_loss.py rename to test/legacy_test/test_bce_loss.py diff --git a/test/deprecated/legacy_test/test_bicubic_interp_op.py b/test/legacy_test/test_bicubic_interp_op.py similarity index 100% rename from test/deprecated/legacy_test/test_bicubic_interp_op.py rename to test/legacy_test/test_bicubic_interp_op.py diff --git a/test/deprecated/legacy_test/test_bilinear_interp_op.py b/test/legacy_test/test_bilinear_interp_op.py similarity index 100% rename from test/deprecated/legacy_test/test_bilinear_interp_op.py rename to test/legacy_test/test_bilinear_interp_op.py diff --git a/test/legacy_test/test_bilinear_tensor_product_op.py b/test/legacy_test/test_bilinear_tensor_product_op.py new file mode 100644 index 0000000000000..8a74e5c2bdfbf --- /dev/null +++ b/test/legacy_test/test_bilinear_tensor_product_op.py @@ -0,0 +1,57 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from op_test import OpTest + +import paddle +from paddle import base + + +class TestBilinearTensorProductOp(OpTest): + def setUp(self): + self.op_type = "bilinear_tensor_product" + self.python_api = paddle.nn.functional.bilinear + batch_size = 6 + size0 = 5 + size1 = 4 + size2 = 5 + dtype = "float32" if base.core.is_compiled_with_rocm() else "float64" + a = np.random.random((batch_size, size0)).astype(dtype) + b = np.random.random((batch_size, size1)).astype(dtype) + w = np.random.random((size2, size0, size1)).astype(dtype) + bias = np.random.random((1, size2)).astype(dtype) + output = np.zeros((batch_size, size2)).astype(dtype) + for i in range(size2): + w_i = w[i, :, :] + output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) + self.inputs = { + 'X': a, + 'Y': b, + 'Weight': w, + 'Bias': bias, + } + self.outputs = {'Out': output + bias} + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y', 'Weight', 'Bias'], 'Out') + + +if __name__ == "__main__": + unittest.main() diff --git a/test/legacy_test/test_block_diag.py b/test/legacy_test/test_block_diag.py new file mode 100644 index 0000000000000..842f360f33c4b --- /dev/null +++ b/test/legacy_test/test_block_diag.py @@ -0,0 +1,95 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +import scipy + +import paddle +from paddle import base + + +class TestBlockDiagError(unittest.TestCase): + def test_errors(self): + def test_type_error(): + A = np.array([[1, 2], [3, 4]]) + B = np.array([[5, 6], [7, 8]]) + C = np.array([[9, 10], [11, 12]]) + with paddle.static.program_guard(base.Program()): + out = paddle.block_diag([A, B, C]) + + self.assertRaises(TypeError, test_type_error) + + def test_dime_error(): + A = paddle.to_tensor([[[1, 2], [3, 4]]]) + B = paddle.to_tensor([[[5, 6], [7, 8]]]) + C = paddle.to_tensor([[[9, 10], [11, 12]]]) + with paddle.static.program_guard(base.Program()): + out = paddle.block_diag([A, B, C]) + + self.assertRaises(ValueError, test_dime_error) + + +class TestBlockDiag(unittest.TestCase): + def setUp(self): + paddle.seed(2024) + self.type_list = ['int32', 'int64', 'float32', 'float64'] + self.place = [('cpu', paddle.CPUPlace())] + ( + [('gpu', paddle.CUDAPlace(0))] + if paddle.is_compiled_with_cuda() + else [] + ) + + def test_dygraph(self): + paddle.disable_static() + for device, place in self.place: + paddle.set_device(device) + for i in self.type_list: + A = np.random.randn(2, 3).astype(i) + B = np.random.randn(2).astype(i) + C = np.random.randn(4, 1).astype(i) + s_out = scipy.linalg.block_diag(A, B, C) + + A_tensor = paddle.to_tensor(A) + B_tensor = paddle.to_tensor(B) + C_tensor = paddle.to_tensor(C) + out = paddle.block_diag([A_tensor, B_tensor, C_tensor]) + np.testing.assert_allclose(out.numpy(), s_out) + + def test_static(self): + paddle.enable_static() + for device, place in self.place: + paddle.set_device(device) + for i in self.type_list: + A = np.random.randn(2, 3).astype(i) + B = np.random.randn(2).astype(i) + C = np.random.randn(4, 1).astype(i) + s_out = scipy.linalg.block_diag(A, B, C) + + with paddle.static.program_guard(paddle.static.Program()): + A_tensor = paddle.static.data('A', [2, 3], i) + B_tensor = paddle.static.data('B', [2], i) + C_tensor = 
paddle.static.data('C', [4, 1], i) + out = paddle.block_diag([A_tensor, B_tensor, C_tensor]) + exe = paddle.static.Executor(place) + res = exe.run( + feed={'A': A, 'B': B, 'C': C}, + fetch_list=[out], + ) + np.testing.assert_allclose(res[0], s_out) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_bmm_op.py b/test/legacy_test/test_bmm_op.py similarity index 100% rename from test/deprecated/legacy_test/test_bmm_op.py rename to test/legacy_test/test_bmm_op.py diff --git a/test/deprecated/legacy_test/test_cast_op.py b/test/legacy_test/test_cast_op.py similarity index 100% rename from test/deprecated/legacy_test/test_cast_op.py rename to test/legacy_test/test_cast_op.py diff --git a/test/deprecated/legacy_test/test_channel_shuffle.py b/test/legacy_test/test_channel_shuffle.py similarity index 100% rename from test/deprecated/legacy_test/test_channel_shuffle.py rename to test/legacy_test/test_channel_shuffle.py diff --git a/test/legacy_test/test_cholesky_op.py b/test/legacy_test/test_cholesky_op.py index d98596fc29c89..25fc0f9365299 100644 --- a/test/legacy_test/test_cholesky_op.py +++ b/test/legacy_test/test_cholesky_op.py @@ -121,14 +121,14 @@ def func(self, place): for i in range(len(out)): yi = out[i] dy = paddle.static.data( - name='dys_%s' % i, + name=f'dys_{i}', shape=yi.shape, dtype=root_data.dtype, ) dy.stop_gradient = False dy.persistable = True value = np.zeros(yi.shape, dtype=root_data.dtype) - feeds.update({'dys_%s' % i: value}) + feeds.update({f'dys_{i}': value}) dys.append(dy) fetch_list = base.gradients(out, root, dys) grad_check( diff --git a/test/legacy_test/test_collective_api_base.py b/test/legacy_test/test_collective_api_base.py index fa31fe1e16b54..dfc5c36a7eb5a 100644 --- a/test/legacy_test/test_collective_api_base.py +++ b/test/legacy_test/test_collective_api_base.py @@ -201,7 +201,7 @@ def setUp(self): self._trainers = 2 self._ps_endpoints = 
f"127.0.0.1:{self._find_free_port()},127.0.0.1:{self._find_free_port()}" self._python_interp = sys.executable - self._master_endpoints = "127.0.0.1:%s" % (self._find_free_port()) + self._master_endpoints = f"127.0.0.1:{self._find_free_port()}" self.temp_dir = tempfile.TemporaryDirectory() @@ -305,15 +305,15 @@ def _run_cluster(self, model_file, envs): tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() - sys.stderr.write('trainer 0 stderr: %s\n' % tr0_err) - sys.stderr.write('trainer 1 stderr: %s\n' % tr1_err) + sys.stderr.write(f'trainer 0 stderr: {tr0_err}\n') + sys.stderr.write(f'trainer 1 stderr: {tr1_err}\n') # close trainer file tr0_pipe.close() tr1_pipe.close() with open(path0, "r") as f: - sys.stderr.write('trainer 0 stderr file: %s\n' % f.read()) + sys.stderr.write(f'trainer 0 stderr file: {f.read()}\n') with open(path1, "r") as f: - sys.stderr.write('trainer 1 stderr file: %s\n' % f.read()) + sys.stderr.write(f'trainer 1 stderr file: {f.read()}\n') def load_and_remove(path): with open(path, 'rb') as f: diff --git a/test/legacy_test/test_collective_base.py b/test/legacy_test/test_collective_base.py index b11b992bcd5f8..07573f6ce7e00 100644 --- a/test/legacy_test/test_collective_base.py +++ b/test/legacy_test/test_collective_base.py @@ -232,8 +232,8 @@ def _run_cluster(self, model_file, envs): tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() - sys.stderr.write('trainer 0 stderr: %s\n' % tr0_err) - sys.stderr.write('trainer 1 stderr: %s\n' % tr1_err) + sys.stderr.write(f'trainer 0 stderr: {tr0_err}\n') + sys.stderr.write(f'trainer 1 stderr: {tr1_err}\n') # close trainer file tr0_pipe.close() tr1_pipe.close() diff --git a/test/deprecated/legacy_test/test_complex_abs.py b/test/legacy_test/test_complex_abs.py similarity index 100% rename from test/deprecated/legacy_test/test_complex_abs.py rename to test/legacy_test/test_complex_abs.py diff --git a/test/deprecated/legacy_test/test_complex_op.py 
b/test/legacy_test/test_complex_op.py similarity index 100% rename from test/deprecated/legacy_test/test_complex_op.py rename to test/legacy_test/test_complex_op.py diff --git a/test/deprecated/legacy_test/test_complex_variable.py b/test/legacy_test/test_complex_variable.py similarity index 100% rename from test/deprecated/legacy_test/test_complex_variable.py rename to test/legacy_test/test_complex_variable.py diff --git a/test/deprecated/legacy_test/test_complex_view_op.py b/test/legacy_test/test_complex_view_op.py similarity index 100% rename from test/deprecated/legacy_test/test_complex_view_op.py rename to test/legacy_test/test_complex_view_op.py diff --git a/test/deprecated/legacy_test/test_conj_op.py b/test/legacy_test/test_conj_op.py similarity index 100% rename from test/deprecated/legacy_test/test_conj_op.py rename to test/legacy_test/test_conj_op.py diff --git a/test/legacy_test/test_conv2d_op.py b/test/legacy_test/test_conv2d_op.py index a3bfa75d1225f..b0b0d0abe2d96 100644 --- a/test/legacy_test/test_conv2d_op.py +++ b/test/legacy_test/test_conv2d_op.py @@ -34,14 +34,14 @@ def conv2d_forward_naive( ): if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if data_format not in ["NCHW", "NHWC"]: raise ValueError( - "Unknown Attr(data_format): '%s' ." - "It can only be 'NCHW' or 'NHWC'." % str(data_format) + f"Unknown Attr(data_format): '{str(data_format)}' ." + "It can only be 'NCHW' or 'NHWC'." 
) channel_last = data_format == "NHWC" diff --git a/test/deprecated/legacy_test/test_conv2d_op_depthwise_conv.py b/test/legacy_test/test_conv2d_op_depthwise_conv.py similarity index 100% rename from test/deprecated/legacy_test/test_conv2d_op_depthwise_conv.py rename to test/legacy_test/test_conv2d_op_depthwise_conv.py diff --git a/test/legacy_test/test_conv2d_transpose_op.py b/test/legacy_test/test_conv2d_transpose_op.py index 36796adfdaec2..dd14afecf09ec 100644 --- a/test/legacy_test/test_conv2d_transpose_op.py +++ b/test/legacy_test/test_conv2d_transpose_op.py @@ -37,8 +37,8 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): padding_algorithm = attrs['padding_algorithm'] if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if attrs['data_format'] == 'NHWC': diff --git a/test/deprecated/legacy_test/test_conv2d_transpose_op_depthwise_conv.py b/test/legacy_test/test_conv2d_transpose_op_depthwise_conv.py similarity index 100% rename from test/deprecated/legacy_test/test_conv2d_transpose_op_depthwise_conv.py rename to test/legacy_test/test_conv2d_transpose_op_depthwise_conv.py diff --git a/test/legacy_test/test_conv3d_op.py b/test/legacy_test/test_conv3d_op.py index 143deb493c756..a41580c7b0445 100644 --- a/test/legacy_test/test_conv3d_op.py +++ b/test/legacy_test/test_conv3d_op.py @@ -37,14 +37,14 @@ def conv3d_forward_naive( ): if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if data_format not in ["NCDHW", "NDHWC"]: raise ValueError( - "Unknown Attr(data_format): '%s' ." 
- "It can only be 'NCDHW' or 'NDHWC'." % str(data_format) + f"Unknown Attr(data_format): '{str(data_format)}' ." + "It can only be 'NCDHW' or 'NDHWC'." ) channel_last = data_format == "NDHWC" diff --git a/test/legacy_test/test_conv3d_transpose_op.py b/test/legacy_test/test_conv3d_transpose_op.py index 78d88d53ff500..9e6f3445eaf99 100644 --- a/test/legacy_test/test_conv3d_transpose_op.py +++ b/test/legacy_test/test_conv3d_transpose_op.py @@ -42,8 +42,8 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs): padding_algorithm = attrs['padding_algorithm'] if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if attrs['data_format'] == 'NHWC': diff --git a/test/legacy_test/test_conv3d_transpose_part2_op.py b/test/legacy_test/test_conv3d_transpose_part2_op.py new file mode 100644 index 0000000000000..da75a5720a80d --- /dev/null +++ b/test/legacy_test/test_conv3d_transpose_part2_op.py @@ -0,0 +1,104 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import unittest + +sys.path.append("../../legacy_test") +from test_conv3d_transpose_op import ( + TestConv3DTransposeOp, + create_test_cudnn_bf16_class, + create_test_cudnn_fp16_class, +) + + +class TestWithSymmetricPad_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [2, 5, 5, 5, 3] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = 'NHWC' + + +class TestWithAsymmetricPad_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [1, 0, 1, 0, 1, 2] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [2, 5, 5, 5, 3] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = 'NHWC' + + +class TestWithGroups_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.check_no_filter = True + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.groups = 2 + self.input_size = [2, 5, 5, 5, 4] # NDHWC + f_c = self.input_size[-1] + self.filter_size = [f_c, 3, 3, 3, 3] + self.data_format = 'NHWC' + + +class TestWithStride_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [2, 2, 2] + self.dilations = [1, 1, 1] + self.groups = 1 + self.input_size = [2, 5, 5, 5, 3] # NCDHW + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = 'NHWC' + + +class TestWithDilation_NHWC(TestConv3DTransposeOp): + def init_test_case(self): + self.check_no_input = True + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [2, 2, 2] + self.groups = 1 + self.input_size = [2, 5, 5, 5, 3] # NCDHW + f_c = self.input_size[-1] + self.filter_size = [f_c, 6, 3, 3, 3] + self.data_format = 'NHWC' + + +# ----------------Conv3DTransposeCUDNN fp16---------------- +create_test_cudnn_fp16_class(TestWithSymmetricPad_NHWC) 
+create_test_cudnn_fp16_class(TestWithAsymmetricPad_NHWC) +create_test_cudnn_fp16_class(TestWithGroups_NHWC) +create_test_cudnn_fp16_class(TestWithStride_NHWC) +create_test_cudnn_fp16_class(TestWithDilation_NHWC) + + +# ----------------Conv3DTransposeCUDNN bf16---------------- +create_test_cudnn_bf16_class(TestWithSymmetricPad_NHWC) +create_test_cudnn_bf16_class(TestWithAsymmetricPad_NHWC) +create_test_cudnn_bf16_class(TestWithGroups_NHWC) +create_test_cudnn_bf16_class(TestWithStride_NHWC) +create_test_cudnn_bf16_class(TestWithDilation_NHWC) + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_conv_nn_grad.py b/test/legacy_test/test_conv_nn_grad.py similarity index 58% rename from test/deprecated/legacy_test/test_conv_nn_grad.py rename to test/legacy_test/test_conv_nn_grad.py index 40152a181f1c6..58461aefcd9ab 100644 --- a/test/deprecated/legacy_test/test_conv_nn_grad.py +++ b/test/legacy_test/test_conv_nn_grad.py @@ -26,23 +26,6 @@ class TestConvDoubleGradCheck(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 4, 3, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv2d(x, 2, 1, groups=1, bias_attr=False) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -66,28 +49,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConvDoubleGradCheckTest0(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 4, 3, 3] - eps = 0.005 - dtype = np.float32 if 
base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv2d(x, 2, 1, bias_attr=False) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -111,28 +76,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConvDoubleGradCheckTest1(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 3, 3, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv2d(x, 2, 1, padding=1, bias_attr=False) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -156,28 +103,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv3DDoubleGradCheck(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 4, 3, 4, 2] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv3d(x, 2, 1, bias_attr=False) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - 
w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -201,28 +130,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv3DDoubleGradCheckTest1(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 4, 5, 3, 2] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv3d(x, 2, 1, padding=1, bias_attr=False) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -246,35 +157,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv2DoubleGradCheck_AsyPadding(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 3, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 0, 1], - bias_attr=False, - use_cudnn=True, - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def 
func_pir(self, place): @@ -298,35 +184,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv2DoubleGradCheck_PaddingSAME(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 3, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding="SAME", - bias_attr=False, - use_cudnn=True, - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -350,35 +211,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv2DoubleGradCheck_PaddingVALID(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 3, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding="VALID", - bias_attr=False, - use_cudnn=True, - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -402,37 +238,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p 
in places: - self.func(p) self.func_pir(p) class TestConv2DoubleGradCheck_ChannelLast(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 3, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 1], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NHWC", - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -457,37 +266,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv2DoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 3, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv2d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 1, 0], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NHWC", - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -512,35 +294,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class 
TestConv3DDoubleGradCheck_AsyPadding(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 2, 2, 2] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 0, 1, 1, 2], - bias_attr=False, - use_cudnn=True, - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -564,36 +321,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv3DoubleGradCheck_PaddingSAME(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 2, 2, 2] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding="SAME", - groups=1, - bias_attr=False, - use_cudnn=True, - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -617,35 +348,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv3DoubleGradCheck_PaddingVALID(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = 
[2, 2, 3, 3, 2] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding="VALID", - bias_attr=False, - use_cudnn=True, - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -669,37 +375,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv3DDoubleGradCheck_ChannelLast(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 2, 2, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 1, 1], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NDHWC", - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -724,37 +403,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestConv3DDoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 2, 2, 2, 3] - eps = 0.005 - dtype = np.float32 if 
base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - y = paddle.static.nn.conv3d( - input=x, - num_filters=2, - filter_size=1, - padding=[1, 0, 1, 0, 1, 0], - bias_attr=False, - use_cudnn=True, - groups=1, - data_format="NDHWC", - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -779,35 +431,10 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) class TestDepthWiseConvDoubleGradCheck(unittest.TestCase): - @prog_scope() - def func(self, place): - shape = [2, 4, 3, 3] - eps = 0.005 - dtype = np.float32 if base.core.is_compiled_with_rocm() else np.float64 - x = paddle.static.data('x', shape, dtype) - - # condition of depthwise conv: - # use_cudnn == False - # groups == filters - # num_filters % num_channels == 0 - y = paddle.static.nn.conv2d( - x, shape[1], 1, groups=shape[1], bias_attr=False, use_cudnn=False - ) - x_arr = np.random.uniform(-1, 1, shape).astype(dtype) - - w = base.default_main_program().global_block().all_parameters() - w_arr = [] - for p in w: - w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype)) - gradient_checker.double_grad_check( - [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps - ) - @test_with_pir_api @prog_scope() def func_pir(self, place): @@ -832,7 +459,6 @@ def test_grad(self): if core.is_compiled_with_cuda(): places.append(base.CUDAPlace(0)) for p in places: - self.func(p) self.func_pir(p) diff --git a/test/deprecated/legacy_test/test_copysign_op.py b/test/legacy_test/test_copysign_op.py similarity index 100% rename from 
test/deprecated/legacy_test/test_copysign_op.py rename to test/legacy_test/test_copysign_op.py diff --git a/test/legacy_test/test_cpuonly_launch.sh b/test/legacy_test/test_cpuonly_launch.sh index 1c35166cf4434..8048e2697167e 100644 --- a/test/legacy_test/test_cpuonly_launch.sh +++ b/test/legacy_test/test_cpuonly_launch.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,7 +22,7 @@ function test_launch_cpuonly(){ else if grep -q "CPUONLY" ut.elog; then echo "test_launch_cpuonly successfully" - else + else echo "test_launch_cpuonly failed" exit -1 fi diff --git a/test/deprecated/legacy_test/test_crop_tensor_op.py b/test/legacy_test/test_crop_tensor_op.py similarity index 100% rename from test/deprecated/legacy_test/test_crop_tensor_op.py rename to test/legacy_test/test_crop_tensor_op.py diff --git a/test/deprecated/legacy_test/test_cross_entropy2_op.py b/test/legacy_test/test_cross_entropy2_op.py similarity index 100% rename from test/deprecated/legacy_test/test_cross_entropy2_op.py rename to test/legacy_test/test_cross_entropy2_op.py diff --git a/test/deprecated/legacy_test/test_cross_entropy_op.py b/test/legacy_test/test_cross_entropy_op.py similarity index 100% rename from test/deprecated/legacy_test/test_cross_entropy_op.py rename to test/legacy_test/test_cross_entropy_op.py diff --git a/test/deprecated/legacy_test/test_cummax_op.py b/test/legacy_test/test_cummax_op.py similarity index 100% rename from test/deprecated/legacy_test/test_cummax_op.py rename to 
test/legacy_test/test_cummax_op.py diff --git a/test/deprecated/legacy_test/test_cumprod_op.py b/test/legacy_test/test_cumprod_op.py similarity index 100% rename from test/deprecated/legacy_test/test_cumprod_op.py rename to test/legacy_test/test_cumprod_op.py diff --git a/test/deprecated/legacy_test/test_data_norm_op.py b/test/legacy_test/test_data_norm_op.py similarity index 91% rename from test/deprecated/legacy_test/test_data_norm_op.py rename to test/legacy_test/test_data_norm_op.py index 954c3da834fd7..8c6c4d599f180 100644 --- a/test/deprecated/legacy_test/test_data_norm_op.py +++ b/test/legacy_test/test_data_norm_op.py @@ -19,9 +19,7 @@ from op import Operator from op_test import OpTest -import paddle -from paddle import base -from paddle.base import Program, core, program_guard +from paddle.base import core def _reference_testing(x, batch_size, batch_sum, batch_square_sum, slot_dim=-1): @@ -524,37 +522,5 @@ def test_check_grad(self): self.check_grad(['X'], 'Y', no_grad_set=set(), check_dygraph=False) -class TestDataNormOpErrorr(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - x2 = paddle.static.data(name='x2', shape=[-1, 3, 4], dtype="int32") - # self.assertRaises(TypeError, base.data_norm, x2) - paddle.static.nn.data_norm( - input=x2, param_attr={}, enable_scale_and_shift=True - ) - - # Test input with dimension 1 - paddle.enable_static() - x3 = paddle.static.data("", shape=[0], dtype="float32") - self.assertRaises(ValueError, paddle.static.nn.data_norm, x3) - - # The size of input in data_norm should not be 0. 
- def test_0_size(): - paddle.enable_static() - x = paddle.static.data(name='x', shape=[0, 3], dtype='float32') - out = paddle.static.nn.data_norm(x, slot_dim=1) - cpu = base.core.CPUPlace() - exe = base.Executor(cpu) - exe.run(base.default_startup_program()) - test_program = base.default_main_program().clone(for_test=True) - exe.run( - test_program, - fetch_list=out, - feed={'x': np.ones([0, 3]).astype('float32')}, - ) - - self.assertRaises(ValueError, test_0_size) - - if __name__ == '__main__': unittest.main() diff --git a/test/legacy_test/test_deform_conv2d.py b/test/legacy_test/test_deform_conv2d.py new file mode 100644 index 0000000000000..95e180c373842 --- /dev/null +++ b/test/legacy_test/test_deform_conv2d.py @@ -0,0 +1,346 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from unittest import TestCase + +import numpy as np + +import paddle +import paddle.nn.initializer as I +from paddle.pir_utils import test_with_pir_api + + +class TestDeformConv2D(TestCase): + batch_size = 4 + spatial_shape = (5, 5) + dtype = "float32" + + def setUp(self): + self.in_channels = 2 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [0, 0] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = True + + def prepare(self): + np.random.seed(1) + paddle.seed(1) + if isinstance(self.kernel_size, int): + filter_shape = (self.kernel_size,) * 2 + else: + filter_shape = tuple(self.kernel_size) + self.filter_shape = filter_shape + + self.weight = np.random.uniform( + -1, + 1, + (self.out_channels, self.in_channels // self.groups) + filter_shape, + ).astype(self.dtype) + if not self.no_bias: + self.bias = np.random.uniform(-1, 1, (self.out_channels,)).astype( + self.dtype + ) + + def out_size( + in_size, pad_size, dilation_size, kernel_size, stride_size + ): + return ( + in_size + 2 * pad_size - (dilation_size * (kernel_size - 1) + 1) + ) / stride_size + 1 + + out_h = int( + out_size( + self.spatial_shape[0], + self.padding[0], + self.dilation[0], + self.kernel_size[0], + self.stride[0], + ) + ) + out_w = int( + out_size( + self.spatial_shape[1], + self.padding[1], + self.dilation[1], + self.kernel_size[1], + self.stride[1], + ) + ) + out_shape = (out_h, out_w) + + self.input_shape = ( + self.batch_size, + self.in_channels, + ) + self.spatial_shape + + self.offset_shape = ( + self.batch_size, + self.deformable_groups * 2 * filter_shape[0] * filter_shape[1], + ) + out_shape + + self.mask_shape = ( + self.batch_size, + self.deformable_groups * filter_shape[0] * filter_shape[1], + ) + out_shape + + self.input = np.random.uniform(-1, 1, self.input_shape).astype( + self.dtype + ) + + self.offset = np.random.uniform(-1, 1, self.offset_shape).astype( + self.dtype + ) + + 
self.mask = np.random.uniform(-1, 1, self.mask_shape).astype(self.dtype) + + def static_graph_case_dcn(self): + main = paddle.static.Program() + start = paddle.static.Program() + paddle.enable_static() + with paddle.static.program_guard(main, start): + x = paddle.static.data( + "input", (-1, self.in_channels, -1, -1), dtype=self.dtype + ) + offset = paddle.static.data( + "offset", + ( + -1, + self.deformable_groups + * 2 + * self.filter_shape[0] + * self.filter_shape[1], + -1, + -1, + ), + dtype=self.dtype, + ) + mask = paddle.static.data( + "mask", + ( + -1, + self.deformable_groups + * self.filter_shape[0] + * self.filter_shape[1], + -1, + -1, + ), + dtype=self.dtype, + ) + + y_v1 = paddle.vision.ops.DeformConv2D( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.filter_shape, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + deformable_groups=self.deformable_groups, + weight_attr=I.Assign(self.weight), + bias_attr=False if self.no_bias else I.Assign(self.bias), + )(x, offset, None) + + y_v2 = paddle.vision.ops.DeformConv2D( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.filter_shape, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + deformable_groups=self.deformable_groups, + weight_attr=I.Assign(self.weight), + bias_attr=False if self.no_bias else I.Assign(self.bias), + )(x, offset, mask) + + exe = paddle.static.Executor(self.place) + exe.run(start) + out_v1, out_v2 = exe.run( + main, + feed={ + "input": self.input, + "offset": self.offset, + "mask": self.mask, + }, + fetch_list=[y_v1, y_v2], + ) + return out_v1, out_v2 + + def dygraph_case_dcn(self): + paddle.disable_static() + x = paddle.to_tensor(self.input) + offset = paddle.to_tensor(self.offset) + mask = paddle.to_tensor(self.mask) + + bias = None if self.no_bias else paddle.to_tensor(self.bias) + + deform_conv2d = 
paddle.vision.ops.DeformConv2D( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + deformable_groups=self.deformable_groups, + groups=self.groups, + weight_attr=I.Assign(self.weight), + bias_attr=False if self.no_bias else I.Assign(self.bias), + ) + + y_v1 = deform_conv2d(x, offset) + y_v2 = deform_conv2d(x, offset, mask) + + out_v1 = y_v1.numpy() + out_v2 = y_v2.numpy() + + return out_v1, out_v2 + + @test_with_pir_api + def _test_identity(self): + self.prepare() + static_dcn_v1, static_dcn_v2 = self.static_graph_case_dcn() + dy_dcn_v1, dy_dcn_v2 = self.dygraph_case_dcn() + np.testing.assert_array_almost_equal(static_dcn_v1, dy_dcn_v1) + np.testing.assert_array_almost_equal(static_dcn_v2, dy_dcn_v2) + + def test_identity(self): + self.place = paddle.CPUPlace() + self._test_identity() + + if paddle.is_compiled_with_cuda(): + self.place = paddle.CUDAPlace(0) + self._test_identity() + + +# testcases for DeformConv2D +class TestDeformConv2DWithPadding(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [2, 2] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = True + + +class TestDeformConv2DWithBias(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [2, 2] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithAsynPadding(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 2] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithDilation(TestDeformConv2D): + def 
setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 1] + self.stride = [1, 1] + self.dilation = [3, 3] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithStride(TestDeformConv2D): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 1] + self.stride = [2, 2] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithDeformable_Groups(TestDeformConv2D): + def setUp(self): + self.in_channels = 5 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 1] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 5 + self.groups = 1 + self.no_bias = False + + +class TestDeformConv2DWithGroups(TestDeformConv2D): + def setUp(self): + self.in_channels = 5 + self.out_channels = 5 + self.kernel_size = [3, 3] + self.padding = [1, 1] + self.stride = [1, 1] + self.dilation = [1, 1] + self.deformable_groups = 1 + self.groups = 5 + self.no_bias = False + + +class TestDeformConv2DError(unittest.TestCase): + @test_with_pir_api + def test_input_error(self): + def test_input_rank_error(): + paddle.enable_static() + x = paddle.static.data(name='error_x_1', shape=[0], dtype='float32') + offset = paddle.static.data( + name='error_offset_1', shape=[0], dtype='float32' + ) + mask = paddle.static.data( + name='error_mask_1', shape=[0, 0, 0], dtype='float32' + ) + out = paddle.vision.ops.DeformConv2D( + in_channels=0, + out_channels=0, + kernel_size=0, + deformable_groups=0, + )(x, offset, mask) + + self.assertRaises(AssertionError, test_input_rank_error) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_deformable_conv_op.py b/test/legacy_test/test_deformable_conv_op.py similarity index 85% rename from test/deprecated/legacy_test/test_deformable_conv_op.py rename to 
test/legacy_test/test_deformable_conv_op.py index 63d939cc85626..23b49f4f93606 100644 --- a/test/deprecated/legacy_test/test_deformable_conv_op.py +++ b/test/legacy_test/test_deformable_conv_op.py @@ -372,73 +372,6 @@ def init_test_case(self): class TestModulatedDeformableConvInvalidInput(unittest.TestCase): - def test_error(self): - def test_invalid_input(): - paddle.enable_static() - input = [1, 3, 32, 32] - offset = paddle.static.data( - name='offset', shape=[None, 3, 32, 32], dtype='float32' - ) - mask = paddle.static.data( - name='mask', shape=[None, 3, 32, 32], dtype='float32' - ) - loss = paddle.static.nn.common.deformable_conv( - input, offset, mask, num_filters=4, filter_size=1 - ) - - self.assertRaises(TypeError, test_invalid_input) - - def test_invalid_offset(): - paddle.enable_static() - input = paddle.static.data( - name='input', shape=[None, 3, 32, 32], dtype='int32' - ) - offset = paddle.static.data( - name='offset', shape=[None, 3, 32, 32], dtype='float32' - ) - mask = paddle.static.data( - name='mask', shape=[None, 3, 32, 32], dtype='float32' - ) - loss = paddle.static.nn.common.deformable_conv( - input, offset, mask, num_filters=4, filter_size=1 - ) - - self.assertRaises(TypeError, test_invalid_offset) - - def test_invalid_filter(): - paddle.enable_static() - input = paddle.static.data( - name='input_filter', shape=[None, 3, 32, 32], dtype='float32' - ) - offset = paddle.static.data( - name='offset_filter', shape=[None, 3, 32, 32], dtype='float32' - ) - mask = paddle.static.data( - name='mask_filter', shape=[None, 3, 32, 32], dtype='float32' - ) - loss = paddle.static.nn.common.deformable_conv( - input, offset, mask, num_filters=4, filter_size=0 - ) - - self.assertRaises(ValueError, test_invalid_filter) - - def test_invalid_groups(): - paddle.enable_static() - input = paddle.static.data( - name='input_groups', shape=[1, 1, 1, 1], dtype='float32' - ) - offset = paddle.static.data( - name='offset_groups', shape=[1, 1], dtype='float32' - ) - mask = 
paddle.static.data( - name='mask_groups', shape=[1], dtype='float32' - ) - paddle.static.nn.deform_conv2d( - input, offset, mask, 1, 1, padding=1, groups=0 - ) - - self.assertRaises(ValueError, test_invalid_groups) - @test_with_pir_api def test_error_api(self): def test_invalid_input(): diff --git a/test/deprecated/legacy_test/test_deformable_conv_v1_op.py b/test/legacy_test/test_deformable_conv_v1_op.py similarity index 100% rename from test/deprecated/legacy_test/test_deformable_conv_v1_op.py rename to test/legacy_test/test_deformable_conv_v1_op.py diff --git a/test/deprecated/legacy_test/test_determinant_op.py b/test/legacy_test/test_determinant_op.py similarity index 100% rename from test/deprecated/legacy_test/test_determinant_op.py rename to test/legacy_test/test_determinant_op.py diff --git a/test/legacy_test/test_device_guard.py b/test/legacy_test/test_device_guard.py new file mode 100644 index 0000000000000..9d53982992ab7 --- /dev/null +++ b/test/legacy_test/test_device_guard.py @@ -0,0 +1,106 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import paddle + +paddle.enable_static() + + +def execute(main_program, startup_program): + if paddle.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + else: + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + exe.run(startup_program) + exe.run(main_program) + + +def get_valid_warning_num(warning, w): + num = 0 + for i in range(len(w)): + if warning in str(w[i].message): + num += 1 + return num + + +class TestDeviceGuard(unittest.TestCase): + def test_cpu_only_op(self): + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard(main_program, startup_program): + x = paddle.full( + shape=[2, 255, 13, 13], fill_value=0.3, dtype='float32' + ) + gt_box = paddle.full( + shape=[2, 6, 4], fill_value=0.5, dtype='float32' + ) + gt_label = paddle.full(shape=[2, 6], fill_value=1.0, dtype='int32') + gt_score = paddle.full( + shape=[2, 6], fill_value=0.5, dtype='float32' + ) + anchors = [ + 10, + 13, + 16, + 30, + 33, + 23, + 30, + 61, + 62, + 45, + 59, + 119, + 116, + 90, + 156, + 198, + 373, + 326, + ] + anchor_mask = [0, 1, 2] + with paddle.static.device_guard("gpu"): + # yolo_loss only has cpu kernel, so its cpu kernel will be executed + loss = paddle.vision.ops.yolo_loss( + x=x, + gt_box=gt_box, + gt_label=gt_label, + gt_score=gt_score, + anchors=anchors, + anchor_mask=anchor_mask, + class_num=80, + ignore_thresh=0.7, + downsample_ratio=32, + ) + + execute(main_program, startup_program) + + def test_error(self): + def device_attr(): + with paddle.static.device_guard("cpu1"): + out = paddle.full(shape=[1], fill_value=0.2, dtype='float32') + + def device_attr2(): + with paddle.static.device_guard("cpu:1"): + out = paddle.full(shape=[1], fill_value=0.2, dtype='float32') + + self.assertRaises(ValueError, device_attr) + self.assertRaises(ValueError, device_attr2) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_diagonal_op.py 
b/test/legacy_test/test_diagonal_op.py similarity index 100% rename from test/deprecated/legacy_test/test_diagonal_op.py rename to test/legacy_test/test_diagonal_op.py diff --git a/test/legacy_test/test_dist_base.py b/test/legacy_test/test_dist_base.py index 0abf18fe42c87..143f7e1ee8e62 100755 --- a/test/legacy_test/test_dist_base.py +++ b/test/legacy_test/test_dist_base.py @@ -1040,7 +1040,7 @@ def __free_port(): ) as s: s.bind(('', 0)) print_to_err( - type(self).__name__, "socket name: %s" % s.getsockname()[1] + type(self).__name__, f"socket name: {s.getsockname()[1]}" ) return s.getsockname()[1] @@ -1479,10 +1479,9 @@ def _get_nccl2_trainer_cmd( def _run_cluster_gloo( self, model, envs, update_method, check_error_log, log_name ): - assert update_method == "gloo", ( - "_run_cluster_gloo must have update_method: gloo, but get %s" - % update_method - ) + assert ( + update_method == "gloo" + ), f"_run_cluster_gloo must have update_method: gloo, but get {update_method}" assert ( not self._use_hallreduce ), "_run_cluster_gloo must have _use_hallreduce = false" @@ -1551,9 +1550,7 @@ def _run_cluster_nccl2( if DIST_UT_PORT == 0: # NOTE(wangxi). 
hallreduce test must use 4cards after nccl>=2.7 for i in range(0, 4): - self._ps_endpoints += "127.0.0.1:%s," % ( - self._find_free_port() - ) + self._ps_endpoints += f"127.0.0.1:{self._find_free_port()}," else: for i in range(0, 4): self._ps_endpoints += "127.0.0.1:%s," % (DIST_UT_PORT + i) diff --git a/test/legacy_test/test_dist_hapi_model.py b/test/legacy_test/test_dist_hapi_model.py index 03a92d6f3cbc9..e41f5b344a594 100644 --- a/test/legacy_test/test_dist_hapi_model.py +++ b/test/legacy_test/test_dist_hapi_model.py @@ -70,9 +70,11 @@ def start_local_trainers( procs = [] for t in pod.trainers: proc_env = { - "FLAGS_selected_gpus": "%s" % ",".join([str(g) for g in t.gpus]), + "FLAGS_selected_gpus": "{}".format( + ",".join([str(g) for g in t.gpus]) + ), "PADDLE_TRAINER_ID": "%d" % t.rank, - "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint, + "PADDLE_CURRENT_ENDPOINT": f"{t.endpoint}", "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(), "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()), "FLAGS_dynamic_static_unified_comm": "0", diff --git a/test/deprecated/legacy_test/test_eigh_op.py b/test/legacy_test/test_eigh_op.py similarity index 100% rename from test/deprecated/legacy_test/test_eigh_op.py rename to test/legacy_test/test_eigh_op.py diff --git a/test/deprecated/legacy_test/test_elementwise_heaviside_op.py b/test/legacy_test/test_elementwise_heaviside_op.py similarity index 100% rename from test/deprecated/legacy_test/test_elementwise_heaviside_op.py rename to test/legacy_test/test_elementwise_heaviside_op.py diff --git a/test/deprecated/legacy_test/test_elementwise_mul_op.py b/test/legacy_test/test_elementwise_mul_op.py similarity index 100% rename from test/deprecated/legacy_test/test_elementwise_mul_op.py rename to test/legacy_test/test_elementwise_mul_op.py diff --git a/test/deprecated/legacy_test/test_elementwise_pow_op.py b/test/legacy_test/test_elementwise_pow_op.py similarity index 100% rename from 
test/deprecated/legacy_test/test_elementwise_pow_op.py rename to test/legacy_test/test_elementwise_pow_op.py diff --git a/test/deprecated/legacy_test/test_executor_and_use_program_cache.py b/test/legacy_test/test_executor_and_use_program_cache.py similarity index 66% rename from test/deprecated/legacy_test/test_executor_and_use_program_cache.py rename to test/legacy_test/test_executor_and_use_program_cache.py index c43b58f027ca1..15df67914856d 100644 --- a/test/deprecated/legacy_test/test_executor_and_use_program_cache.py +++ b/test/legacy_test/test_executor_and_use_program_cache.py @@ -25,66 +25,6 @@ from paddle.pir_utils import test_with_pir_api -class TestExecutor(unittest.TestCase): - def test_mul(self): - main_program = base.Program() - startup_program = base.Program() - with base.program_guard(main_program, startup_program): - a = paddle.static.data(name='a', shape=[-1, 784], dtype='float32') - b = paddle.static.data(name='b', shape=[784, 100], dtype='float32') - a.desc.set_need_check_feed(False) - b.desc.set_need_check_feed(False) - output = paddle.matmul(x=a, y=b) - - # Compute with numpy - a_np = np.random.random((100, 784)).astype('float32') - b_np = np.random.random((784, 100)).astype('float32') - out_np = np.dot(a_np, b_np) - - place = paddle.CPUPlace() - exe = base.Executor(place) - - def _train(use_program_cache, max_iters=1): - import time - - run_time = 0.0 - for i in range(max_iters): - begin = time.time() - outs = exe.run( - program=main_program, - feed={'a': a_np, 'b': b_np}, - fetch_list=[output], - use_program_cache=use_program_cache, - ) - end = time.time() - run_time += end - begin - out = outs[0] - self.assertEqual((100, 100), out.shape) - np.testing.assert_allclose(out, out_np, rtol=1e-05) - return run_time - - max_iters = 3 - run_time_with_cache = _train( - use_program_cache=True, max_iters=max_iters - ) - print("run time with program cache: %f" % run_time_with_cache) - - run_time_without_cache = _train( - use_program_cache=False, 
max_iters=max_iters - ) - print("run time without program cache: %f" % run_time_without_cache) - - run_time_with_cache = _train( - use_program_cache=True, max_iters=max_iters - ) - print("run time with program cache: %f" % run_time_with_cache) - - run_time_with_cache = _train( - use_program_cache=True, max_iters=max_iters - ) - print("run time with program cache: %f" % run_time_with_cache) - - class ExecutorPaddingRNNTest(PaddingRNNTestBase): def train_and_save_inference_program( self, rnn_model="static", use_program_cache=True diff --git a/test/deprecated/legacy_test/test_expand_v2_op.py b/test/legacy_test/test_expand_v2_op.py similarity index 100% rename from test/deprecated/legacy_test/test_expand_v2_op.py rename to test/legacy_test/test_expand_v2_op.py diff --git a/test/legacy_test/test_fc_op.py b/test/legacy_test/test_fc_op.py new file mode 100644 index 0000000000000..d61c93361097b --- /dev/null +++ b/test/legacy_test/test_fc_op.py @@ -0,0 +1,136 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from op_test import OpTest + +SEED = 2020 + + +def fc_refer(matrix, with_bias, with_relu=False): + in_n, in_c, in_h, in_w = matrix.input.shape + w_i, w_o = matrix.weights.shape + + x_data = np.reshape(matrix.input, [in_n, in_c * in_h * in_w]) + w_data = np.reshape(matrix.weights, [w_i, w_o]) + b_data = np.reshape(matrix.bias, [1, w_o]) + result = None + + if with_bias: + result = np.dot(x_data, w_data) + b_data + else: + result = np.dot(x_data, w_data) + + if with_relu: + return np.maximum(result, 0) + else: + return result + + +class MatrixGenerate: + def __init__(self, mb, ic, oc, h, w, bias_dims=2): + self.input = np.random.random((mb, ic, h, w)).astype("float32") + self.weights = np.random.random((ic * h * w, oc)).astype("float32") + if bias_dims == 2: + self.bias = np.random.random((1, oc)).astype("float32") + else: + self.bias = np.random.random(oc).astype("float32") + + +class TestFCOp(OpTest): + def config(self): + self.with_bias = True + self.with_relu = True + self.matrix = MatrixGenerate(1, 10, 15, 3, 3, 2) + + def setUp(self): + self.op_type = "fc" + self.config() + + if self.with_bias: + self.inputs = { + 'Input': self.matrix.input, + 'W': self.matrix.weights, + 'Bias': self.matrix.bias, + } + else: + self.inputs = {'Input': self.matrix.input, 'W': self.matrix.weights} + + if self.with_relu: + activation_type = "relu" + else: + activation_type = "" + self.attrs = {'use_mkldnn': False, 'activation_type': activation_type} + + self.outputs = { + 'Out': fc_refer(self.matrix, self.with_bias, self.with_relu) + } + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestFCOpNoBias1(TestFCOp): + def config(self): + self.with_bias = False + self.with_relu = False + self.matrix = MatrixGenerate(2, 8, 10, 1, 1, 2) + + +class TestFCOpNoBias2(TestFCOp): + def config(self): + self.with_bias = False + self.with_relu = False + self.matrix = MatrixGenerate(4, 5, 6, 2, 2, 1) + + +class 
TestFCOpNoBias4(TestFCOp): + def config(self): + self.with_bias = False + self.with_relu = False + self.matrix = MatrixGenerate(1, 32, 64, 3, 3, 1) + + +class TestFCOpWithBias1(TestFCOp): + def config(self): + self.with_bias = True + self.with_relu = False + self.matrix = MatrixGenerate(3, 8, 10, 2, 1, 2) + + +class TestFCOpWithBias2(TestFCOp): + def config(self): + self.with_bias = True + self.with_relu = True + self.matrix = MatrixGenerate(4, 5, 6, 2, 2, 1) + + +class TestFCOpWithBias3(TestFCOp): + def config(self): + self.with_bias = True + self.with_relu = True + self.matrix = MatrixGenerate(1, 64, 32, 3, 3, 1) + + +class TestFCOpWithPadding(TestFCOp): + def config(self): + self.with_bias = True + self.with_relu = True + self.matrix = MatrixGenerate(1, 4, 3, 128, 128, 2) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_fill_any_op.py b/test/legacy_test/test_fill_any_op.py similarity index 100% rename from test/deprecated/legacy_test/test_fill_any_op.py rename to test/legacy_test/test_fill_any_op.py diff --git a/test/deprecated/legacy_test/test_fill_diagonal_tensor_op.py b/test/legacy_test/test_fill_diagonal_tensor_op.py similarity index 100% rename from test/deprecated/legacy_test/test_fill_diagonal_tensor_op.py rename to test/legacy_test/test_fill_diagonal_tensor_op.py diff --git a/test/deprecated/legacy_test/test_flatten_contiguous_range_op.py b/test/legacy_test/test_flatten_contiguous_range_op.py similarity index 100% rename from test/deprecated/legacy_test/test_flatten_contiguous_range_op.py rename to test/legacy_test/test_flatten_contiguous_range_op.py diff --git a/test/legacy_test/test_fleet_launch_async.sh b/test/legacy_test/test_fleet_launch_async.sh index f50e24f10beca..88a53788719ad 100644 --- a/test/legacy_test/test_fleet_launch_async.sh +++ b/test/legacy_test/test_fleet_launch_async.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/legacy_test/test_fleet_launch_cloud.sh b/test/legacy_test/test_fleet_launch_cloud.sh index 0d05b73d3566f..08079ea2848cf 100644 --- a/test/legacy_test/test_fleet_launch_cloud.sh +++ b/test/legacy_test/test_fleet_launch_cloud.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/legacy_test/test_fleet_launch_elastic.sh b/test/legacy_test/test_fleet_launch_elastic.sh index a3e76a564f5b7..07d4dc993f3ae 100644 --- a/test/legacy_test/test_fleet_launch_elastic.sh +++ b/test/legacy_test/test_fleet_launch_elastic.sh @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,11 +19,11 @@ rm -rf log* pids=`ps -ef | grep "python -m paddle.distributed.launch elastic_demo.[py]" | awk '{print $2}'` if [ -n "$pids" ]; then - echo $pids | xargs kill -9 + echo $pids | xargs kill -9 fi pids=`ps -ef | grep "/usr/bin/python -u elastic_demo.[py]" | awk '{print $2}'` if [ -n "$pids" ]; then - echo $pids | xargs kill -9 + echo $pids | xargs kill -9 fi python -m pip install --no-cache-dir etcd3 -i https://mirror.baidu.com/pypi/simple @@ -102,7 +102,7 @@ check_env() { echo "PADDLE_TRAINERS error" exit -1 fi - + if grep -q "0-DISTRIBUTED_TRAINER_ENDPOINTS=$DISTRIBUTED_TRAINER_ENDPOINTS" $lw0 && grep -q "1-DISTRIBUTED_TRAINER_ENDPOINTS=$DISTRIBUTED_TRAINER_ENDPOINTS" $lw0; then echo "DISTRIBUTED_TRAINER_ENDPOINTS ok" else diff --git a/test/legacy_test/test_fleet_launch_nproc.sh b/test/legacy_test/test_fleet_launch_nproc.sh index 63fce18683c04..5371b90822e15 100644 --- a/test/legacy_test/test_fleet_launch_nproc.sh +++ b/test/legacy_test/test_fleet_launch_nproc.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/test/legacy_test/test_fleet_launch_ps.sh b/test/legacy_test/test_fleet_launch_ps.sh index bfbaf258c86b4..9b81cd4866a62 100644 --- a/test/legacy_test/test_fleet_launch_ps.sh +++ b/test/legacy_test/test_fleet_launch_ps.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/legacy_test/test_fleet_launch_rank_mapping.sh b/test/legacy_test/test_fleet_launch_rank_mapping.sh index eb84f9f6e847a..abd347664dc01 100755 --- a/test/legacy_test/test_fleet_launch_rank_mapping.sh +++ b/test/legacy_test/test_fleet_launch_rank_mapping.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/legacy_test/test_fleet_run_random_port.sh b/test/legacy_test/test_fleet_run_random_port.sh index 9ca48f2ab5bb3..bb71f883d30e4 100644 --- a/test/legacy_test/test_fleet_run_random_port.sh +++ b/test/legacy_test/test_fleet_run_random_port.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/legacy_test/test_fleetrun.sh b/test/legacy_test/test_fleetrun.sh index 710859727d2c9..f04245fcf0c09 100644 --- a/test/legacy_test/test_fleetrun.sh +++ b/test/legacy_test/test_fleetrun.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/test/deprecated/legacy_test/test_flip.py b/test/legacy_test/test_flip.py similarity index 100% rename from test/deprecated/legacy_test/test_flip.py rename to test/legacy_test/test_flip.py diff --git a/test/deprecated/legacy_test/test_fmax_op.py b/test/legacy_test/test_fmax_op.py similarity index 100% rename from test/deprecated/legacy_test/test_fmax_op.py rename to test/legacy_test/test_fmax_op.py diff --git a/test/deprecated/legacy_test/test_fmin_op.py b/test/legacy_test/test_fmin_op.py similarity index 100% rename from test/deprecated/legacy_test/test_fmin_op.py rename to test/legacy_test/test_fmin_op.py diff --git a/test/deprecated/legacy_test/test_fold_op.py b/test/legacy_test/test_fold_op.py similarity index 100% rename from test/deprecated/legacy_test/test_fold_op.py rename to test/legacy_test/test_fold_op.py diff --git a/test/deprecated/legacy_test/test_fractional_max_pool2d_op.py b/test/legacy_test/test_fractional_max_pool2d_op.py similarity index 100% rename from test/deprecated/legacy_test/test_fractional_max_pool2d_op.py rename to test/legacy_test/test_fractional_max_pool2d_op.py diff --git a/test/deprecated/legacy_test/test_full_like_op.py b/test/legacy_test/test_full_like_op.py similarity index 100% rename from test/deprecated/legacy_test/test_full_like_op.py rename to test/legacy_test/test_full_like_op.py diff --git a/test/legacy_test/test_functional_conv2d.py b/test/legacy_test/test_functional_conv2d.py new file mode 100644 index 0000000000000..2f7d18e29566f --- /dev/null +++ b/test/legacy_test/test_functional_conv2d.py @@ -0,0 +1,284 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from unittest import TestCase + +import numpy as np + +import paddle +import paddle.base.dygraph as dg +import paddle.nn.functional as F +from paddle import base + + +class TestFunctionalConv2DError(TestCase): + batch_size = 4 + spatial_shape = (16, 16) + dtype = "float32" + + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = "not_valid" + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NHWC" + + def test_exception(self): + self.prepare() + with self.assertRaises(ValueError): + self.static_graph_case() + + def prepare(self): + if isinstance(self.filter_shape, int): + filter_shape = (self.filter_shape,) * 2 + else: + filter_shape = tuple(self.filter_shape) + self.weight_shape = ( + self.out_channels, + self.in_channels // self.groups, + ) + filter_shape + self.bias_shape = (self.out_channels,) + + def static_graph_case(self): + main = base.Program() + start = base.Program() + with base.unique_name.guard(): + with base.program_guard(main, start): + self.channel_last = self.data_format == "NHWC" + if self.channel_last: + x = x = paddle.static.data( + "input", + (-1, -1, -1, self.in_channels), + dtype=self.dtype, + ) + else: + x = paddle.static.data( + "input", + (-1, self.in_channels, -1, -1), + dtype=self.dtype, + ) + weight = paddle.static.data( + "weight", self.weight_shape, dtype=self.dtype + ) + if not self.no_bias: + bias = paddle.static.data( + "bias", self.bias_shape, dtype=self.dtype + ) + y = F.conv2d( + 
x, + weight, + None if self.no_bias else bias, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format, + ) + + +class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = [[0, 0], [1, 2], [3, 4], [5, 6]] + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.use_cudnn = False + self.data_format = "NCHW" + + +class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 4 + self.filter_shape = 3 + self.padding = "same" + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.use_cudnn = False + self.data_format = "not_valid" + + +class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError): + def setUp(self): + self.in_channels = 4 + self.out_channels = 3 + self.filter_shape = 3 + self.padding = "same" + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.use_cudnn = False + self.data_format = "NCHW" + + +class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = "same" + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.use_cudnn = True + self.data_format = "not_valid" + + +class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = [1, 2, 1, 2, 1] + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.use_cudnn = True + self.data_format = "NCHW" + + +class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError): + def 
setUp(self): + self.in_channels = -5 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = [[0, 0], [0, 0], [3, 2], [1, 2]] + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.use_cudnn = False + self.data_format = "NCHW" + + +class TestFunctionalConv2DErrorCase10(TestFunctionalConv2DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 4 + self.filter_shape = 3 + self.padding = "same" + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.use_cudnn = False + self.data_format = "NHWC" + + +class TestFunctionalConv2DErrorCase11(TestFunctionalConv2DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.use_cudnn = False + self.data_format = "NHCW" + + +class TestFunctionalConv2DErrorCase12(TestCase): + def setUp(self): + self.input = np.array([]) + self.filter = np.array([]) + self.num_filters = 0 + self.filter_size = 0 + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.data_format = "NCHW" + + def dygraph_case(self): + with dg.guard(): + x = paddle.to_tensor(self.input, dtype=paddle.float32) + w = paddle.to_tensor(self.filter, dtype=paddle.float32) + b = ( + None + if self.bias is None + else paddle.to_tensor(self.bias, dtype=paddle.float32) + ) + y = F.conv2d( + x, + w, + b, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format, + ) + + def test_dygraph_exception(self): + with self.assertRaises(ValueError): + self.dygraph_case() + + +class TestFunctionalConv2DErrorCase13(TestFunctionalConv2DErrorCase12): + def setUp(self): + self.input = np.random.randn(1, 3, 3, 3) + self.filter = np.random.randn(3, 3, 1, 1) + self.num_filters = 3 + 
self.filter_size = 1 + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 0 + self.data_format = "NCHW" + + +class TestFunctionalConv2DErrorCase14(TestFunctionalConv2DErrorCase12): + def setUp(self): + self.input = np.random.randn(0, 0, 0, 0) + self.filter = np.random.randn(1, 0, 0, 0) + self.num_filters = 0 + self.filter_size = 0 + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.data_format = "NCHW" + + +if __name__ == "__main__": + paddle.enable_static() + unittest.main() diff --git a/test/legacy_test/test_functional_conv3d.py b/test/legacy_test/test_functional_conv3d.py new file mode 100644 index 0000000000000..bdfd2c7e6116f --- /dev/null +++ b/test/legacy_test/test_functional_conv3d.py @@ -0,0 +1,265 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from unittest import TestCase + +import numpy as np + +import paddle +import paddle.base.dygraph as dg +import paddle.nn.functional as F +from paddle import base + + +class TestFunctionalConv3DError(TestCase): + batch_size = 4 + spatial_shape = (8, 8, 8) + dtype = "float32" + + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = "not_valid" + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NDHWC" + + def test_exception(self): + self.prepare() + with self.assertRaises(ValueError): + self.static_graph_case() + + def prepare(self): + if isinstance(self.filter_shape, int): + filter_shape = (self.filter_shape,) * 3 + else: + filter_shape = tuple(self.filter_shape) + self.weight_shape = ( + self.out_channels, + self.in_channels // self.groups, + ) + filter_shape + self.bias_shape = (self.out_channels,) + + def static_graph_case(self): + main = base.Program() + start = base.Program() + with base.unique_name.guard(): + with base.program_guard(main, start): + self.channel_last = self.data_format == "NDHWC" + if self.channel_last: + x = x = paddle.static.data( + "input", + (-1, -1, -1, -1, self.in_channels), + dtype=self.dtype, + ) + else: + x = paddle.static.data( + "input", + (-1, self.in_channels, -1, -1, -1), + dtype=self.dtype, + ) + weight = paddle.static.data( + "weight", self.weight_shape, dtype=self.dtype + ) + if not self.no_bias: + bias = paddle.static.data( + "bias", self.bias_shape, dtype=self.dtype + ) + y = F.conv3d( + x, + weight, + None if self.no_bias else bias, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format, + ) + + if self.act == 'sigmoid': + y = F.sigmoid(y) + + +class TestFunctionalConv3DErrorCase2(TestFunctionalConv3DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = 
[[0, 0], [1, 1], [1, 2], [3, 4], [5, 6]] + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DErrorCase3(TestFunctionalConv3DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 4 + self.filter_shape = 3 + self.padding = "same" + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "not_valid" + + +class TestFunctionalConv3DErrorCase4(TestFunctionalConv3DError): + def setUp(self): + self.in_channels = 4 + self.out_channels = 3 + self.filter_shape = 3 + self.padding = "same" + self.stride = 1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DErrorCase7(TestFunctionalConv3DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = "same" + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "not_valid" + + +class TestFunctionalConv3DErrorCase8(TestFunctionalConv3DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = [1, 2, 1, 2, 1] + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DErrorCase9(TestFunctionalConv3DError): + def setUp(self): + self.in_channels = -5 + self.out_channels = 5 + self.filter_shape = 3 + self.padding = [[0, 0], [0, 0], [3, 2], [1, 2], [1, 1]] + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NCDHW" + + +class TestFunctionalConv3DErrorCase10(TestFunctionalConv3DError): + def setUp(self): + self.in_channels = 3 + self.out_channels = 4 + self.filter_shape = 3 + self.padding = "same" + self.stride = 
1 + self.dilation = 1 + self.groups = 2 + self.no_bias = False + self.act = "sigmoid" + self.data_format = "NDHWC" + + +class TestFunctionalConv3DErrorCase11(TestCase): + def setUp(self): + self.input = np.array([]) + self.filter = np.array([]) + self.num_filters = 0 + self.filter_size = 0 + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.data_format = "NCDHW" + + def dygraph_case(self): + with dg.guard(): + x = paddle.to_tensor(self.input, dtype=paddle.float32) + w = paddle.to_tensor(self.filter, dtype=paddle.float32) + b = ( + None + if self.bias is None + else paddle.to_tensor(self.bias, dtype=paddle.float32) + ) + y = F.conv3d( + x, + w, + b, + padding=self.padding, + stride=self.stride, + dilation=self.dilation, + groups=self.groups, + data_format=self.data_format, + ) + + def test_dygraph_exception(self): + with self.assertRaises(ValueError): + self.dygraph_case() + + +class TestFunctionalConv3DErrorCase12(TestFunctionalConv3DErrorCase11): + def setUp(self): + self.input = np.random.randn(1, 3, 3, 3, 3) + self.filter = np.random.randn(3, 3, 1, 1, 1) + self.num_filters = 3 + self.filter_size = 1 + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 0 + self.data_format = "NCDHW" + + +class TestFunctionalConv3DErrorCase13(TestFunctionalConv3DErrorCase11): + def setUp(self): + self.input = np.random.randn(0, 0, 0, 0, 0) + self.filter = np.random.randn(1, 0, 0, 0, 0) + self.num_filters = 1 + self.filter_size = 1 + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.data_format = "NCDHW" + + +if __name__ == "__main__": + paddle.enable_static() + unittest.main() diff --git a/test/legacy_test/test_fused_elemwise_activation_op.py b/test/legacy_test/test_fused_elemwise_activation_op.py index b14e86aba9ff8..301985fff8ff6 100644 --- a/test/legacy_test/test_fused_elemwise_activation_op.py +++ 
b/test/legacy_test/test_fused_elemwise_activation_op.py @@ -18,8 +18,6 @@ import numpy as np from op_test import OpTest -from paddle.base import core - # TestFusedElementwiseActivationOp # TestFusedElementwiseActivationOp_scalar # TestFusedElementwiseActivationOp_scalar2 @@ -32,6 +30,25 @@ # TestFusedElementwiseActivationOp_rowwise_add_0 # TestFusedElementwiseActivationOp_rowwise_add_1 # TestFusedElementwiseActivationOp_channelwise_add +import paddle +from paddle.base import core + + +def api_wrapper( + x, y, functor_list=[], axis=-1, scale=0.0, save_intermediate_out=False +): + return paddle._legacy_C_ops.fused_elemwise_activation( + x, + y, + "axis", + axis, + "scale", + scale, + "save_intermediate_out", + save_intermediate_out, + "functor_list", + functor_list, + ) def create_test_class( @@ -40,6 +57,8 @@ def create_test_class( class TestFusedElementwiseActivationOp_base(OpTest): def setUp(self): self.op_type = "fused_elemwise_activation" + self.python_api = api_wrapper + self.python_out_sig = ['Out'] self.dtype = dtype self.axis = -1 diff --git a/test/legacy_test/test_fused_groupnorm.py b/test/legacy_test/test_fused_groupnorm.py new file mode 100644 index 0000000000000..5dbaa4d5a569d --- /dev/null +++ b/test/legacy_test/test_fused_groupnorm.py @@ -0,0 +1,321 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import unittest + +import numpy as np + +import paddle +import paddle.nn.functional as F +from paddle import base +from paddle.base import core +from paddle.base.layer_helper import LayerHelper + + +def naive_residual_add(x, residual): + return np.add(x, residual) + + +def naive_group_norm(x, scale, bias, epsilon, groups, data_layout): + dim = x.ndim + if dim == 3: + if data_layout == "NHWC": + x = np.transpose(x, (0, 2, 1)) # NLC => NCL + N, C, L = x.shape + G = groups + x = x.reshape((N * G, -1)) + mean = np.mean(x, axis=1, keepdims=True) + var = np.var(x, axis=1, keepdims=True) + output = (x - mean) / np.sqrt(var + epsilon) + output = output.reshape((N, C, L)) * scale.reshape( + (-1, 1) + ) + bias.reshape((-1, 1)) + if data_layout == "NHWC": + output = np.transpose(output, (0, 2, 1)) # NCL => NLC + return [output, mean.reshape((N, G)), var.reshape((N, G))] + elif dim == 4: + if data_layout == "NHWC": + x = np.transpose(x, (0, 3, 1, 2)) # NHWC => NCHW + N, C, H, W = x.shape + G = groups + x = x.reshape((N * G, -1)) + mean = np.mean(x, axis=1, keepdims=True) + var = np.var(x, axis=1, keepdims=True) + output = (x - mean) / np.sqrt(var + epsilon) + output = output.reshape((N, C, H, W)) * scale.reshape( + (-1, 1, 1) + ) + bias.reshape((-1, 1, 1)) + if data_layout == "NHWC": + output = np.transpose(output, (0, 2, 3, 1)) # NCHW => NHWC + return [output, mean.reshape((N, G)), var.reshape((N, G))] + else: + if data_layout == "NHWC": + x = np.transpose(x, (0, 4, 1, 2, 3)) # NDHWC => NCDHW + N, C, D, H, W = x.shape + G = groups + x = x.reshape((N * G, -1)) + mean = np.mean(x, axis=1, keepdims=True) + var = np.var(x, axis=1, keepdims=True) + output = (x - mean) / np.sqrt(var + epsilon) + output = output.reshape((N, C, D, H, W)) * scale.reshape( + (-1, 1, 1, 1) + ) + bias.reshape((-1, 1, 1, 1)) + if data_layout == "NHWC": + output = np.transpose(output, (0, 2, 3, 4, 1)) # NCDHW => NDHWC + return [output, mean.reshape((N, G)), var.reshape((N, G))] + + +def 
naive_residual_biasadd_layer_norm( + x, residual, scale, bias, epsilon, groups, data_layout, activation +): + x = x + residual + out = naive_group_norm(x, scale, bias, epsilon, groups, data_layout) + if activation == "silu": + out[0] = F.silu(paddle.to_tensor(out[0])).numpy() + return out + + +def add_group_norm_silu_static_wrapper( + x, residual, scale, bias, epsilon, groups, data_layout="NHWC", activation="" +): + helper = LayerHelper('add_group_norm_silu', **locals()) + mean_out = helper.create_variable_for_type_inference( + dtype=x.dtype, stop_gradient=True + ) + variance_out = helper.create_variable_for_type_inference( + dtype=x.dtype, stop_gradient=True + ) + + inputs = {'x': x} + if bias is not None: + inputs['bias'] = bias + if scale is not None: + inputs['scale'] = scale + if residual is not None: + inputs['residual'] = residual + + # create output + group_norm_out = helper.create_variable_for_type_inference(dtype=x.dtype) + residual_out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type="add_group_norm_silu", + inputs=inputs, + outputs={ + "y": group_norm_out, + "residual_out": residual_out, + "mean": mean_out, + "variance": variance_out, + }, + attrs={ + "epsilon": epsilon, + "groups": groups, + "data_format": data_layout, + "activation": activation, + }, + ) + + return group_norm_out, residual_out + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestGroupNormNHWC_StaticOp(unittest.TestCase): + def setUp(self): + np.random.seed(20) + self.shape = (2, 4, 2, 6) + self.r_shape = (1, 1, 1, 6) + self.x_np = np.random.uniform(-0.05, 0.05, self.shape) + self.residual_np = np.random.uniform(-0.05, 0.05, self.r_shape) + self.scale_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.bias_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.epsilon = 1e-5 + self.groups = 2 + 
self.data_layout = 'NHWC' + self.activation = '' + self.place = paddle.CUDAPlace(0) + + def check_residual_add_groupnorm( + self, x_np, scale_np, bias_np, residual_np, activation, dtype + ): + paddle.disable_static() + navie_groupnorm_out = naive_residual_biasadd_layer_norm( + x_np, + residual_np, + scale_np, + bias_np, + self.epsilon, + self.groups, + self.data_layout, + self.activation, + ) + navie_residual_out = naive_residual_add(x_np, residual_np) + paddle.enable_static() + + with paddle.static.program_guard(paddle.static.Program()): + x_static = paddle.static.data( + name="x_static", shape=self.shape, dtype=dtype + ) + residual_static = paddle.static.data( + name="residual_static", + shape=self.r_shape, + dtype=dtype, + ) + + scale_static = paddle.static.data( + name="scale_static", shape=[self.shape[-1]], dtype=dtype + ) + bias_static = paddle.static.data( + name="bias_static", shape=[self.shape[-1]], dtype=dtype + ) + outs = add_group_norm_silu_static_wrapper( + x_static, + residual_static, + scale_static, + bias_static, + self.epsilon, + self.groups, + self.data_layout, + activation, + ) + + exe = base.Executor(self.place) + out_s = exe.run( + feed={ + "x_static": x_np.astype(dtype), + "scale_static": scale_np.astype(dtype), + "residual_static": residual_np.astype(dtype), + "bias_static": bias_np.astype(dtype), + }, + fetch_list=[outs], + ) + return (out_s[0], out_s[1]), navie_groupnorm_out, navie_residual_out + + def test_residual_add_groupnorm_fp16(self): + if not paddle.is_compiled_with_cuda(): + return + self.dtype = np.float16 + ( + paddle_group_list, + paddle_naive_group_out, + paddle_naive_group_residual, + ) = self.check_residual_add_groupnorm( + self.x_np.astype(self.dtype), + self.scale_np.astype(self.dtype), + self.bias_np.astype(self.dtype), + self.residual_np.astype(self.dtype), + self.activation, + self.dtype, + ) + np.testing.assert_allclose( + paddle_group_list[1], + paddle_naive_group_residual, + rtol=1e-5, + atol=1e-5, + ) + 
np.testing.assert_allclose( + paddle_group_list[0], + paddle_naive_group_out[0], + rtol=1e-4, + atol=1e-4, + ) + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestGroupNormNHWCSilu_StaticOp(TestGroupNormNHWC_StaticOp): + def setUp(self): + np.random.seed(20) + self.shape = (2, 4, 2, 6) + self.r_shape = (1, 1, 1, 6) + self.x_np = np.random.uniform(-0.05, 0.05, self.shape) + self.residual_np = np.random.uniform(-0.05, 0.05, self.r_shape) + self.scale_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.bias_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.epsilon = 1e-5 + self.groups = 2 + self.data_layout = 'NHWC' + self.activation = 'silu' + self.place = paddle.CUDAPlace(0) + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestGroupNormNHWC_StaticOp_1(TestGroupNormNHWC_StaticOp): + def setUp(self): + np.random.seed(20) + self.shape = (2, 4, 2, 6) + self.r_shape = (2, 4, 2, 6) + self.x_np = np.random.uniform(-0.05, 0.05, self.shape) + self.residual_np = np.random.uniform(-0.05, 0.05, self.r_shape) + self.scale_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.bias_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.epsilon = 1e-5 + self.groups = 2 + self.data_layout = 'NHWC' + self.activation = 'silu' + self.place = paddle.CUDAPlace(0) + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestGroupNormNHWCSilu_StaticOp_1(TestGroupNormNHWC_StaticOp): + def setUp(self): + np.random.seed(20) + self.shape = (2, 4, 2, 6) + self.r_shape = (2, 4, 2, 6) + self.x_np = np.random.uniform(-0.05, 0.05, self.shape) + self.residual_np = 
np.random.uniform(-0.05, 0.05, self.r_shape) + self.scale_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.bias_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.epsilon = 1e-5 + self.groups = 2 + self.data_layout = 'NHWC' + self.activation = 'silu' + self.place = paddle.CUDAPlace(0) + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestGroupNormNHWCSingleC_StaticOp(TestGroupNormNHWC_StaticOp): + def setUp(self): + np.random.seed(20) + self.shape = (2, 4, 2, 6) + self.r_shape = (2, 4, 2, 6) + self.x_np = np.random.uniform(-0.05, 0.05, self.shape) + self.residual_np = np.random.uniform(-0.05, 0.05, self.r_shape) + self.scale_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.bias_np = np.random.uniform(-0.05, 0.05, [self.shape[-1]]) + self.epsilon = 1e-5 + self.groups = 6 + self.data_layout = 'NHWC' + self.activation = '' + self.place = paddle.CUDAPlace(0) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_gammaln_op.py b/test/legacy_test/test_gammaln_op.py similarity index 100% rename from test/deprecated/legacy_test/test_gammaln_op.py rename to test/legacy_test/test_gammaln_op.py diff --git a/test/deprecated/legacy_test/test_gaussian_random_op.py b/test/legacy_test/test_gaussian_random_op.py similarity index 100% rename from test/deprecated/legacy_test/test_gaussian_random_op.py rename to test/legacy_test/test_gaussian_random_op.py diff --git a/test/deprecated/legacy_test/test_graph_send_recv_op.py b/test/legacy_test/test_graph_send_recv_op.py similarity index 100% rename from test/deprecated/legacy_test/test_graph_send_recv_op.py rename to test/legacy_test/test_graph_send_recv_op.py diff --git a/test/deprecated/legacy_test/test_graph_send_ue_recv_op.py b/test/legacy_test/test_graph_send_ue_recv_op.py similarity index 100% rename from 
test/deprecated/legacy_test/test_graph_send_ue_recv_op.py rename to test/legacy_test/test_graph_send_ue_recv_op.py diff --git a/test/deprecated/legacy_test/test_graph_send_uv_op.py b/test/legacy_test/test_graph_send_uv_op.py similarity index 100% rename from test/deprecated/legacy_test/test_graph_send_uv_op.py rename to test/legacy_test/test_graph_send_uv_op.py diff --git a/test/deprecated/legacy_test/test_grid_sampler_op.py b/test/legacy_test/test_grid_sampler_op.py similarity index 100% rename from test/deprecated/legacy_test/test_grid_sampler_op.py rename to test/legacy_test/test_grid_sampler_op.py diff --git a/test/legacy_test/test_group_norm_op.py b/test/legacy_test/test_group_norm_op.py index f097df3b0b99c..7a6f57cc61ece 100644 --- a/test/legacy_test/test_group_norm_op.py +++ b/test/legacy_test/test_group_norm_op.py @@ -209,7 +209,7 @@ def do_compare_between_place(self): gpu_grads, inputs_to_check, 0.005, - "Gradient Check On %s" % str(place), + f"Gradient Check On {str(place)}", ) def test_check_grad(self): @@ -1748,7 +1748,7 @@ def test_jit_comp(self): fwd_actual[i], rtol=rtol, atol=atol, - err_msg='%s jit fwd' % self.places[i], + err_msg=f'{self.places[i]} jit fwd', ) # TODO: fix the diff between cpu and gpu grad is large in original op @@ -1762,7 +1762,7 @@ def test_jit_comp(self): rev_actual[i], rtol=rtol, atol=atol, - err_msg='%s jit rev' % self.places[i], + err_msg=f'{self.places[i]} jit rev', ) def test_jit_comp_with_cinn(self): @@ -1820,7 +1820,7 @@ def test_jit_comp_with_cinn(self): fwd_actual[i], rtol=rtol, # mean of uniform distribution, scale for avoid random failed atol=atol, - err_msg='%s jit_cinn fwd' % self.places[i], + err_msg=f'{self.places[i]} jit_cinn fwd', ) # TODO: fix the diff between cpu and gpu grad is large in original op # now use larger threshold when testing cpu grads to bypass cpu grad test @@ -1832,7 +1832,7 @@ def test_jit_comp_with_cinn(self): rev_actual[i], rtol=rtol, # mean of uniform distribution, scale for avoid random 
failed atol=atol, - err_msg='%s jit_cinn rev' % self.places[i], + err_msg=f'{self.places[i]} jit_cinn rev', ) i += 1 diff --git a/test/deprecated/legacy_test/test_gru_op.py b/test/legacy_test/test_gru_op.py similarity index 100% rename from test/deprecated/legacy_test/test_gru_op.py rename to test/legacy_test/test_gru_op.py diff --git a/test/deprecated/legacy_test/test_gru_unit_op.py b/test/legacy_test/test_gru_unit_op.py similarity index 100% rename from test/deprecated/legacy_test/test_gru_unit_op.py rename to test/legacy_test/test_gru_unit_op.py diff --git a/test/deprecated/legacy_test/test_gumbel_softmax_op.py b/test/legacy_test/test_gumbel_softmax_op.py similarity index 100% rename from test/deprecated/legacy_test/test_gumbel_softmax_op.py rename to test/legacy_test/test_gumbel_softmax_op.py diff --git a/test/deprecated/legacy_test/test_hinge_loss_op.py b/test/legacy_test/test_hinge_loss_op.py similarity index 100% rename from test/deprecated/legacy_test/test_hinge_loss_op.py rename to test/legacy_test/test_hinge_loss_op.py diff --git a/test/deprecated/legacy_test/test_hsigmoid_op.py b/test/legacy_test/test_hsigmoid_op.py similarity index 89% rename from test/deprecated/legacy_test/test_hsigmoid_op.py rename to test/legacy_test/test_hsigmoid_op.py index ad3ae81821cdf..f481fc2ebee2f 100644 --- a/test/deprecated/legacy_test/test_hsigmoid_op.py +++ b/test/legacy_test/test_hsigmoid_op.py @@ -20,7 +20,6 @@ import paddle import paddle.nn.functional as F -from paddle import base from paddle.pir_utils import test_with_pir_api paddle.enable_static() @@ -283,91 +282,6 @@ def test_check_output(self): self.check_output(check_pir=True) -class TestHSigmoidOpWithSparseGrad(unittest.TestCase): - def hs_net_conf(self, is_sparse): - input_word = paddle.static.data(name="x", shape=[-1, 1], dtype='int64') - path_table = paddle.static.data( - name='path_table', shape=[-1, 3], dtype='int64' - ) - path_code = paddle.static.data( - name='path_code', shape=[-1, 3], dtype='int64' - ) - 
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') - - data_list = [input_word, path_table, path_code, label] - - emb = paddle.static.nn.embedding( - input=input_word, - is_sparse=is_sparse, - size=[3, 3], - param_attr=base.ParamAttr( - initializer=paddle.nn.initializer.Normal(std=1 / math.sqrt(3)) - ), - ) - - loss = paddle.nn.HSigmoidLoss( - feature_size=emb.shape[1], - num_classes=3, - bias_attr=True, - is_custom=True, - is_sparse=is_sparse, - ) - - cost = loss( - input=emb, - label=label, - path_table=path_table, - path_code=path_code, - ) - - avg_cost = paddle.mean(cost) - - return avg_cost, data_list - - def training_test(self, is_sparse): - with paddle.static.program_guard( - paddle.static.Program(), paddle.static.Program() - ): - paddle.seed(1) - start_up = paddle.static.default_startup_program() - x = np.arange(6).reshape(6) - path_table = np.array([(1, 2, -1), (1, 2, -1)]).astype('int64') - path_code = np.array([(1, 0, -1), (0, 0, -1)]).astype('int64') - label = np.array([1, 4]).astype('int64') - - loss, data_list = self.hs_net_conf(is_sparse) - optimizer = paddle.optimizer.SGD(learning_rate=1e-3) - optimizer.minimize(loss) - - main_program = paddle.static.default_main_program() - place = base.CPUPlace() - feeder = base.DataFeeder(feed_list=data_list, place=place) - exe = paddle.static.Executor(place) - - exe.run(start_up) - result = [] - for i in range(10): - data = [ - ( - [[x[i % 2]]], - [list(path_table[i % 2])], - [list(path_code[i % 2])], - [label[i % 2]], - ) - ] - - loss_val = exe.run( - main_program, feed=feeder.feed(data), fetch_list=[loss] - ) - result.append(loss_val) - return result - - def test_hs_grad_with_sparse(self): - dense_result = self.training_test(is_sparse=False) - sparse_result = self.training_test(is_sparse=True) - assert dense_result == sparse_result - - @skip_check_grad_ci( reason="[skip shape check] The huffman tree is structed separately. It will be complicated if use large shape." 
) diff --git a/test/deprecated/legacy_test/test_huber_loss_op.py b/test/legacy_test/test_huber_loss_op.py similarity index 100% rename from test/deprecated/legacy_test/test_huber_loss_op.py rename to test/legacy_test/test_huber_loss_op.py diff --git a/test/deprecated/legacy_test/test_identity_loss_op.py b/test/legacy_test/test_identity_loss_op.py similarity index 100% rename from test/deprecated/legacy_test/test_identity_loss_op.py rename to test/legacy_test/test_identity_loss_op.py diff --git a/test/deprecated/legacy_test/test_im2sequence_op.py b/test/legacy_test/test_im2sequence_op.py similarity index 100% rename from test/deprecated/legacy_test/test_im2sequence_op.py rename to test/legacy_test/test_im2sequence_op.py diff --git a/test/legacy_test/test_imperative_deepcf.py b/test/legacy_test/test_imperative_deepcf.py index 301ec4e0a468e..31e94078c7ca8 100644 --- a/test/legacy_test/test_imperative_deepcf.py +++ b/test/legacy_test/test_imperative_deepcf.py @@ -188,7 +188,7 @@ def get_data(self): ) def load_data(self): - sys.stderr.write('loading from %s\n' % self.data_path) + sys.stderr.write(f'loading from {self.data_path}\n') likes = {} num_users = -1 num_items = -1 @@ -299,7 +299,7 @@ def test_deefcf(self): }, fetch_list=[loss], )[0] - sys.stderr.write('static loss %s\n' % static_loss) + sys.stderr.write(f'static loss {static_loss}\n') with base.dygraph.guard(): paddle.seed(seed) diff --git a/test/deprecated/legacy_test/test_imperative_framework.py b/test/legacy_test/test_imperative_framework.py similarity index 77% rename from test/deprecated/legacy_test/test_imperative_framework.py rename to test/legacy_test/test_imperative_framework.py index 01f6d37eed4b1..b85eeb11df517 100644 --- a/test/deprecated/legacy_test/test_imperative_framework.py +++ b/test/legacy_test/test_imperative_framework.py @@ -15,7 +15,6 @@ import unittest import numpy as np -from test_imperative_base import new_program_scope import paddle from paddle import base @@ -53,21 +52,13 @@ def 
forward(self, inputs): class TestDygraphFramework(unittest.TestCase): - def test_dygraph_backward(self): - with new_program_scope(): - mlp = MLP(input_size=2) - var_inp = paddle.static.data("input", shape=[2, 2], dtype="float32") - out = mlp(var_inp) - try: - out.backward() - raise AssertionError( - "backward should not be usable in static graph mode" - ) - except AssertionError as e: - self.assertTrue(e is not None) - def test_dygraph_to_string(self): np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) with base.dygraph.guard(): var_inp = paddle.to_tensor(np_inp) print(str(var_inp)) + + +if __name__ == '__main__': + paddle.disable_static() + unittest.main() diff --git a/test/legacy_test/test_imperative_hook_for_layer.py b/test/legacy_test/test_imperative_hook_for_layer.py index e80a31d47805f..18335bfaf98f1 100644 --- a/test/legacy_test/test_imperative_hook_for_layer.py +++ b/test/legacy_test/test_imperative_hook_for_layer.py @@ -18,7 +18,7 @@ import numpy as np sys.path.append("../deprecated/legacy_test") -from test_imperative_lod_tensor_to_selected_rows import SimpleNet +from test_imperative_lod_tensor_to_selected_rows_deprecated import SimpleNet import paddle from paddle import base diff --git a/test/legacy_test/test_imperative_layers.py b/test/legacy_test/test_imperative_layers.py index 9906d3ba0ede0..947ab037ee89b 100644 --- a/test/legacy_test/test_imperative_layers.py +++ b/test/legacy_test/test_imperative_layers.py @@ -85,7 +85,9 @@ def test_layer_str(self): self.assertEqual(str(module), 'Tanhshrink()') module = nn.ThresholdedReLU() - self.assertEqual(str(module), 'ThresholdedReLU(threshold=1.0)') + self.assertEqual( + str(module), 'ThresholdedReLU(threshold=1.0, value=0.0)' + ) module = nn.LogSigmoid() self.assertEqual(str(module), 'LogSigmoid()') diff --git a/test/deprecated/legacy_test/test_imperative_star_gan_with_gradient_penalty.py b/test/legacy_test/test_imperative_star_gan_with_gradient_penalty.py similarity index 100% rename from 
test/deprecated/legacy_test/test_imperative_star_gan_with_gradient_penalty.py rename to test/legacy_test/test_imperative_star_gan_with_gradient_penalty.py diff --git a/test/deprecated/legacy_test/test_index_add_op.py b/test/legacy_test/test_index_add_op.py similarity index 100% rename from test/deprecated/legacy_test/test_index_add_op.py rename to test/legacy_test/test_index_add_op.py diff --git a/test/deprecated/legacy_test/test_index_sample_op.py b/test/legacy_test/test_index_sample_op.py similarity index 100% rename from test/deprecated/legacy_test/test_index_sample_op.py rename to test/legacy_test/test_index_sample_op.py diff --git a/test/deprecated/legacy_test/test_index_select_op.py b/test/legacy_test/test_index_select_op.py similarity index 100% rename from test/deprecated/legacy_test/test_index_select_op.py rename to test/legacy_test/test_index_select_op.py diff --git a/test/legacy_test/test_inference_api.py b/test/legacy_test/test_inference_api.py new file mode 100644 index 0000000000000..2bbf3ceb24431 --- /dev/null +++ b/test/legacy_test/test_inference_api.py @@ -0,0 +1,69 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import paddle + +paddle.enable_static() +import numpy as np + +from paddle.base.core import PaddleDType, PaddleTensor + + +class TestInferenceApi(unittest.TestCase): + def test_inference_api(self): + tensor32 = np.random.randint(10, 20, size=[20, 2]).astype('int32') + paddletensor32 = PaddleTensor(tensor32) + dtype32 = paddletensor32.dtype + self.assertEqual(dtype32, PaddleDType.INT32) + self.assertEqual( + paddletensor32.data.tolist('int32'), tensor32.ravel().tolist() + ) + paddletensor32.data.reset(tensor32) + self.assertEqual( + paddletensor32.as_ndarray().ravel().tolist(), + tensor32.ravel().tolist(), + ) + + tensor64 = np.random.randint(10, 20, size=[20, 2]).astype('int64') + paddletensor64 = PaddleTensor(tensor64) + dtype64 = paddletensor64.dtype + self.assertEqual(dtype64, PaddleDType.INT64) + self.assertEqual( + paddletensor64.data.tolist('int64'), tensor64.ravel().tolist() + ) + paddletensor64.data.reset(tensor64) + self.assertEqual( + paddletensor64.as_ndarray().ravel().tolist(), + tensor64.ravel().tolist(), + ) + + tensor_float = np.random.randn(20, 2).astype('float32') + paddletensor_float = PaddleTensor(tensor_float) + dtype_float = paddletensor_float.dtype + self.assertEqual(dtype_float, PaddleDType.FLOAT32) + self.assertEqual( + paddletensor_float.data.tolist('float32'), + tensor_float.ravel().tolist(), + ) + paddletensor_float.data.reset(tensor_float) + self.assertEqual( + paddletensor_float.as_ndarray().ravel().tolist(), + tensor_float.ravel().tolist(), + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/legacy_test/test_inference_model_io.py b/test/legacy_test/test_inference_model_io.py new file mode 100644 index 0000000000000..6b28a41bc4b08 --- /dev/null +++ b/test/legacy_test/test_inference_model_io.py @@ -0,0 +1,45 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import paddle +from paddle.base import core, executor +from paddle.distributed.io import ( + load_inference_model_distributed, +) +from paddle.pir_utils import test_with_pir_api +from paddle.static.io import load_inference_model + +paddle.enable_static() + + +class TestLoadInferenceModelError(unittest.TestCase): + @test_with_pir_api + def test_load_model_not_exist(self): + place = core.CPUPlace() + exe = executor.Executor(place) + self.assertRaises( + ValueError, load_inference_model, './test_not_exist_dir/model', exe + ) + self.assertRaises( + ValueError, + load_inference_model_distributed, + './test_not_exist_dir', + exe, + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_input_spec.py b/test/legacy_test/test_input_spec.py similarity index 96% rename from test/deprecated/legacy_test/test_input_spec.py rename to test/legacy_test/test_input_spec.py index 8f86d002da306..aa649b58ca2a8 100644 --- a/test/deprecated/legacy_test/test_input_spec.py +++ b/test/legacy_test/test_input_spec.py @@ -35,9 +35,17 @@ def test_default(self): self.assertIsNone(tensor_spec.name) def test_from_tensor(self): - x_bool = paddle.tensor.fill_constant( - shape=[1], dtype='bool', value=True - ) + if paddle.framework.use_pir_api(): + x_bool = paddle.pir.core.create_parameter( + dtype='float32', + shape=[1], + name='xx', + initializer=paddle.nn.initializer.Uniform(), + ) + else: + 
x_bool = paddle.tensor.fill_constant( + shape=[1], dtype='bool', value=True + ) bool_spec = InputSpec.from_tensor(x_bool) self.assertEqual(bool_spec.dtype, x_bool.dtype) self.assertEqual(list(bool_spec.shape), list(x_bool.shape)) diff --git a/test/deprecated/legacy_test/test_instance_norm_op_v2.py b/test/legacy_test/test_instance_norm_op_v2.py similarity index 100% rename from test/deprecated/legacy_test/test_instance_norm_op_v2.py rename to test/legacy_test/test_instance_norm_op_v2.py diff --git a/test/deprecated/legacy_test/test_is_integer.py b/test/legacy_test/test_is_integer.py similarity index 100% rename from test/deprecated/legacy_test/test_is_integer.py rename to test/legacy_test/test_is_integer.py diff --git a/test/legacy_test/test_isin.py b/test/legacy_test/test_isin.py new file mode 100644 index 0000000000000..101d89b4de84f --- /dev/null +++ b/test/legacy_test/test_isin.py @@ -0,0 +1,327 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from op_test import convert_float_to_uint16 + +import paddle +from paddle import base +from paddle.base import core +from paddle.pir_utils import test_with_pir_api + +DATA_CASES = [ + {'x_data': np.array(1.0), 'test_x_data': np.array(-1.0)}, + { + 'x_data': np.random.randint(-10, 10, (4, 8)), + 'test_x_data': np.random.randint(0, 20, (2, 3)), + }, + { + 'x_data': np.random.randint(-50, 50, (8, 64)), + 'test_x_data': np.random.randint(-20, 0, (4, 256)), + }, +] + +DATA_CASES_UNIQUE = [ + { + 'x_data': np.arange(0, 1000).reshape([2, 5, 100]), + 'test_x_data': np.arange(200, 700), + }, + { + 'x_data': np.arange(-100, 100).reshape([2, 2, 5, 10]), + 'test_x_data': np.arange(50, 150).reshape([4, 5, 5]), + }, +] + +DATA_CASES_BF16 = [ + {'x_data': np.array(1.0), 'test_x_data': np.array(0.0)}, + { + 'x_data': np.random.randint(0, 10, (4, 8)), + 'test_x_data': np.random.randint(5, 15, (2, 3)), + }, + { + 'x_data': np.random.randint(0, 50, (8, 64)), + 'test_x_data': np.random.randint(0, 20, (4, 256)), + }, +] + + +DATA_CASES_UNIQUE_BF16 = [ + { + 'x_data': np.arange(0, 100).reshape([2, 5, 10]), + 'test_x_data': np.arange(50, 150), + }, +] + + +DATA_TYPE = ['float32', 'float64', 'int32', 'int64'] + + +def run_dygraph( + x_data, + test_x_data, + type, + assume_unique=False, + invert=False, + use_gpu=False, +): + place = paddle.CPUPlace() + if use_gpu and base.core.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + paddle.disable_static(place) + x_data = x_data.astype(type) + test_x_data = test_x_data.astype(type) + x_e = paddle.to_tensor(x_data) + x_t = paddle.to_tensor(test_x_data) + return paddle.isin(x_e, x_t, assume_unique, invert) + + +def run_static( + x_data, + test_x_data, + type, + assume_unique=False, + invert=False, + use_gpu=False, +): + paddle.enable_static() + startup_program = paddle.static.Program() + main_program = paddle.static.Program() + place = paddle.CPUPlace() + if use_gpu and 
base.core.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + exe = base.Executor(place) + with paddle.static.program_guard(main_program, startup_program): + x_data = x_data.astype(type) + test_x_data = test_x_data.astype(type) + x_e = paddle.static.data(name='x_e', shape=x_data.shape, dtype=type) + x_t = paddle.static.data( + name='x_t', shape=test_x_data.shape, dtype=type + ) + res = paddle.isin(x_e, x_t, assume_unique, invert) + static_result = exe.run( + feed={'x_e': x_data, 'x_t': test_x_data}, + fetch_list=[res], + ) + return static_result + + +def test( + data_cases, type_cases, assume_unique=False, invert=False, use_gpu=False +): + for type in type_cases: + for case in data_cases: + x_data = case['x_data'] + test_x_data = case['test_x_data'] + dygraph_result = run_dygraph( + x_data, + test_x_data, + type, + assume_unique, + invert, + use_gpu, + ).numpy() + np_result = np.isin( + x_data.astype(type), + test_x_data.astype(type), + assume_unique=assume_unique, + invert=invert, + ) + np.testing.assert_equal(dygraph_result, np_result) + + @test_with_pir_api + def test_static(): + (static_result,) = run_static( + x_data, + test_x_data, + type, + assume_unique, + invert, + use_gpu, + ) + np.testing.assert_equal(static_result, np_result) + + test_static() + + +def run_dygraph_bf16( + x_data, + test_x_data, + assume_unique=False, + invert=False, + use_gpu=False, +): + place = paddle.CPUPlace() + if use_gpu and base.core.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + paddle.disable_static(place) + x_e = paddle.to_tensor(convert_float_to_uint16(x_data)) + x_t = paddle.to_tensor(convert_float_to_uint16(test_x_data)) + return paddle.isin(x_e, x_t, assume_unique, invert) + + +def run_static_bf16( + x_data, + test_x_data, + assume_unique=False, + invert=False, + use_gpu=False, +): + paddle.enable_static() + startup_program = paddle.static.Program() + main_program = paddle.static.Program() + place = paddle.CPUPlace() + if use_gpu and 
base.core.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + exe = base.Executor(place) + with paddle.static.program_guard(main_program, startup_program): + x_data = convert_float_to_uint16(x_data) + test_x_data = convert_float_to_uint16(test_x_data) + x_e = paddle.static.data( + name='x_e', shape=x_data.shape, dtype=np.uint16 + ) + x_t = paddle.static.data( + name='x_t', shape=test_x_data.shape, dtype=np.uint16 + ) + res = paddle.isin(x_e, x_t, assume_unique, invert) + static_result = exe.run( + feed={'x_e': x_data, 'x_t': test_x_data}, + fetch_list=[res], + ) + return static_result + + +def test_bf16(data_cases, assume_unique=False, invert=False, use_gpu=False): + for case in data_cases: + x_data = case['x_data'].astype("float32") + test_x_data = case['test_x_data'].astype("float32") + dygraph_result = run_dygraph_bf16( + x_data, + test_x_data, + assume_unique, + invert, + use_gpu, + ).numpy() + np_result = np.isin( + x_data, + test_x_data, + assume_unique=assume_unique, + invert=invert, + ) + np.testing.assert_equal(dygraph_result, np_result) + + @test_with_pir_api + def test_static(): + (static_result,) = run_static_bf16( + x_data, + test_x_data, + assume_unique, + invert, + use_gpu, + ) + np.testing.assert_equal(static_result, np_result) + + test_static() + + +class TestIsInError(unittest.TestCase): + def test_for_exception(self): + with self.assertRaises(TypeError): + paddle.isin(np.array([1, 2]), np.array([1, 2])) + + +class TestIsIn(unittest.TestCase): + def test_without_gpu(self): + test(DATA_CASES, DATA_TYPE) + + def test_with_gpu(self): + test(DATA_CASES, DATA_TYPE, use_gpu=True) + + def test_invert_without_gpu(self): + test(DATA_CASES, DATA_TYPE, invert=True) + + def test_invert_with_gpu(self): + test(DATA_CASES, DATA_TYPE, invert=True, use_gpu=True) + + def test_unique_without_gpu(self): + test(DATA_CASES_UNIQUE, DATA_TYPE, assume_unique=True) + + def test_unique_with_gpu(self): + test(DATA_CASES_UNIQUE, DATA_TYPE, assume_unique=True, 
use_gpu=True) + + def test_unique_invert_without_gpu(self): + test(DATA_CASES_UNIQUE, DATA_TYPE, assume_unique=True, invert=True) + + def test_unique_invert_with_gpu(self): + test( + DATA_CASES_UNIQUE, + DATA_TYPE, + assume_unique=True, + invert=True, + use_gpu=True, + ) + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA and not support the float16", +) +class TestIsInFP16(unittest.TestCase): + def test_default(self): + test(DATA_CASES, ['float16'], use_gpu=True) + + def test_invert(self): + test(DATA_CASES, ['float16'], invert=True, use_gpu=True) + + def test_unique(self): + test(DATA_CASES_UNIQUE, ['float16'], assume_unique=True, use_gpu=True) + + def test_unique_invert(self): + test( + DATA_CASES_UNIQUE, + ['float16'], + assume_unique=True, + invert=True, + use_gpu=True, + ) + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA and not support the float16", +) +class TestIsInBF16(unittest.TestCase): + def test_default(self): + test_bf16(DATA_CASES_BF16, use_gpu=True) + + def test_invert(self): + test_bf16(DATA_CASES_BF16, invert=True, use_gpu=True) + + def test_unique(self): + test_bf16(DATA_CASES_UNIQUE_BF16, assume_unique=True, use_gpu=True) + + def test_unique_invert(self): + test_bf16( + DATA_CASES_UNIQUE_BF16, + assume_unique=True, + invert=True, + use_gpu=True, + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/legacy_test/test_jit_save_load.py b/test/legacy_test/test_jit_save_load.py index 09f5a7b9a4e4b..04b86c6864685 100644 --- a/test/legacy_test/test_jit_save_load.py +++ b/test/legacy_test/test_jit_save_load.py @@ -329,7 +329,6 @@ def train(layer, input_size=784, label_size=1): for data in train_loader(): img, label = data label.stop_gradient = True - cost = layer(img) loss = paddle.nn.functional.cross_entropy( @@ -396,6 +395,8 @@ def 
train_and_save_model(self, model_path=None): @test_with_dygraph_pir def test_save_load(self): # train and save model + if not paddle.framework.use_pir_api(): + return train_layer = self.train_and_save_model() # load model loaded_layer = paddle.jit.load(self.model_path) @@ -496,6 +497,7 @@ def setUp(self): def tearDown(self): self.temp_dir.cleanup() + @test_with_dygraph_pir def test_output_same_order(self): x = paddle.to_tensor(np.random.random((4, 8)).astype('float32')) @@ -1712,6 +1714,7 @@ def setUp(self): def tearDown(self): self.temp_dir.cleanup() + @test_with_dygraph_pir def test_save_load_finetune_load(self): model_path = os.path.join( self.temp_dir.name, "test_jit_save_load_save_without_running/model" @@ -1788,7 +1791,6 @@ def forward(self, x): return y -''' class TestJitSaveLoadFinetuneLoad(unittest.TestCase): def setUp(self): # enable dygraph mode @@ -1798,8 +1800,10 @@ def setUp(self): def tearDown(self): self.temp_dir.cleanup() - #@test_with_dygraph_pir + @test_with_dygraph_pir def test_save_load_finetune_load(self): + if not paddle.framework.use_pir_api(): + return model_path = os.path.join( self.temp_dir.name, "test_jit_save_load_finetune_load/model" ) @@ -1830,7 +1834,6 @@ def test_save_load_finetune_load(self): self.assertTrue(float((result_00 - result_10).abs().max()) < 1e-5) self.assertTrue(float((result_01 - result_11).abs().max()) < 1e-5) -''' # NOTE(weixin): When there are multiple test functions in an diff --git a/test/deprecated/legacy_test/test_kldiv_loss_op.py b/test/legacy_test/test_kldiv_loss_op.py similarity index 100% rename from test/deprecated/legacy_test/test_kldiv_loss_op.py rename to test/legacy_test/test_kldiv_loss_op.py diff --git a/test/deprecated/legacy_test/test_kron_op.py b/test/legacy_test/test_kron_op.py similarity index 100% rename from test/deprecated/legacy_test/test_kron_op.py rename to test/legacy_test/test_kron_op.py diff --git a/test/deprecated/legacy_test/test_kthvalue_op.py b/test/legacy_test/test_kthvalue_op.py 
similarity index 100% rename from test/deprecated/legacy_test/test_kthvalue_op.py rename to test/legacy_test/test_kthvalue_op.py diff --git a/test/deprecated/legacy_test/test_l1_norm_op.py b/test/legacy_test/test_l1_norm_op.py similarity index 100% rename from test/deprecated/legacy_test/test_l1_norm_op.py rename to test/legacy_test/test_l1_norm_op.py diff --git a/test/deprecated/legacy_test/test_label_smooth_op.py b/test/legacy_test/test_label_smooth_op.py similarity index 100% rename from test/deprecated/legacy_test/test_label_smooth_op.py rename to test/legacy_test/test_label_smooth_op.py diff --git a/test/deprecated/legacy_test/test_layer_norm_op.py b/test/legacy_test/test_layer_norm_op.py similarity index 73% rename from test/deprecated/legacy_test/test_layer_norm_op.py rename to test/legacy_test/test_layer_norm_op.py index 29e129781bfe0..2fd1eb2b1a747 100644 --- a/test/deprecated/legacy_test/test_layer_norm_op.py +++ b/test/legacy_test/test_layer_norm_op.py @@ -21,7 +21,6 @@ import paddle import paddle.nn.functional as F -from paddle import base from paddle.base import Program, core, program_guard from paddle.static.amp.fp16_utils import _keep_layer_norm_scale_bias_to_fp32 @@ -580,268 +579,6 @@ def initConfig(self): self.check_pir = True -class TestLayerNormOp(unittest.TestCase): - def setUp(self): - self.use_cudnn = True - paddle.enable_static() - - def __assert_close(self, tensor, np_array, msg, atol=1e-4): - np.testing.assert_allclose( - np.array(tensor).flatten(), - np_array.flatten(), - rtol=1e-3, - atol=atol, - err_msg=msg, - ) - - def check_forward_backward( - self, - shape, - begin_norm_axis, - has_scale=True, - has_bias=True, - y_grad_scale=1.0, - use_mkldnn=False, - ): - def test_with_place( - place, shape, begin_norm_axis, use_mkldnn=use_mkldnn - ): - # attr - epsilon = 0.00001 - x_shape = shape - D = reduce(mul, x_shape[begin_norm_axis : len(x_shape)], 1) - scale_shape = [D] - - np.random.seed(123) - x = 
np.random.random_sample(x_shape).astype(np.float32) - scale = ( - np.random.random_sample(scale_shape).astype(np.float32) - if has_scale - else None - ) - bias = ( - np.random.random_sample(scale_shape).astype(np.float32) - if has_bias - else None - ) - y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype( - np.float32 - ) - - # reference forward & backward - y, mean, variance = _reference_layer_norm_naive( - x, scale, bias, epsilon, begin_norm_axis - ) - x_grad, scale_grad, bias_grad = _reference_layer_norm_grad( - x, y_grad, scale, bias, mean, variance, begin_norm_axis - ) - - var_dict = locals() - var_dict['y@GRAD'] = y_grad - var_names = ['x', 'mean', 'variance', 'y', 'y@GRAD'] - if has_scale: - var_names += ['scale'] - if has_bias: - var_names += ['bias'] - ground_truth = {name: var_dict[name] for name in var_names} - - program = base.Program() - with base.program_guard(program): - block = program.global_block() - for name in ground_truth: - block.create_var( - name=name, - dtype='float32', - shape=ground_truth[name].shape, - ) - inputs = {"X": block.var('x')} - fetch_list = [ - 'y', - 'mean', - 'variance', - 'x@GRAD', - ] - if has_scale: - inputs["Scale"] = block.var('scale') - fetch_list += ['scale@GRAD'] - if has_bias: - inputs["Bias"] = block.var('bias') - fetch_list += ['bias@GRAD'] - layer_norm_op = block.append_op( - type="layer_norm", - inputs=inputs, - outputs={ - "Y": block.var('y'), - "Mean": block.var('mean'), # share the same memory - "Variance": block.var( - 'variance' - ), # share the same memory - }, - attrs={ - "epsilon": epsilon, - "begin_norm_axis": begin_norm_axis, - "use_mkldnn": use_mkldnn, - }, - ) - # generate backward op_desc - grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( - layer_norm_op.desc, set(), [] - ) - grad_op_desc = grad_op_desc_list[0] - new_op_desc = block.desc.append_op() - new_op_desc.copy_from(grad_op_desc) - for var_name in grad_op_desc.output_arg_names(): - 
block.desc.var(var_name.encode("ascii")) - grad_op_desc.infer_var_type(block.desc) - grad_op_desc.infer_shape(block.desc) - for arg in grad_op_desc.output_arg_names(): - grad_var = block.desc.find_var(arg.encode("ascii")) - grad_var.set_dtype(core.VarDesc.VarType.FP32) - - program._sync_with_cpp() - exe = base.Executor(place) - name_list = ['x', 'y@GRAD'] - if has_scale: - name_list += ['scale'] - if has_bias: - name_list += ['bias'] - - out = exe.run( - program, - feed={name: var_dict[name] for name in name_list}, - fetch_list=fetch_list, - ) - # print(y) - # print(out[0]) - self.__assert_close(y, out[0], "y") - self.__assert_close(mean, out[1], "mean") - self.__assert_close(variance, out[2], "variance", 1e-3) - self.__assert_close(x_grad, out[3], "x_grad") - if has_scale: - self.__assert_close( - scale_grad, - out[fetch_list.index('scale@GRAD')], - "scale_grad", - 1e-3, - ) - if has_bias: - self.__assert_close( - bias_grad, - out[fetch_list.index('bias@GRAD')], - "bias_grad", - ) - - places = [core.CPUPlace()] - if ( - core.is_compiled_with_cuda() - and core.op_support_gpu("layer_norm") - and self.use_cudnn - ): - places.append(core.CUDAPlace(0)) - - for place in places: - test_with_place(place, shape, begin_norm_axis) - - def test_check_forward_backward_with_scale_and_bias(self): - self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) - self.check_forward_backward(shape=[1, 3, 4, 5], begin_norm_axis=1) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=False, - has_bias=True, - ) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=True, - has_bias=False, - ) - self.check_forward_backward( - shape=[2, 3, 4, 5], - begin_norm_axis=1, - has_scale=False, - has_bias=False, - ) - self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) - self.check_forward_backward( - shape=[92, 513, 129], begin_norm_axis=2, y_grad_scale=0.1 - ) - self.check_forward_backward(shape=[3, 34, 1134], 
begin_norm_axis=2) - self.check_forward_backward(shape=[3, 2, 1133], begin_norm_axis=2) - self.check_forward_backward( - shape=[92, 513, 1134], begin_norm_axis=2, y_grad_scale=0.1 - ) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=False, - has_bias=True, - y_grad_scale=0.1, - ) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=True, - has_bias=False, - y_grad_scale=0.1, - ) - self.check_forward_backward( - shape=[92, 513, 1134], - begin_norm_axis=2, - has_scale=False, - has_bias=False, - y_grad_scale=0.1, - ) - self.check_forward_backward( - shape=[512, 1024], begin_norm_axis=1, has_scale=True, has_bias=True - ) - self.check_forward_backward( - shape=[1, 128, 256, 256], - begin_norm_axis=3, - has_scale=True, - has_bias=True, - ) - self.check_forward_backward( - shape=[1, 256, 384], - begin_norm_axis=2, - has_scale=True, - has_bias=True, - ) - - -class TestLayerNormAPI(unittest.TestCase): - def test_case(self): - x = paddle.static.data(name='x', shape=[64, 32, 256], dtype='float32') - x = paddle.static.nn.layer_norm( - x, - scale=True, - shift=True, - begin_norm_axis=1, - epsilon=1e-05, - param_attr=None, - bias_attr=None, - ) - x = paddle.static.nn.layer_norm( - x, - scale=False, - shift=False, - begin_norm_axis=1, - epsilon=1e-05, - param_attr=None, - bias_attr=None, - ) - x = paddle.static.nn.layer_norm( - x, - scale=True, - shift=True, - begin_norm_axis=1, - epsilon=1e-05, - param_attr="scale", - bias_attr="shift", - ) - - class TestDygraphLayerNormAPIError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): diff --git a/test/deprecated/legacy_test/test_lerp_op.py b/test/legacy_test/test_lerp_op.py similarity index 100% rename from test/deprecated/legacy_test/test_lerp_op.py rename to test/legacy_test/test_lerp_op.py diff --git a/test/deprecated/legacy_test/test_lgamma_op.py b/test/legacy_test/test_lgamma_op.py similarity index 100% rename from 
test/deprecated/legacy_test/test_lgamma_op.py rename to test/legacy_test/test_lgamma_op.py diff --git a/test/deprecated/legacy_test/test_linear_interp_op.py b/test/legacy_test/test_linear_interp_op.py similarity index 100% rename from test/deprecated/legacy_test/test_linear_interp_op.py rename to test/legacy_test/test_linear_interp_op.py diff --git a/test/deprecated/legacy_test/test_linear_interp_v2_op.py b/test/legacy_test/test_linear_interp_v2_op.py similarity index 100% rename from test/deprecated/legacy_test/test_linear_interp_v2_op.py rename to test/legacy_test/test_linear_interp_v2_op.py diff --git a/test/legacy_test/test_listen_and_serv.sh b/test/legacy_test/test_listen_and_serv.sh index d9d64e4dfa693..62cf4c359f0b1 100644 --- a/test/legacy_test/test_listen_and_serv.sh +++ b/test/legacy_test/test_listen_and_serv.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,7 +22,7 @@ pid=$! 
flag1=test_handle_signal_in_serv_op.flag flag2=test_list_and_serv_run_empty_optimize_block.flag -for i in {1..10}; do +for i in {1..10}; do sleep 6s if [[ -f "${flag1}" && -f "${flag2}" ]]; then echo "test_listen_and_serv_op exit" @@ -34,8 +34,8 @@ echo "test_listen_and_serv_op.log context" cat test_listen_and_serv_op.log #display system context -for i in {1..4}; do - sleep 2 +for i in {1..4}; do + sleep 2 top -b -n1 | head -n 50 echo "${i}" top -b -n1 -i | head -n 50 @@ -54,8 +54,8 @@ kill -9 $pid echo "after kill ${pid}" #display system context -for i in {1..4}; do - sleep 2 +for i in {1..4}; do + sleep 2 top -b -n1 | head -n 50 top -b -n1 -i | head -n 50 nvidia-smi diff --git a/test/deprecated/legacy_test/test_load_state_dict_from_old_format.py b/test/legacy_test/test_load_state_dict_from_old_format.py similarity index 100% rename from test/deprecated/legacy_test/test_load_state_dict_from_old_format.py rename to test/legacy_test/test_load_state_dict_from_old_format.py diff --git a/test/deprecated/legacy_test/test_log_loss_op.py b/test/legacy_test/test_log_loss_op.py similarity index 100% rename from test/deprecated/legacy_test/test_log_loss_op.py rename to test/legacy_test/test_log_loss_op.py diff --git a/test/deprecated/legacy_test/test_log_softmax.py b/test/legacy_test/test_log_softmax.py similarity index 100% rename from test/deprecated/legacy_test/test_log_softmax.py rename to test/legacy_test/test_log_softmax.py diff --git a/test/deprecated/legacy_test/test_logsumexp.py b/test/legacy_test/test_logsumexp.py similarity index 100% rename from test/deprecated/legacy_test/test_logsumexp.py rename to test/legacy_test/test_logsumexp.py diff --git a/test/deprecated/legacy_test/test_lr_scheduler.py b/test/legacy_test/test_lr_scheduler.py similarity index 100% rename from test/deprecated/legacy_test/test_lr_scheduler.py rename to test/legacy_test/test_lr_scheduler.py diff --git a/test/deprecated/legacy_test/test_lrn_op.py b/test/legacy_test/test_lrn_op.py similarity 
index 100% rename from test/deprecated/legacy_test/test_lrn_op.py rename to test/legacy_test/test_lrn_op.py diff --git a/test/legacy_test/test_lstm_cudnn_op.py b/test/legacy_test/test_lstm_cudnn_op.py index ade1f61c0d5a9..3362297747b63 100644 --- a/test/legacy_test/test_lstm_cudnn_op.py +++ b/test/legacy_test/test_lstm_cudnn_op.py @@ -35,7 +35,7 @@ class RandomWeight: def __init__(self): pass - def updata_weight(self, hidden_size, input_size, dtype): + def update_weight(self, hidden_size, input_size, dtype): std = 1.0 / math.sqrt(hidden_size) self.hidden_size = hidden_size self.input_size = input_size @@ -432,7 +432,7 @@ def setUp(self): input[9][3:][:] = 0 input[8][4:][:] = 0 - weight.updata_weight(hidden_size, input_size, self.dtype) + weight.update_weight(hidden_size, input_size, self.dtype) rnn1 = LSTM( input_size, hidden_size, diff --git a/test/deprecated/legacy_test/test_lstm_op.py b/test/legacy_test/test_lstm_op.py similarity index 100% rename from test/deprecated/legacy_test/test_lstm_op.py rename to test/legacy_test/test_lstm_op.py diff --git a/test/deprecated/legacy_test/test_lu_unpack_op.py b/test/legacy_test/test_lu_unpack_op.py similarity index 100% rename from test/deprecated/legacy_test/test_lu_unpack_op.py rename to test/legacy_test/test_lu_unpack_op.py diff --git a/test/deprecated/legacy_test/test_masked_scatter.py b/test/legacy_test/test_masked_scatter.py similarity index 100% rename from test/deprecated/legacy_test/test_masked_scatter.py rename to test/legacy_test/test_masked_scatter.py diff --git a/test/deprecated/legacy_test/test_matmul_op.py b/test/legacy_test/test_matmul_op.py similarity index 100% rename from test/deprecated/legacy_test/test_matmul_op.py rename to test/legacy_test/test_matmul_op.py diff --git a/test/deprecated/legacy_test/test_matmul_v2_op.py b/test/legacy_test/test_matmul_v2_op.py similarity index 100% rename from test/deprecated/legacy_test/test_matmul_v2_op.py rename to test/legacy_test/test_matmul_v2_op.py diff --git 
a/test/deprecated/legacy_test/test_maxout_op.py b/test/legacy_test/test_maxout_op.py similarity index 100% rename from test/deprecated/legacy_test/test_maxout_op.py rename to test/legacy_test/test_maxout_op.py diff --git a/test/deprecated/legacy_test/test_meshgrid_op.py b/test/legacy_test/test_meshgrid_op.py similarity index 80% rename from test/deprecated/legacy_test/test_meshgrid_op.py rename to test/legacy_test/test_meshgrid_op.py index b72f51cd04144..869e2c4e88281 100644 --- a/test/deprecated/legacy_test/test_meshgrid_op.py +++ b/test/legacy_test/test_meshgrid_op.py @@ -42,16 +42,28 @@ def init_data_type(self): self.dtype = np.float64 def test_check_output(self): - self.check_output(check_prim=True, check_pir=True, check_prim_pir=True) + if self.dtype == np.complex64 or self.dtype == np.complex128: + self.check_output(check_pir=True) + else: + self.check_output( + check_prim=True, check_pir=True, check_prim_pir=True + ) def test_check_grad(self): - self.check_grad( - ['x0'], - ['out0', 'out1'], - check_prim=True, - check_pir=True, - check_prim_pir=True, - ) + if self.dtype == np.complex64 or self.dtype == np.complex128: + self.check_grad( + ['x0'], + ['out0', 'out1'], + check_pir=True, + ) + else: + self.check_grad( + ['x0'], + ['out0', 'out1'], + check_prim=True, + check_pir=True, + check_prim_pir=True, + ) def init_inputs_and_outputs(self): self.shape = self.get_x_shape() @@ -91,6 +103,22 @@ def init_data_type(self): self.dtype = np.float16 +class TestMeshgridOp2Complex64(TestMeshgridOp): + def get_x_shape(self): + return [100, 300] + + def init_data_type(self): + self.dtype = np.complex64 + + +class TestMeshgridOp2Complex128(TestMeshgridOp): + def get_x_shape(self): + return [100, 300] + + def init_data_type(self): + self.dtype = np.complex128 + + @unittest.skipIf( not core.is_compiled_with_cuda() or not core.is_bfloat16_supported(core.CUDAPlace(0)), @@ -336,6 +364,70 @@ def test_api_with_dygraph_tuple_input(self): np.testing.assert_array_equal(res_4.shape, 
[100, 200]) +class TestMeshgridOpComplexStatic(unittest.TestCase): + @test_with_pir_api + def test_tuple_input(self): + input_1 = np.random.randint( + 0, + 100, + [ + 100, + ], + ).astype('complex64') + input_2 = np.random.randint( + 0, + 100, + [ + 200, + ], + ).astype('complex64') + + out_1 = np.reshape(input_1, [100, 1]) + out_1 = np.broadcast_to(out_1, [100, 200]) + out_2 = np.reshape(input_2, [1, 200]) + out_2 = np.broadcast_to(out_2, [100, 200]) + + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data(shape=[100], dtype='complex64', name='x') + y = paddle.static.data(shape=[200], dtype='complex64', name='y') + + exe = base.Executor(place=base.CPUPlace()) + grid_x, grid_y = paddle.tensor.meshgrid((x, y)) + res_1, res_2 = exe.run( + paddle.static.default_main_program(), + feed={'x': input_1, 'y': input_2}, + fetch_list=[grid_x, grid_y], + ) + np.testing.assert_array_equal(res_1, out_1) + np.testing.assert_array_equal(res_2, out_2) + + +class TestMeshgridOpComplexDygraph(unittest.TestCase): + def test_api_with_dygraph_tuple_input(self): + input_3 = np.random.randint( + 0, + 100, + [ + 100, + ], + ).astype('complex64') + input_4 = np.random.randint( + 0, + 100, + [ + 200, + ], + ).astype('complex64') + + with base.dygraph.guard(): + tensor_3 = paddle.to_tensor(input_3) + tensor_4 = paddle.to_tensor(input_4) + res_3, res_4 = paddle.tensor.meshgrid((tensor_3, tensor_4)) + + np.testing.assert_array_equal(res_3.shape, [100, 200]) + np.testing.assert_array_equal(res_4.shape, [100, 200]) + + class TestMeshGrid_ZeroDim(TestMeshgridOp): def init_inputs_and_outputs(self): self.shape = self.get_x_shape() diff --git a/test/deprecated/legacy_test/test_modified_huber_loss_op.py b/test/legacy_test/test_modified_huber_loss_op.py similarity index 100% rename from test/deprecated/legacy_test/test_modified_huber_loss_op.py rename to test/legacy_test/test_modified_huber_loss_op.py diff --git a/test/deprecated/legacy_test/test_mul_op.py 
b/test/legacy_test/test_mul_op.py similarity index 100% rename from test/deprecated/legacy_test/test_mul_op.py rename to test/legacy_test/test_mul_op.py diff --git a/test/deprecated/legacy_test/test_multi_dot_op.py b/test/legacy_test/test_multi_dot_op.py similarity index 100% rename from test/deprecated/legacy_test/test_multi_dot_op.py rename to test/legacy_test/test_multi_dot_op.py diff --git a/test/deprecated/legacy_test/test_mv_op.py b/test/legacy_test/test_mv_op.py similarity index 100% rename from test/deprecated/legacy_test/test_mv_op.py rename to test/legacy_test/test_mv_op.py diff --git a/test/legacy_test/test_nce.py b/test/legacy_test/test_nce.py new file mode 100644 index 0000000000000..c8a57ee5be488 --- /dev/null +++ b/test/legacy_test/test_nce.py @@ -0,0 +1,154 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from op_test import OpTest + + +def nce( + input, weight, bias, sample_weight, labels, num_classes, num_sample_class +): + samples = [] + sample_labels = [] + batch_size = input.shape[0] + num_true_class = labels.shape[1] + for i in range(batch_size): + w = 1 if sample_weight is None else sample_weight[i] + for label in labels[i]: + samples.append((i, label, True, w)) + sample_labels.append(label) + for num in range(num_sample_class): + samples.append((i, num, False, w)) + sample_labels.append(num) + # forward bias + sample_out = np.zeros(len(samples)).astype(np.float32) + if bias is not None: + for i in range(len(samples)): + sample_out[i] = bias[samples[i][1]] + # forward weight + for i in range(len(samples)): + sample_out[i] += np.dot(input[samples[i][0]], weight[samples[i][1]]) + + # forward activation + sample_out = 1.0 / (1.0 + np.exp(-sample_out)) + # forward cost + out = np.zeros(batch_size).astype(np.float32) + b = 1.0 / num_classes * num_sample_class + for i in range(len(samples)): + o = sample_out[i] + cost = -np.log(o / (o + b)) if samples[i][2] else -np.log(b / (o + b)) + out[samples[i][0]] += cost * samples[i][3] + return ( + out[:, np.newaxis], + np.array(sample_out).reshape( + batch_size, num_sample_class + num_true_class + ), + np.array(sample_labels).reshape( + batch_size, num_sample_class + num_true_class + ), + ) + + +class TestNCE(OpTest): + def generate_data( + self, + dim, + batch_size, + num_classes, + num_true_class, + num_neg_samples, + is_sparse, + ): + input = np.random.randn(batch_size, dim).astype(np.float32) + weight = np.random.randn(num_classes, dim).astype(np.float32) + bias = np.random.randn(num_classes).astype(np.float32) + sample_weight = np.random.randn(batch_size).astype(np.float32) + labels = np.random.randint( + 0, num_classes, (batch_size, num_true_class) + ).astype("int64") + self.attrs = { + 'num_total_classes': num_classes, + 'num_neg_samples': num_neg_samples, + 
'custom_neg_classes': list(range(num_neg_samples)), + 'seed': 0, + 'sampler': 0, + 'is_sparse': is_sparse, + 'is_test': self.is_test, + } + self.inputs = { + 'Input': input, + 'Label': labels, + 'Weight': weight, + 'Bias': bias, + 'SampleWeight': sample_weight, + } + + def set_is_test(self): + self.is_test = False + + def set_data(self): + self.generate_data(5, 25, 100, 1, 2, False) + + def compute(self): + out = nce( + self.inputs['Input'], + self.inputs['Weight'], + self.inputs['Bias'], + self.inputs['SampleWeight'], + self.inputs['Label'], + self.attrs['num_total_classes'], + self.attrs['num_neg_samples'], + ) + if self.is_test: + self.outputs = {'Cost': out[0]} + else: + self.outputs = { + 'Cost': out[0], + 'SampleLogits': out[1], + 'SampleLabels': out[2], + } + + def setUp(self): + self.op_type = 'nce' + self.set_is_test() + self.set_data() + self.compute() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad( + ["Input", "Weight", "Bias"], "Cost", max_relative_error=0.02 + ) + + +class TestNCECase1Tensor(TestNCE): + def set_data(self): + self.generate_data(10, 20, 100, 2, 5, False) + + +class TestNCETensorIsTest(TestNCE): + # if is_test = True, there's no need to calculate grad + def set_is_test(self): + self.is_test = True + + def test_check_grad(self): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_nearest_interp_op.py b/test/legacy_test/test_nearest_interp_op.py similarity index 100% rename from test/deprecated/legacy_test/test_nearest_interp_op.py rename to test/legacy_test/test_nearest_interp_op.py diff --git a/test/deprecated/legacy_test/test_nearest_interp_v2_op.py b/test/legacy_test/test_nearest_interp_v2_op.py similarity index 100% rename from test/deprecated/legacy_test/test_nearest_interp_v2_op.py rename to test/legacy_test/test_nearest_interp_v2_op.py diff --git a/test/deprecated/legacy_test/test_ops_nms.py b/test/legacy_test/test_ops_nms.py 
similarity index 100% rename from test/deprecated/legacy_test/test_ops_nms.py rename to test/legacy_test/test_ops_nms.py diff --git a/test/legacy_test/test_optimizer.py b/test/legacy_test/test_optimizer.py new file mode 100644 index 0000000000000..63273c2eb9928 --- /dev/null +++ b/test/legacy_test/test_optimizer.py @@ -0,0 +1,169 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +import unittest + +import numpy +import numpy as np + +import paddle +from paddle import base +from paddle.base import core +from paddle.base.framework import ( + convert_np_dtype_to_dtype_, +) +from paddle.io import Dataset + + +class TestOptimizerDtype(unittest.TestCase): + ''' + The dtype of optimizer should be inferred by parameters, and the learning rate + is cteated with the same dtype. 
+ ''' + + def check_with_dtype(self, dtype): + class MyLayer(paddle.nn.Layer): + def __init__(self, dtype): + super().__init__() + self._w = self.create_parameter([2, 3], dtype=dtype) + self._b = self.create_parameter([2, 3], dtype=dtype) + + def forward(self, x): + return x * self._w + self._b + + with paddle.base.dygraph.guard(): + model = MyLayer(dtype) + x = paddle.rand([10, 2, 3], dtype=dtype) + loss = model(x) + adam = paddle.optimizer.Adam(parameters=model.parameters()) + loss.backward() + adam.step() + self.assertEqual(adam._dtype, convert_np_dtype_to_dtype_(dtype)) + + def test_float64(self): + self.check_with_dtype('float64') + + def test_float32(self): + self.check_with_dtype('float32') + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or paddle.device.cuda.get_device_capability()[0] < 7.0, + "run test when gpu's compute capability is at least 7.0.", +) +class TestMasterWeightSaveForFP16(unittest.TestCase): + ''' + For Amp-O2, some optimizer(Momentum, Adam ...) will create master weights for parameters to improve the accuracy. + Master weights will be saved by optimizer::state_dict. 
+ ''' + + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + + def tearDown(self): + self.temp_dir.cleanup() + + def check_with_opt_state_dict(self, use_save_load=True): + paddle.seed(100) + numpy.random.seed(100) + + class SimpleNet(paddle.nn.Layer): + def __init__(self, input_size, output_size): + super().__init__() + self.linears = paddle.nn.LayerList( + [ + paddle.nn.Linear(input_size, output_size) + for i in range(1) + ] + ) + + def forward(self, x): + for i, l in enumerate(self.linears): + x = self.linears[i](x) + return x + + input_size = 2 # 设为较大的值 + output_size = 2 # 设为较大的值 + batch_size = 2 # batch_size 为8的倍数 + nums_batch = 10 + + class RandomDataset(Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples + + def __getitem__(self, idx): + data = numpy.random.random([input_size]).astype('float16') + label = numpy.random.random([output_size]).astype('float16') + return data, label + + def __len__(self): + return self.num_samples + + dataset = RandomDataset(nums_batch * batch_size) + loader = paddle.io.DataLoader( + dataset, + batch_size=batch_size, + shuffle=False, + drop_last=True, + num_workers=0, + ) + + mse = paddle.nn.MSELoss() + model = SimpleNet(input_size, output_size) # 定义模型 + optimizer = paddle.optimizer.Momentum( + learning_rate=0.0001, + parameters=model.parameters(), + multi_precision=True, + ) # 定义优化器 + scaler = paddle.amp.GradScaler(init_loss_scaling=1024) + model = paddle.amp.decorate(models=model, level='O2') + + for i, (data, label) in enumerate(loader): + with paddle.amp.auto_cast(level='O2'): + output = model(data) + loss = mse(output, label) + scaled = scaler.scale(loss) + scaled.backward() + scaler.step(optimizer) + scaler.update() + optimizer.clear_grad(set_to_zero=False) + + if use_save_load and i == 5: + model_path = os.path.join(self.temp_dir.name, "model.pdparams") + optimizer_path = os.path.join(self.temp_dir.name, "opt.pdopt") + paddle.save(model.state_dict(), model_path) + 
paddle.save(optimizer.state_dict(), optimizer_path) + model.set_state_dict(paddle.load(model_path)) + optimizer.set_state_dict(paddle.load(optimizer_path)) + + return loss.numpy() + + def test_with_state_dict(self): + if core.is_compiled_with_cuda(): + with base.dygraph.guard(): + out_use_state_dict = self.check_with_opt_state_dict( + use_save_load=True + ) + out_no_state_dict = self.check_with_opt_state_dict( + use_save_load=False + ) + np.testing.assert_array_equal(out_use_state_dict, out_no_state_dict) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() diff --git a/test/deprecated/legacy_test/test_overlap_add_op.py b/test/legacy_test/test_overlap_add_op.py similarity index 100% rename from test/deprecated/legacy_test/test_overlap_add_op.py rename to test/legacy_test/test_overlap_add_op.py diff --git a/test/deprecated/legacy_test/test_pad3d_op.py b/test/legacy_test/test_pad3d_op.py similarity index 100% rename from test/deprecated/legacy_test/test_pad3d_op.py rename to test/legacy_test/test_pad3d_op.py diff --git a/test/deprecated/legacy_test/test_paddle_save_load_binary.py b/test/legacy_test/test_paddle_save_load_binary.py similarity index 100% rename from test/deprecated/legacy_test/test_paddle_save_load_binary.py rename to test/legacy_test/test_paddle_save_load_binary.py diff --git a/test/legacy_test/test_parallel_dygraph_dataparallel.py b/test/legacy_test/test_parallel_dygraph_dataparallel.py index 648f6ddd97ef2..166687ce098e4 100644 --- a/test/legacy_test/test_parallel_dygraph_dataparallel.py +++ b/test/legacy_test/test_parallel_dygraph_dataparallel.py @@ -66,7 +66,7 @@ def start_local_trainers_cpu( proc_env = { "PADDLE_DISTRI_BACKEND": "gloo", "PADDLE_TRAINER_ID": "%d" % rank_id, - "PADDLE_CURRENT_ENDPOINT": "%s" % endpoint, + "PADDLE_CURRENT_ENDPOINT": f"{endpoint}", "PADDLE_TRAINERS_NUM": "%d" % n_rank, "PADDLE_TRAINER_ENDPOINTS": ",".join(trainer_endpoints), } @@ -118,10 +118,11 @@ def start_local_trainers( procs = [] for t in 
pod.trainers: proc_env = { - f"FLAGS_selected_{accelerator_type}s": "%s" - % ",".join([str(g) for g in t.gpus]), + f"FLAGS_selected_{accelerator_type}s": "{}".format( + ",".join([str(g) for g in t.gpus]) + ), "PADDLE_TRAINER_ID": "%d" % t.rank, - "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint, + "PADDLE_CURRENT_ENDPOINT": f"{t.endpoint}", "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(), "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()), "FLAGS_dynamic_static_unified_comm": "0", diff --git a/test/legacy_test/test_parallel_dygraph_dataparallel_cpuonly.py b/test/legacy_test/test_parallel_dygraph_dataparallel_cpuonly.py index 5a944284414bf..cd1b89e064d6e 100644 --- a/test/legacy_test/test_parallel_dygraph_dataparallel_cpuonly.py +++ b/test/legacy_test/test_parallel_dygraph_dataparallel_cpuonly.py @@ -66,7 +66,7 @@ def start_local_trainers( for t in pod.trainers: proc_env = { "PADDLE_TRAINER_ID": "%d" % t.rank, - "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint, + "PADDLE_CURRENT_ENDPOINT": f"{t.endpoint}", "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(), "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()), "MASTER_ADDR": "127.0.0.1", diff --git a/test/deprecated/legacy_test/test_partial_concat_op.py b/test/legacy_test/test_partial_concat_op.py similarity index 100% rename from test/deprecated/legacy_test/test_partial_concat_op.py rename to test/legacy_test/test_partial_concat_op.py diff --git a/test/deprecated/legacy_test/test_partial_sum_op.py b/test/legacy_test/test_partial_sum_op.py similarity index 100% rename from test/deprecated/legacy_test/test_partial_sum_op.py rename to test/legacy_test/test_partial_sum_op.py diff --git a/test/deprecated/legacy_test/test_pixel_shuffle_op.py b/test/legacy_test/test_pixel_shuffle_op.py similarity index 100% rename from test/deprecated/legacy_test/test_pixel_shuffle_op.py rename to test/legacy_test/test_pixel_shuffle_op.py diff --git a/test/legacy_test/test_pool1d_api.py 
b/test/legacy_test/test_pool1d_api.py index 6fac04f468ebe..fce76e98f9f8e 100644 --- a/test/legacy_test/test_pool1d_api.py +++ b/test/legacy_test/test_pool1d_api.py @@ -115,6 +115,62 @@ def avg_pool1D_forward_naive( return out +def lp_pool1D_forward_naive( + x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + data_format='NCL', + norm_type=None, +): + assert norm_type is not None + if x.dtype == np.float16: + x = x.astype(np.float32) + if data_format == "NCL": + N, C, L = x.shape + else: + N, L, C = x.shape + + if global_pool == 1: + ksize = [L] + L_out = ( + (L - ksize[0] + 2 * paddings[0] + strides[0] - 1) // strides[0] + 1 + if ceil_mode + else (L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 + ) + + if data_format == "NCL": + out = np.zeros((N, C, L_out)) + else: + out = np.zeros((N, L_out, C)) + for i in range(L_out): + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L)) + if data_format == "NCL": + x_masked = x[:, :, r_start:r_end] + else: + x_masked = x[:, r_start:r_end, :] + if data_format == "NCL": + if norm_type == float('inf'): + out[:, :, i] = np.max(x_masked, axis=(2)) + else: + out[:, :, i] = np.power( + np.sum(np.power(x_masked, norm_type), axis=(2)), + 1 / norm_type, + ) + else: + if norm_type == float('inf'): + out[:, i, :] = np.max(x_masked, axis=(1)) + else: + out[:, i, :] = np.power( + np.sum(np.power(x_masked, norm_type), axis=(1)), + 1 / norm_type, + ) + return out + + class TestPool1D_API(unittest.TestCase): def setUp(self): np.random.seed(123) @@ -296,6 +352,270 @@ def check_avg_dygraph_padding_same(self, place): np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + @test_with_pir_api + def check_lp_static_results(self, place): + with paddle.static.program_guard(paddle.static.Program()): + input = paddle.static.data( + name="input", shape=[2, 3, 32], dtype="float32" + ) + result = F.lp_pool1d( + input, norm_type=2, kernel_size=2, stride=2, 
padding=0 + ) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = lp_pool1D_forward_naive( + input_np, + ksize=[2], + strides=[2], + paddings=[0], + ceil_mode=False, + norm_type=2, + ) + + exe = paddle.static.Executor(place) + fetches = exe.run( + feed={"input": input_np}, + fetch_list=[result], + ) + np.testing.assert_allclose(fetches[0], result_np, rtol=1e-05) + + @test_with_pir_api + def check_lp_static_results_fp16(self, place): + if core.is_compiled_with_cuda(): + with paddle.static.program_guard(paddle.static.Program()): + input = paddle.static.data( + name="input", shape=[2, 3, 32], dtype="float16" + ) + result = F.lp_pool1d( + input, norm_type=3, kernel_size=2, stride=2, padding=0 + ) + + input_np = np.random.random([2, 3, 32]).astype("float16") + result_np = lp_pool1D_forward_naive( + input_np, + ksize=[2], + strides=[2], + paddings=[0], + ceil_mode=False, + norm_type=3, + ) + + place = paddle.CUDAPlace(0) + exe = paddle.static.Executor(place) + fetches = exe.run( + feed={"input": input_np}, + fetch_list=[result], + ) + np.testing.assert_allclose( + fetches[0], result_np.astype(np.float16), rtol=1e-05 + ) + + @test_with_pir_api + def check_lp_static_results_fp64(self, place): + if core.is_compiled_with_cuda(): + with paddle.static.program_guard(paddle.static.Program()): + input = paddle.static.data( + name="input", shape=[2, 3, 32], dtype="float64" + ) + result = F.lp_pool1d( + input, norm_type=3, kernel_size=2, stride=2, padding=0 + ) + + input_np = np.random.random([2, 3, 32]).astype("float64") + result_np = lp_pool1D_forward_naive( + input_np, + ksize=[2], + strides=[2], + paddings=[0], + ceil_mode=False, + norm_type=3, + ) + + place = paddle.CUDAPlace(0) + exe = paddle.static.Executor(place) + fetches = exe.run( + feed={"input": input_np}, + fetch_list=[result], + ) + np.testing.assert_allclose(fetches[0], result_np, rtol=1e-05) + + def check_lp_dygraph_results(self, place): + with base.dygraph.guard(place): + input_np = 
np.random.random([2, 3, 32]).astype("float32") + input = paddle.to_tensor(input_np) + result = F.lp_pool1d( + input, norm_type=4, kernel_size=3, stride=2, padding=[1] + ) + + result_np = lp_pool1D_forward_naive( + input_np, + ksize=[3], + strides=[2], + paddings=[1], + norm_type=4, + ) + + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool1d_dg = paddle.nn.layer.LPPool1D( + norm_type=4, kernel_size=3, stride=2, padding=1 + ) + result = lp_pool1d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + def check_lp_dygraph_float16_results(self, place): + if isinstance(place, base.CUDAPlace): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float16") + input = paddle.to_tensor(input_np) + result = F.lp_pool1d( + input, norm_type=5, kernel_size=5, stride=3, padding=[0] + ) + + result_np = lp_pool1D_forward_naive( + input_np, ksize=[5], strides=[3], paddings=[0], norm_type=5 + ) + + np.testing.assert_allclose( + result.numpy(), result_np.astype(np.float16), rtol=1e-05 + ) + + lp_pool1d_dg = paddle.nn.layer.LPPool1D( + norm_type=5, kernel_size=5, stride=3, padding=0 + ) + result = lp_pool1d_dg(input) + np.testing.assert_allclose( + result.numpy(), result_np.astype(np.float16), rtol=1e-05 + ) + + def check_lp_dygraph_float64_results(self, place): + if isinstance(place, base.CUDAPlace): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float64") + input = paddle.to_tensor(input_np) + result = F.lp_pool1d( + input, norm_type=5, kernel_size=5, stride=3, padding=[0] + ) + + result_np = lp_pool1D_forward_naive( + input_np, ksize=[5], strides=[3], paddings=[0], norm_type=5 + ) + + np.testing.assert_allclose( + result.numpy(), result_np, rtol=1e-05 + ) + + lp_pool1d_dg = paddle.nn.layer.LPPool1D( + norm_type=5, kernel_size=5, stride=3, padding=0 + ) + result = lp_pool1d_dg(input) + np.testing.assert_allclose( + result.numpy(), result_np, rtol=1e-05 + ) + + 
def check_lp_dygraph_ceil_mode_results(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = paddle.to_tensor(input_np) + result = F.lp_pool1d( + input, + norm_type=7, + kernel_size=2, + stride=2, + padding=[1], + ceil_mode=True, + ) + + result_np = lp_pool1D_forward_naive( + input_np, + ksize=[2], + strides=[2], + paddings=[1], + ceil_mode=True, + norm_type=7, + ) + + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool1d_dg = paddle.nn.LPPool1D( + norm_type=7, + kernel_size=2, + stride=None, + ceil_mode=True, + padding=1, + ) + + result = lp_pool1d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + def check_lp_dygraph_data_format_results(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 32, 3]).astype("float32") + input = paddle.to_tensor(input_np) + result = F.lp_pool1d( + input, + norm_type=7, + kernel_size=2, + stride=2, + padding=[1], + ceil_mode=True, + data_format="NLC", + ) + + result_np = lp_pool1D_forward_naive( + input_np, + ksize=[2], + strides=[2], + paddings=[1], + ceil_mode=True, + data_format="NLC", + norm_type=7, + ) + + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool1d_dg = paddle.nn.LPPool1D( + norm_type=7, + kernel_size=2, + stride=None, + data_format="NLC", + padding=1, + ) + + result = lp_pool1d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + def check_lp_dygraph_inf_norm_type(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = paddle.to_tensor(input_np) + result = F.lp_pool1d( + input, + norm_type=float('inf'), + kernel_size=2, + stride=2, + padding=[1], + ceil_mode=True, + ) + + result_np = lp_pool1D_forward_naive( + input_np, + ksize=[2], + strides=[2], + paddings=[1], + ceil_mode=True, + norm_type=float("inf"), + ) + + np.testing.assert_allclose(result.numpy(), 
result_np, rtol=1e-05) + + lp_pool1d_dg = paddle.nn.LPPool1D( + norm_type=float('inf'), kernel_size=2, stride=None, padding=1 + ) + + result = lp_pool1d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + def test_pool1d(self): for place in self.places: self.check_max_dygraph_results(place) @@ -306,6 +626,15 @@ def test_pool1d(self): self.check_avg_dygraph_padding_same(place) self.check_max_dygraph_return_index_results(place) self.check_avg_static_results_fp16(place) + self.check_lp_static_results(place) + self.check_lp_dygraph_results(place) + self.check_lp_static_results_fp16(place) + self.check_lp_static_results_fp64(place) + self.check_lp_dygraph_inf_norm_type(place) + self.check_lp_dygraph_float16_results(place) + self.check_lp_dygraph_float64_results(place) + self.check_lp_dygraph_ceil_mode_results(place) + self.check_lp_dygraph_data_format_results(place) class TestPool1DError_API(unittest.TestCase): diff --git a/test/legacy_test/test_pool2d_api.py b/test/legacy_test/test_pool2d_api.py index ff4084d112301..f125bf7315a93 100644 --- a/test/legacy_test/test_pool2d_api.py +++ b/test/legacy_test/test_pool2d_api.py @@ -27,7 +27,7 @@ import paddle from paddle import base from paddle.base import core -from paddle.nn.functional import avg_pool2d, max_pool2d +from paddle.nn.functional import avg_pool2d, lp_pool2d, max_pool2d from paddle.pir_utils import test_with_pir_api @@ -360,6 +360,400 @@ def check_avg_divisor(self, place): result = avg_pool2d_dg(input) np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + def check_lp_static_results(self, place): + with paddle.static.program_guard( + paddle.static.Program(), paddle.static.Program() + ): + input = paddle.static.data( + name="input", shape=[2, 3, 128, 128], dtype="float32" + ) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=4, + stride=4, + ceil_mode=True, + ) + + input_np = np.random.random([2, 3, 128, 128]).astype("float32") + result_np = 
pool2D_forward_naive( + input_np, + ksize=[4, 4], + paddings=[0, 0], + strides=[4, 4], + ceil_mode=True, + norm_type=norm_type, + pool_type='lp', + ) + + exe = base.Executor(place) + fetches = exe.run( + feed={"input": input_np}, + fetch_list=[result], + ) + np.testing.assert_allclose(fetches[0], result_np, rtol=1e-05) + + def check_lp_dygraph_results(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = paddle.to_tensor(input_np) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=2, + stride=1, + ceil_mode=False, + ) + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + paddings=[0, 0], + strides=[1, 1], + ceil_mode=False, + norm_type=norm_type, + pool_type='lp', + ) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool2d_dg = paddle.nn.layer.LPPool2D( + norm_type=norm_type, + kernel_size=2, + stride=1, + ceil_mode=False, + ) + result = lp_pool2d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + def check_lp_dygraph_results_norm_type_is_inf(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = paddle.to_tensor(input_np) + norm_type = np.inf + result = lp_pool2d( + input, + norm_type, + kernel_size=[2, 4], + stride=2, + ceil_mode=False, + ) + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 4], + paddings=[0, 0], + strides=[2, 2], + ceil_mode=False, + norm_type=norm_type, + pool_type='lp', + ) + + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool2d_dg = paddle.nn.layer.LPPool2D( + norm_type=norm_type, + kernel_size=[2, 4], + stride=2, + ceil_mode=False, + ) + result = lp_pool2d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + def check_lp_dygraph_results_norm_type_is_negative_inf(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 
32]).astype("float32") + input = paddle.to_tensor(input_np) + norm_type = -np.inf + result = lp_pool2d( + input, + norm_type, + kernel_size=2, + stride=2, + ceil_mode=False, + ) + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + paddings=[0, 0], + strides=[2, 2], + ceil_mode=False, + norm_type=norm_type, + pool_type='lp', + ) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool2d_dg = paddle.nn.layer.LPPool2D( + norm_type=norm_type, + kernel_size=2, + stride=2, + ceil_mode=False, + ) + result = lp_pool2d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + def check_lp_dygraph_ceilmode_results(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = paddle.to_tensor(input_np) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=5, + stride=3, + ceil_mode=True, + ) + + result_np = pool2D_forward_naive( + input_np, + ksize=[5, 5], + paddings=[0, 0], + strides=[3, 3], + ceil_mode=True, + norm_type=norm_type, + pool_type='lp', + ) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool2d_dg = paddle.nn.layer.LPPool2D( + norm_type=norm_type, + kernel_size=5, + stride=3, + ceil_mode=True, + ) + result = lp_pool2d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + def check_lp_dygraph_nhwc_results(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = paddle.to_tensor(np.transpose(input_np, [0, 2, 3, 1])) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=2, + stride=2, + ceil_mode=False, + data_format="NHWC", + ) + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + paddings=[0, 0], + strides=[2, 2], + ceil_mode=False, + norm_type=norm_type, + pool_type='lp', + ) + np.testing.assert_allclose( + np.transpose(result.numpy(), [0, 3, 1, 2]), + result_np, + 
rtol=1e-05, + ) + lp_pool2d_dg = paddle.nn.layer.LPPool2D( + norm_type=norm_type, + kernel_size=2, + stride=[2, 2], + ceil_mode=False, + data_format="NHWC", + ) + result = lp_pool2d_dg(input) + np.testing.assert_allclose( + np.transpose(result.numpy(), [0, 3, 1, 2]), + result_np, + rtol=1e-05, + ) + + def check_lp_dygraph_stride_is_none(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = paddle.to_tensor(input_np) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=2, + stride=None, + ceil_mode=False, + ) + + result_np = pool2D_forward_naive( + input_np, + paddings=[0, 0], + ksize=[2, 2], + strides=[2, 2], + ceil_mode=False, + norm_type=norm_type, + pool_type='lp', + ) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool2d_dg = paddle.nn.layer.LPPool2D( + norm_type=norm_type, + kernel_size=2, + stride=None, + ceil_mode=False, + ) + result = lp_pool2d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + def check_lp_float16_static(self, place): + if isinstance(place, base.CUDAPlace): + with paddle.static.program_guard( + paddle.static.Program(), paddle.static.Program() + ): + input = paddle.static.data( + name="input", shape=[2, 3, 64, 64], dtype="float16" + ) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=4, + stride=[2, 4], + ceil_mode=True, + ) + + input_np = np.random.random([2, 3, 64, 64]).astype("float16") + result_np = pool2D_forward_naive( + input_np, + ksize=[4, 4], + paddings=[0, 0], + strides=[2, 4], + ceil_mode=True, + norm_type=norm_type, + pool_type='lp', + ) + + exe = base.Executor(place) + fetches = exe.run( + feed={"input": input_np}, + fetch_list=[result], + ) + np.testing.assert_allclose( + fetches[0], result_np.astype(np.float16), rtol=1e-03 + ) + + def check_lp_float64_static(self, place): + with paddle.static.program_guard( + paddle.static.Program(), 
paddle.static.Program() + ): + input = paddle.static.data( + name="input", shape=[2, 3, 64, 64], dtype="float64" + ) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=5, + stride=3, + ceil_mode=True, + ) + + input_np = np.random.random([2, 3, 64, 64]).astype("float64") + result_np = pool2D_forward_naive( + input_np, + ksize=[5, 5], + paddings=[0, 0], + strides=[3, 3], + ceil_mode=True, + norm_type=norm_type, + pool_type='lp', + ) + + exe = base.Executor(place) + fetches = exe.run( + feed={"input": input_np}, + fetch_list=[result], + ) + np.testing.assert_allclose(fetches[0], result_np, rtol=1e-05) + + def check_lp_dygraph_float16(self, place): + if isinstance(place, base.CUDAPlace): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float16") + input = paddle.to_tensor(input_np) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=3, + stride=2, + ceil_mode=False, + ) + + result_np = pool2D_forward_naive( + input_np, + ksize=[3, 3], + paddings=[0, 0], + strides=[2, 2], + ceil_mode=False, + norm_type=norm_type, + pool_type='lp', + ) + np.testing.assert_allclose( + result.numpy(), result_np, rtol=1e-03 + ) + + lp_pool2d_dg = paddle.nn.layer.LPPool2D( + norm_type=norm_type, + kernel_size=3, + stride=2, + ceil_mode=False, + ) + result = lp_pool2d_dg(input) + np.testing.assert_allclose( + result.numpy(), result_np.astype(np.float16), rtol=1e-03 + ) + + def check_lp_dygraph_float64(self, place): + with base.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float64") + input = paddle.to_tensor(input_np) + norm_type = 2 + result = lp_pool2d( + input, + norm_type, + kernel_size=5, + stride=3, + ceil_mode=False, + ) + + result_np = pool2D_forward_naive( + input_np, + ksize=[5, 5], + paddings=[0, 0], + strides=[3, 3], + ceil_mode=False, + norm_type=norm_type, + pool_type='lp', + ) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + lp_pool2d_dg = 
paddle.nn.layer.LPPool2D( + norm_type=norm_type, + kernel_size=5, + stride=3, + ceil_mode=False, + ) + result = lp_pool2d_dg(input) + np.testing.assert_allclose(result.numpy(), result_np, rtol=1e-05) + + @test_with_pir_api + def test_pool2d_static(self): + paddle.enable_static() + for place in self.places: + self.check_max_static_results(place) + self.check_avg_static_results(place) + self.check_lp_static_results(place) + self.check_lp_float64_static(place) + self.check_lp_float16_static(place) + paddle.disable_static() + def test_pool2d(self): for place in self.places: self.check_max_dygraph_results(place) @@ -371,14 +765,14 @@ def test_pool2d(self): self.check_max_dygraph_padding_results(place) self.check_max_dygraph_ceilmode_results(place) self.check_max_dygraph_nhwc_results(place) - - @test_with_pir_api - def test_pool2d_static(self): - paddle.enable_static() - for place in self.places: - self.check_max_static_results(place) - self.check_avg_static_results(place) - paddle.disable_static() + self.check_lp_dygraph_results(place) + self.check_lp_dygraph_stride_is_none(place) + self.check_lp_dygraph_ceilmode_results(place) + self.check_lp_dygraph_nhwc_results(place) + self.check_lp_dygraph_results_norm_type_is_inf(place) + self.check_lp_dygraph_results_norm_type_is_negative_inf(place) + self.check_lp_dygraph_float64(place) + self.check_lp_dygraph_float16(place) class TestPool2DError_API(unittest.TestCase): @@ -630,6 +1024,16 @@ def run_zero_tuple_stride(): self.assertRaises(ValueError, run_zero_tuple_stride) + def run_zero_norm_type(): + with base.dygraph.guard(): + array = np.array([1], dtype=np.float32) + x = paddle.to_tensor( + np.reshape(array, [1, 1, 1, 1]), dtype='float32' + ) + out = lp_pool2d(x, 0, 2) + + self.assertRaises(ValueError, run_zero_norm_type) + if __name__ == '__main__': unittest.main() diff --git a/test/legacy_test/test_pool2d_op.py b/test/legacy_test/test_pool2d_op.py index b2f10e3af1b26..8fc52ffec9c99 100644 --- 
a/test/legacy_test/test_pool2d_op.py +++ b/test/legacy_test/test_pool2d_op.py @@ -154,7 +154,11 @@ def pool2D_forward_naive( data_format='NCHW', pool_type="max", padding_algorithm="EXPLICIT", + norm_type=0, ): + if norm_type == float("inf"): + pool_type = 'max' + # update paddings def _get_padding_with_SAME(input_shape, pool_size, pool_stride): padding = [] @@ -273,6 +277,14 @@ def _get_padding_with_SAME(input_shape, pool_size, pool_stride): out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size elif pool_type == 'max': out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) + else: # lp_pool2d + if norm_type == 0: + out[:, :, i, j] = 1 + else: + out[:, :, i, j] = np.power( + np.sum(np.power(x_masked, norm_type), axis=(2, 3)), + 1.0 / norm_type, + ) elif data_format == 'NHWC': x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] if pool_type == 'avg': @@ -283,6 +295,14 @@ def _get_padding_with_SAME(input_shape, pool_size, pool_stride): out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size elif pool_type == 'max': out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) + else: # lp_pool2d + if norm_type == 0: + out[:, i, j, :] = 1 + else: + out[:, i, j, :] = np.power( + np.sum(np.power(x_masked, norm_type), axis=(2, 3)), + 1.0 / norm_type, + ) return out @@ -348,6 +368,37 @@ def pool2d_wrapper_use_cudnn( ) +def lp_pool2d_wrapper( + X, + ksize=[], + strides=[], + paddings=[], + ceil_mode=False, + exclusive=True, + data_format="NCDHW", + pooling_type="lp", + global_pooling=False, + adaptive=False, + padding_algorithm="EXPLICIT", +): + if data_format == "AnyLayout": + data_format = "NCDHW" + return paddle._C_ops.lp_pool2d( + X, + ksize, + strides, + paddings, + ceil_mode, + exclusive, + data_format, + pooling_type, + global_pooling, + adaptive, + padding_algorithm, + 2, + ) + + class TestPool2D_Op_Mixin: def setUp(self): self.op_type = "pool2d" @@ -503,6 +554,85 @@ class TestPool2D_Op(TestPool2D_Op_Mixin, OpTest): pass +class TestLPPool2D_Op(TestPool2D_Op): + def 
setUp(self): + self.op_type = "lp_pool2d" + self.use_cudnn = False + self.init_kernel_type() + self.use_mkldnn = False + self.init_data_type() + self.init_test_case() + self.padding_algorithm = "EXPLICIT" + self.init_paddings() + self.init_global_pool() + self.init_kernel_type() + self.init_ceil_mode() + self.init_exclusive() + self.init_adaptive() + self.init_data_format() + self.init_shape() + self.norm_type = 2 + self.pool_type = 'lp' + + if self.is_bfloat16_op(): + input = np.random.random(self.shape).astype(np.float32) + else: + input = np.random.random(self.shape).astype(self.dtype) + + output = pool2D_forward_naive( + input, + self.ksize, + self.strides, + self.paddings, + self.global_pool, + self.ceil_mode, + self.exclusive, + self.adaptive, + self.data_format, + self.pool_type, + self.padding_algorithm, + self.norm_type, + ) + + if self.is_bfloat16_op(): + output = convert_float_to_uint16(output) + self.inputs = {'x': convert_float_to_uint16(input)} + else: + output = output.astype(self.dtype) + self.inputs = {'x': OpTest.np_dtype_to_base_dtype(input)} + + self.attrs = { + 'strides': self.strides, + 'paddings': self.paddings, + 'kernel_size': self.ksize, + 'pooling_type': self.pool_type, + 'global_pooling': self.global_pool, + 'ceil_mode': self.ceil_mode, + 'data_format': self.data_format, + "padding_algorithm": self.padding_algorithm, + 'norm_type': self.norm_type, + } + + self.outputs = {'out': output} + + self.python_api = lp_pool2d_wrapper + + def has_cudnn(self): + return False + + def test_check_grad(self): + if self.dtype == np.float16: + return + self.check_grad( + {'x'}, + 'out', + max_relative_error=0.07, + check_dygraph=(not self.use_mkldnn), + check_pir=True, + check_pir_onednn=self.check_pir_onednn, + ) + + class TestCase1(TestPool2D_Op): def init_test_case(self): self.ksize = [3, 3] diff --git a/test/deprecated/legacy_test/test_pool3d_op.py b/test/legacy_test/test_pool3d_op.py similarity index 100% rename from 
test/deprecated/legacy_test/test_pool3d_op.py rename to test/legacy_test/test_pool3d_op.py diff --git a/test/deprecated/legacy_test/test_prelu_op.py b/test/legacy_test/test_prelu_op.py similarity index 88% rename from test/deprecated/legacy_test/test_prelu_op.py rename to test/legacy_test/test_prelu_op.py index 75bb9b69beed4..aecc3af208225 100644 --- a/test/deprecated/legacy_test/test_prelu_op.py +++ b/test/legacy_test/test_prelu_op.py @@ -20,7 +20,7 @@ import paddle import paddle.nn.functional as F from paddle import base -from paddle.base import Program, core +from paddle.base import core from paddle.pir_utils import test_with_pir_api @@ -481,65 +481,5 @@ def test_check_grad(self): create_test_bf16_class(TestModeElementRank3NHWC) create_test_bf16_class(TestModeElementRank6NHWC) - -def prelu_t(x, mode, param_attr=None, name=None, data_format='NCHW'): - helper = base.layer_helper.LayerHelper('prelu', **locals()) - alpha_shape = [1, x.shape[1], 1, 1] - dtype = helper.input_dtype(input_param_name='x') - alpha = helper.create_parameter( - attr=helper.param_attr, - shape=alpha_shape, - dtype='float32', - is_bias=False, - default_initializer=paddle.nn.initializer.Constant(0.25), - ) - out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="prelu", - inputs={"X": x, 'Alpha': alpha}, - attrs={"mode": mode, 'data_format': data_format}, - outputs={"Out": out}, - ) - return out - - -# error message test if mode is not one of 'all', 'channel', 'element' -class TestModeError(unittest.TestCase): - def setUp(self): - self.place = ( - paddle.CUDAPlace(0) - if core.is_compiled_with_cuda() - else paddle.CPUPlace() - ) - self.x_np = np.ones([1, 2, 3, 4]).astype('float32') - - def test_mode_error(self): - main_program = Program() - with base.program_guard(main_program, Program()): - x = paddle.static.data(name='x', shape=[2, 3, 4, 5]) - try: - y = prelu_t(x, 'any') - except Exception as e: - assert e.args[0].find('InvalidArgument') != -1 - - def 
test_data_format_error1(self): - main_program = Program() - with base.program_guard(main_program, Program()): - x = paddle.static.data(name='x', shape=[2, 3, 4, 5]) - try: - y = prelu_t(x, 'channel', data_format='N') - except Exception as e: - assert e.args[0].find('InvalidArgument') != -1 - - def test_data_format_error2(self): - main_program = Program() - with base.program_guard(main_program, Program()): - x = paddle.static.data(name='x', shape=[2, 3, 4, 5]) - try: - y = paddle.static.nn.prelu(x, 'channel', data_format='N') - except ValueError as e: - pass - - if __name__ == "__main__": unittest.main() diff --git a/test/deprecated/legacy_test/test_put_along_axis_op.py b/test/legacy_test/test_put_along_axis_op.py similarity index 100% rename from test/deprecated/legacy_test/test_put_along_axis_op.py rename to test/legacy_test/test_put_along_axis_op.py diff --git a/test/deprecated/legacy_test/test_qr_op.py b/test/legacy_test/test_qr_op.py similarity index 100% rename from test/deprecated/legacy_test/test_qr_op.py rename to test/legacy_test/test_qr_op.py diff --git a/test/deprecated/legacy_test/test_random_seed.py b/test/legacy_test/test_random_seed.py similarity index 87% rename from test/deprecated/legacy_test/test_random_seed.py rename to test/legacy_test/test_random_seed.py index ead15119a9922..8fbaf9a3d6942 100644 --- a/test/deprecated/legacy_test/test_random_seed.py +++ b/test/legacy_test/test_random_seed.py @@ -359,57 +359,6 @@ def test_generator_randperm_static(self): np.testing.assert_allclose(out1_res2, out2_res2, rtol=1e-05) self.assertTrue(not np.allclose(out1_res2, out1_res1)) - def test_gen_TruncatedNormal_initializer(self): - base.disable_dygraph() - - gen = paddle.seed(123123143) - cur_state = gen.get_state() - - startup_program = base.Program() - train_program = base.Program() - with base.program_guard(train_program, startup_program): - # example 1: - # attr shape is a list which doesn't contain tensor Variable. 
- x = paddle.uniform(shape=[2, 10]) - result_1 = paddle.static.nn.fc( - x, - size=10, - weight_attr=paddle.nn.initializer.TruncatedNormal( - mean=0.0, std=2.0 - ), - ) - result_2 = paddle.static.nn.fc( - x, - size=10, - weight_attr=paddle.nn.initializer.TruncatedNormal( - mean=0.0, std=2.0 - ), - ) - - exe = base.Executor(base.CPUPlace()) - exe.run(startup_program) - out1 = exe.run( - train_program, feed={}, fetch_list=[result_1, result_2] - ) - - gen.manual_seed(123123143) - with base.program_guard(train_program, startup_program): - exe.run(startup_program) - out2 = exe.run( - train_program, feed={}, fetch_list=[result_1, result_2] - ) - - out1_res1 = np.array(out1[0]) - out1_res2 = np.array(out1[1]) - out2_res1 = np.array(out2[0]) - out2_res2 = np.array(out2[1]) - - if not core.is_compiled_with_cuda(): - print(">>>>>>> sampling id static >>>>>>>") - np.testing.assert_allclose(out1_res1, out2_res1, rtol=1e-05) - np.testing.assert_allclose(out1_res2, out2_res2, rtol=1e-05) - self.assertTrue(not np.allclose(out1_res2, out1_res1)) - if __name__ == "__main__": unittest.main() diff --git a/test/legacy_test/test_regularizer.py b/test/legacy_test/test_regularizer.py new file mode 100644 index 0000000000000..a85f2fcb075da --- /dev/null +++ b/test/legacy_test/test_regularizer.py @@ -0,0 +1,217 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import contextlib +import random +import unittest + +import numpy as np + +import paddle +from paddle import base, regularizer +from paddle.base import core +from paddle.pir_utils import test_with_pir_api + + +class TestL1Decay(unittest.TestCase): + def test_l1decay_regularizer(self): + with paddle.pir_utils.IrGuard(): + main_program = paddle.static.Program() + with paddle.static.program_guard(main_program): + block = main_program.global_block() + mul_x = paddle.pir.core.create_parameter( + dtype="float32", + shape=[5, 10], + name="mul.x", + regularizer=regularizer.L1Decay(0.5), + initializer=paddle.nn.initializer.Constant(1), + ) + self.assertIsNotNone(mul_x.regularizer) + self.assertTrue( + isinstance(mul_x.regularizer, regularizer.L1Decay) + ) + + mul_y = paddle.static.data( + dtype="float32", shape=[10, 8], name="mul.y" + ) + mul_out = paddle.matmul(mul_x, mul_y) + mean_out = paddle.mean(mul_out) + grads = paddle.autograd.ir_backward.grad(mean_out, [mul_x]) + params_grads = [(mul_x, grads[0])] + self.assertEqual(len(params_grads), 1) + count_ops = len(block.ops) + optimizer = paddle.optimizer.Adam() + params_grads = optimizer.append_regularization_ops(params_grads) + self.assertEqual(len(params_grads), 1) + self.assertEqual(len(block.ops), count_ops + 5) + self.assertEqual(block.ops[-1].name(), 'pd_op.add_n') + self.assertEqual(block.ops[-3].name(), 'pd_op.scale') + self.assertEqual(block.ops[-5].name(), 'pd_op.sign') + + +class TestRegularizer(unittest.TestCase): + def setUp(self): + self.word_len = 1500 + self.train_data = [ + [(random.sample(range(1000), 10), [0])] for _ in range(2) + ] + + def get_places(self): + places = [core.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + return places + + @contextlib.contextmanager + def scope_prog_guard(self, main_prog, startup_prog): + scope = base.core.Scope() + with base.unique_name.guard(): + with base.scope_guard(scope): + with base.program_guard(main_prog, startup_prog): + 
yield + + def run_program(self, place, feed_list): + exe = base.Executor(place) + feeder = base.DataFeeder(feed_list=feed_list, place=place) + exe.run(base.default_startup_program()) + + main_prog = base.default_main_program() + param_list = [var.name for var in main_prog.block(0).all_parameters()] + + param_sum = [] + for data in self.train_data: + out = exe.run( + main_prog, feed=feeder.feed(data), fetch_list=param_list + ) + p_sum = 0 + for v in out: + p_sum += np.sum(np.abs(v)) + param_sum.append(p_sum) + return param_sum + + def check_l2decay_regularizer(self, place, model): + paddle.seed(1) + paddle.framework.random._manual_program_seed(1) + main_prog = base.framework.Program() + startup_prog = base.framework.Program() + with self.scope_prog_guard( + main_prog=main_prog, startup_prog=startup_prog + ): + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" + ) + + avg_cost = model(data, label, self.word_len) + + optimizer = paddle.optimizer.Adagrad( + learning_rate=0.1, + weight_decay=paddle.regularizer.L2Decay(1.0), + ) + optimizer.minimize(avg_cost) + param_sum = self.run_program(place, [data, label]) + return param_sum + + def check_l2decay(self, place, model): + paddle.seed(1) + paddle.framework.random._manual_program_seed(1) + main_prog = base.framework.Program() + startup_prog = base.framework.Program() + + with self.scope_prog_guard( + main_prog=main_prog, startup_prog=startup_prog + ): + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" + ) + + avg_cost_l2 = model(data, label, self.word_len) + + param_list = base.default_main_program().block(0).all_parameters() + para_sum = [] + for para in param_list: + para_mul = paddle.square(x=para) + para_sum.append(paddle.sum(para_mul)) + avg_cost_l2 += paddle.add_n(para_sum) * 0.5 + + 
optimizer = paddle.optimizer.Adagrad(learning_rate=0.1) + optimizer.minimize(avg_cost_l2) + param_sum = self.run_program(place, [data, label]) + return param_sum + + @test_with_pir_api + def test_repeated_regularization(self): + l1 = paddle.regularizer.L1Decay(coeff=0.1) + l2 = paddle.regularizer.L2Decay(coeff=0.01) + fc_param_attr = paddle.ParamAttr( + regularizer=paddle.regularizer.L1Decay() + ) + with paddle.static.program_guard( + paddle.static.Program(), paddle.static.Program() + ): + x = paddle.uniform([2, 2, 3]) + linear = paddle.nn.Linear(3, 5, weight_attr=fc_param_attr) + out = linear(x) + loss = paddle.sum(out) + sgd = paddle.optimizer.SGD(learning_rate=0.1, weight_decay=l2) + sgd.minimize(loss) + with base.dygraph.guard(): + input = paddle.to_tensor(np.random.randn(3, 2).astype('float32')) + paddle.seed(1) + paddle.framework.random._manual_program_seed(1) + + linear1 = paddle.nn.Linear( + 2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr + ) + linear2 = paddle.nn.Linear( + 2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr + ) + + loss1 = linear1(input) + loss1.backward() + # set l2 regularizer in optimizer, but l1 in base.ParamAttr + + paddle.optimizer.SGD( + parameters=linear1.parameters(), + learning_rate=1e-2, + weight_decay=l2, + ).minimize(loss1) + # only set l1 in base.ParamAttr + loss2 = linear2(input) + loss2.backward() + paddle.optimizer.SGD( + parameters=linear2.parameters(), learning_rate=1e-2 + ).minimize(loss2) + # they should both be applied by l1, and keep the same + np.testing.assert_allclose( + linear1.weight.numpy(), + linear2.weight.numpy(), + rtol=1e-05, + err_msg='weight should use the regularization in base.ParamAttr!', + ) + np.testing.assert_allclose( + linear1.bias.numpy(), + linear2.bias.numpy(), + rtol=1e-05, + err_msg='bias should use the regularization in base.ParamAttr!', + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/deprecated/legacy_test/test_regularizer_api.py 
b/test/legacy_test/test_regularizer_api.py similarity index 77% rename from test/deprecated/legacy_test/test_regularizer_api.py rename to test/legacy_test/test_regularizer_api.py index 32a98c5a72091..f6d3507628463 100644 --- a/test/deprecated/legacy_test/test_regularizer_api.py +++ b/test/legacy_test/test_regularizer_api.py @@ -15,7 +15,6 @@ import contextlib import random import unittest -from functools import partial import numpy as np @@ -25,41 +24,6 @@ from paddle.pir_utils import test_with_pir_api -def bow_net( - data, - label, - dict_dim, - is_sparse=False, - emb_dim=8, - hid_dim=8, - hid_dim2=6, - class_dim=2, -): - """ - BOW net - This model is from https://github.com/PaddlePaddle/models: - base/PaddleNLP/text_classification/nets.py - """ - emb = paddle.static.nn.embedding( - input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim] - ) - bow = paddle.static.nn.sequence_lod.sequence_pool( - input=emb, pool_type='sum' - ) - bow_tanh = paddle.tanh(bow) - fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh") - fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh") - prediction = paddle.static.nn.fc( - x=[fc_2], size=class_dim, activation="softmax" - ) - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=label, reduction='none', use_softmax=False - ) - avg_cost = paddle.mean(x=cost) - - return avg_cost - - class TestRegularizer(unittest.TestCase): def setUp(self): self.word_len = 1500 @@ -155,27 +119,6 @@ def check_l2decay(self, place, model): param_sum = self.run_program(place, [data, label]) return param_sum - def test_l2(self): - paddle.enable_static() - for place in self.get_places(): - dense_sparse_p_sum = [] - for sparse in [True, False]: - model = partial(bow_net, is_sparse=sparse) - framework_l2 = self.check_l2decay_regularizer(place, model) - l2 = self.check_l2decay(place, model) - assert len(l2) == len(framework_l2) - for i in range(len(l2)): - assert np.isclose(a=framework_l2[i], b=l2[i], rtol=5e-5) - 
dense_sparse_p_sum.append(framework_l2) - - assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1]) - for i in range(len(dense_sparse_p_sum[0])): - assert np.isclose( - a=dense_sparse_p_sum[0][i], - b=dense_sparse_p_sum[1][i], - rtol=5e-5, - ) - @test_with_pir_api def test_repeated_regularization(self): paddle.enable_static() diff --git a/test/deprecated/legacy_test/test_repeat_interleave_op.py b/test/legacy_test/test_repeat_interleave_op.py similarity index 100% rename from test/deprecated/legacy_test/test_repeat_interleave_op.py rename to test/legacy_test/test_repeat_interleave_op.py diff --git a/test/deprecated/legacy_test/test_reshape_op.py b/test/legacy_test/test_reshape_op.py similarity index 100% rename from test/deprecated/legacy_test/test_reshape_op.py rename to test/legacy_test/test_reshape_op.py diff --git a/test/deprecated/legacy_test/test_reverse_op.py b/test/legacy_test/test_reverse_op.py similarity index 100% rename from test/deprecated/legacy_test/test_reverse_op.py rename to test/legacy_test/test_reverse_op.py diff --git a/test/deprecated/legacy_test/test_roi_align_op.py b/test/legacy_test/test_roi_align_op.py similarity index 100% rename from test/deprecated/legacy_test/test_roi_align_op.py rename to test/legacy_test/test_roi_align_op.py diff --git a/test/deprecated/legacy_test/test_roi_pool_op.py b/test/legacy_test/test_roi_pool_op.py similarity index 100% rename from test/deprecated/legacy_test/test_roi_pool_op.py rename to test/legacy_test/test_roi_pool_op.py diff --git a/test/deprecated/legacy_test/test_roll_op.py b/test/legacy_test/test_roll_op.py similarity index 100% rename from test/deprecated/legacy_test/test_roll_op.py rename to test/legacy_test/test_roll_op.py diff --git a/test/deprecated/legacy_test/test_row_conv_op.py b/test/legacy_test/test_row_conv_op.py similarity index 100% rename from test/deprecated/legacy_test/test_row_conv_op.py rename to test/legacy_test/test_row_conv_op.py diff --git 
a/test/deprecated/legacy_test/test_save_inference_model_conditional_op.py b/test/legacy_test/test_save_inference_model_conditional_op.py similarity index 100% rename from test/deprecated/legacy_test/test_save_inference_model_conditional_op.py rename to test/legacy_test/test_save_inference_model_conditional_op.py diff --git a/test/deprecated/legacy_test/test_save_model_without_var.py b/test/legacy_test/test_save_model_without_var.py similarity index 100% rename from test/deprecated/legacy_test/test_save_model_without_var.py rename to test/legacy_test/test_save_model_without_var.py diff --git a/test/deprecated/legacy_test/test_scatter_op.py b/test/legacy_test/test_scatter_op.py similarity index 100% rename from test/deprecated/legacy_test/test_scatter_op.py rename to test/legacy_test/test_scatter_op.py diff --git a/test/deprecated/legacy_test/test_selu_op.py b/test/legacy_test/test_selu_op.py similarity index 100% rename from test/deprecated/legacy_test/test_selu_op.py rename to test/legacy_test/test_selu_op.py diff --git a/test/deprecated/legacy_test/test_sgd_op.py b/test/legacy_test/test_sgd_op.py similarity index 60% rename from test/deprecated/legacy_test/test_sgd_op.py rename to test/legacy_test/test_sgd_op.py index 20f67faf44f3f..d1e81b11c67e8 100644 --- a/test/deprecated/legacy_test/test_sgd_op.py +++ b/test/legacy_test/test_sgd_op.py @@ -20,7 +20,6 @@ from utils import dygraph_guard import paddle -from paddle import base from paddle.base import core paddle.enable_static() @@ -204,32 +203,6 @@ def test_sparse_parameter_sgd(self): self.check_with_place(place) -class TestSGDOpWithLargeInput(unittest.TestCase): - def runTest(self): - paddle.enable_static() - data = paddle.tensor.fill_constant(shape=[1], value=128, dtype='int64') - label = paddle.tensor.fill_constant( - shape=[1, 150], value=0.5, dtype='float32' - ) - emb = paddle.static.nn.embedding( - input=data, size=(10000000, 150), dtype='float32' - ) - out = paddle.nn.functional.normalize(x=emb, axis=-1) - - 
cost = paddle.nn.functional.square_error_cost(input=out, label=label) - avg_cost = paddle.mean(cost) - sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001) - sgd_optimizer.minimize(avg_cost) - - place = base.CPUPlace() - exe = base.Executor(place) - exe.run(base.default_startup_program()) - compiled_prog = base.compiler.CompiledProgram( - base.default_main_program() - ) - result = exe.run(compiled_prog, fetch_list=[avg_cost]) - - class TestSGDV2(unittest.TestCase): def test_sgd_dygraph(self): paddle.disable_static() @@ -247,50 +220,6 @@ def test_sgd_dygraph(self): adam.step() adam.clear_gradients() - def test_sgd(self): - paddle.enable_static() - - def check_sgd_optimizer(optimizer_attr): - init_program = paddle.static.Program() - program = paddle.static.Program() - block = program.global_block() - mul_x = block.create_parameter( - dtype="float32", - shape=[5, 10], - lod_level=0, - name="mul.x", - optimize_attr=optimizer_attr, - ) - mul_y = block.create_var( - dtype="float32", shape=[10, 8], lod_level=0, name="mul.y" - ) - mul_out = block.create_var( - dtype="float32", shape=[5, 8], lod_level=0, name="mul.out" - ) - mean_out = block.create_var( - dtype="float32", shape=[1], lod_level=0, name="mean.out" - ) - block.append_op( - type="mul", - inputs={"X": mul_x, "Y": mul_y}, - outputs={"Out": mul_out}, - attrs={"x_num_col_dims": 1}, - ) - block.append_op( - type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out} - ) - sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.01) - opts, _ = sgd_optimizer.minimize(mean_out, init_program) - return opts - - opts = check_sgd_optimizer({'learning_rate': 1.1}) - self.assertEqual(len(opts), 2) - self.assertEqual([op.type for op in opts], ["scale", "sgd"]) - - opts = check_sgd_optimizer({'learning_rate': 1.0}) - self.assertEqual(len(opts), 1) - self.assertEqual([op.type for op in opts], ["sgd"]) - def test_raise_error(self): self.assertRaises(ValueError, paddle.optimizer.SGD, learning_rate=None) @@ -320,114 +249,6 @@ 
def test_sgd_group_dygraph(self): adam.clear_gradients() -class TestSGDMultiPrecision2_0(unittest.TestCase): - def dygraph_sgd_mp(self, mp): - paddle.disable_static() - paddle.seed(10) - paddle.set_device('gpu') - input = paddle.randn((2, 2)) - model = paddle.nn.Linear(2, 2) - optimizer = paddle.optimizer.SGD( - parameters=model.parameters(), multi_precision=mp - ) - if mp: - model = paddle.amp.decorate(models=model, level='O2') - scaler = paddle.amp.GradScaler(init_loss_scaling=1024) - - for idx in range(5): - if mp: - with paddle.amp.auto_cast(level='O2'): - output = model(input) - loss = paddle.mean(output) - scaled = scaler.scale(loss) - scaled.backward() - scaler.minimize(optimizer, scaled) - optimizer.clear_grad() - else: - output = model(input) - loss = paddle.mean(output) - optimizer.step() - optimizer.clear_grad() - - return output, model.parameters() - - def static_sgd_mp(self, mp): - paddle.enable_static() - paddle.seed(10) - np.random.seed(10) - exe = paddle.static.Executor('gpu') - train_program = paddle.static.Program() - startup_program = paddle.static.Program() - optimizer = paddle.optimizer.SGD(multi_precision=mp) - - if mp: - optimizer = paddle.static.amp.decorate( - optimizer, - init_loss_scaling=128.0, - use_dynamic_loss_scaling=True, - use_pure_fp16=True, - use_fp16_guard=False, - ) - with paddle.static.program_guard(train_program, startup_program): - if mp: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float16' - ) - else: - data = paddle.static.data( - shape=[2, 2], name='X', dtype='float32' - ) - hidden = paddle.static.nn.fc(x=data, size=10) - loss = paddle.mean(hidden) - optimizer.minimize(loss) - exe.run(startup_program) - - if mp: - optimizer.amp_init( - place=paddle.CUDAPlace(0), scope=paddle.static.global_scope() - ) - x = np.random.random(size=(2, 2)).astype('float16') - else: - x = np.random.random(size=(2, 2)).astype('float32') - out = [] - for idx in range(5): - (loss_data,) = exe.run( - train_program, feed={"X": x}, 
fetch_list=[loss] - ) - out.append(loss_data) - return out - - def test_main(self): - if not paddle.is_compiled_with_cuda(): - return - "Test dygraph mode" - output1_dy, params1_dy = self.dygraph_sgd_mp(mp=True) - output2_dy, params2_dy = self.dygraph_sgd_mp(mp=False) - np.testing.assert_allclose( - output1_dy.astype('float32').numpy(), - output2_dy.astype('float32').numpy(), - rtol=1e-05, - atol=0.1, - ) - for idx in range(len(params1_dy)): - np.testing.assert_allclose( - params1_dy[idx].astype('float32').numpy(), - params2_dy[idx].astype('float32').numpy(), - rtol=1e-05, - atol=0.1, - ) - "Test static graph mode" - output1_st = self.static_sgd_mp(mp=True) - output2_st = self.static_sgd_mp(mp=False) - for idx in range(len(output1_st)): - np.testing.assert_allclose( - output1_st[idx].astype('float32'), - output2_st[idx].astype('float32'), - rtol=1e-05, - atol=0.1, - ) - - class TestSGDSimple(unittest.TestCase): def setUp(self) -> None: self.data = np.random.random(size=(2, 2)).astype('float32') diff --git a/test/deprecated/legacy_test/test_shuffle_channel_op.py b/test/legacy_test/test_shuffle_channel_op.py similarity index 100% rename from test/deprecated/legacy_test/test_shuffle_channel_op.py rename to test/legacy_test/test_shuffle_channel_op.py diff --git a/test/deprecated/legacy_test/test_sigmoid_cross_entropy_with_logits_grad_with_auto_grad.py b/test/legacy_test/test_sigmoid_cross_entropy_with_logits_grad_with_auto_grad.py similarity index 100% rename from test/deprecated/legacy_test/test_sigmoid_cross_entropy_with_logits_grad_with_auto_grad.py rename to test/legacy_test/test_sigmoid_cross_entropy_with_logits_grad_with_auto_grad.py diff --git a/test/deprecated/legacy_test/test_sign_op.py b/test/legacy_test/test_sign_op.py similarity index 100% rename from test/deprecated/legacy_test/test_sign_op.py rename to test/legacy_test/test_sign_op.py diff --git a/test/deprecated/legacy_test/test_solve_op.py b/test/legacy_test/test_solve_op.py similarity index 100% rename 
from test/deprecated/legacy_test/test_solve_op.py rename to test/legacy_test/test_solve_op.py diff --git a/test/legacy_test/test_sparse_mask_as_op.py b/test/legacy_test/test_sparse_mask_as_op.py new file mode 100644 index 0000000000000..f4cd639452b5d --- /dev/null +++ b/test/legacy_test/test_sparse_mask_as_op.py @@ -0,0 +1,159 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle + + +def generate_data(shape, dtype): + """ + Generate `data` and `mask` with the same shape and dtype. 
+ """ + _mask = np.random.randint(0, 2, shape) + if np.sum(_mask) == 0: + _mask.flat[0] = 1 + mask = (np.random.randint(-100, 100, shape) * _mask).astype(dtype) + data = np.random.randint(-100, 100, shape).astype(dtype) + return data, mask + + +class TestMaskAs(unittest.TestCase): + def setUp(self): + self.init_format() + self.places = [paddle.CPUPlace()] + if paddle.is_compiled_with_cuda(): + self.places.append(paddle.CUDAPlace(0)) + + def init_format(self): + self.format = None + + def check(self, shape, dtype, place, check_grad=True): + paddle.disable_static() + dense_data_np, dense_mask_np = generate_data(shape, dtype) + + dense_data_pd = paddle.to_tensor( + dense_data_np, dtype=dtype, place=place + ) + dense_data_pd.stop_gradient = False + + if self.format == 'coo': + sparse_mask_pd = paddle.to_tensor( + dense_mask_np, dtype=dtype, place=place + ).to_sparse_coo(len(shape)) + else: + sparse_mask_pd = paddle.to_tensor( + dense_mask_np, dtype=dtype, place=place + ).to_sparse_csr() + + sparse_out_pd = paddle.sparse.mask_as(dense_data_pd, sparse_mask_pd) + + # compare the tensor from sparse->dense with reference numpy data + # the result only keeps the values where mask not zero, like: + # dense_data_np + # [[ 38. 15. 76.] + # [-98. -75. 10.] + # [-52. 49. -48.]] + # dense_mask_np + # [[-70. 0. 0.] + # [-50. 34. 60.] + # [-34. 0. -18.]] + # dense_data_np_ref + # [[ 38. 0. 0.] + # [-98. -75. 10.] + # [-52. 0. 
-48.]] + dense_data_np_ref = dense_data_np * (dense_mask_np != 0) + np.testing.assert_allclose( + sparse_out_pd.to_dense().numpy(), dense_data_np_ref + ) + + if check_grad: + # with sparse_out_pd backward, we get the grad from dense_data_pd + sparse_out_pd.backward() + dense_data_grad = dense_data_pd.grad + + self.assertEqual( + list(dense_data_grad.shape), list(dense_data_pd.shape) + ) + self.assertEqual(dense_data_grad.dtype, dense_data_pd.dtype) + + # make a dense data to compare the grad from sparse_out_pd + grad_ref = np.ones_like(dense_mask_np) * (dense_mask_np != 0) + + np.testing.assert_allclose( + dense_data_pd.grad.numpy(), + grad_ref, + ) + + def check_with_dtypes(self, shape): + for place in self.places: + self.check(shape, 'float32', place) + self.check(shape, 'float64', place) + self.check(shape, 'int32', place) + self.check(shape, 'int64', place) + self.check(shape, 'complex64', place) + self.check(shape, 'complex128', place) + + # `int8` not registered in `FullLikeCooKernel`, so skip check_grad + self.check(shape, 'int8', place, check_grad=False) + + # `int16` not registered in `multiply`, so skip check_grad + self.check(shape, 'int16', place, check_grad=False) + + if paddle.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + self.check(shape, 'float16', place) + + +class TestMaskAsCoo(TestMaskAs): + def init_format(self): + self.format = 'coo' + + def test_1d(self): + self.check_with_dtypes((5,)) + + def test_2d(self): + self.check_with_dtypes((5, 3)) + + def test_3d(self): + self.check_with_dtypes((5, 3, 4)) + + def test_4d(self): + self.check_with_dtypes((5, 3, 4, 2)) + + +class TestMaskAsCsr(TestMaskAs): + def init_format(self): + self.format = 'csr' + + def test_2d(self): + self.check_with_dtypes((5, 3)) + + def test_3d(self): + self.check_with_dtypes((5, 3, 4)) + + def test_error_dimension(self): + # error 1d + with self.assertRaises(ValueError): + self.check_with_dtypes((5,)) + + # error 4d + with self.assertRaises(ValueError): + 
self.check_with_dtypes((5, 3, 4, 2)) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/deprecated/legacy_test/test_spectral_norm_op.py b/test/legacy_test/test_spectral_norm_op.py similarity index 100% rename from test/deprecated/legacy_test/test_spectral_norm_op.py rename to test/legacy_test/test_spectral_norm_op.py diff --git a/test/deprecated/legacy_test/test_split_op.py b/test/legacy_test/test_split_op.py similarity index 100% rename from test/deprecated/legacy_test/test_split_op.py rename to test/legacy_test/test_split_op.py diff --git a/test/deprecated/legacy_test/test_static_save_load_large.py b/test/legacy_test/test_static_save_load_large.py similarity index 100% rename from test/deprecated/legacy_test/test_static_save_load_large.py rename to test/legacy_test/test_static_save_load_large.py diff --git a/test/deprecated/legacy_test/test_stft_op.py b/test/legacy_test/test_stft_op.py similarity index 100% rename from test/deprecated/legacy_test/test_stft_op.py rename to test/legacy_test/test_stft_op.py diff --git a/test/deprecated/legacy_test/test_svd_op.py b/test/legacy_test/test_svd_op.py similarity index 100% rename from test/deprecated/legacy_test/test_svd_op.py rename to test/legacy_test/test_svd_op.py diff --git a/test/deprecated/legacy_test/test_swiglu.py b/test/legacy_test/test_swiglu.py similarity index 100% rename from test/deprecated/legacy_test/test_swiglu.py rename to test/legacy_test/test_swiglu.py diff --git a/test/deprecated/legacy_test/test_temporal_shift_op.py b/test/legacy_test/test_temporal_shift_op.py similarity index 100% rename from test/deprecated/legacy_test/test_temporal_shift_op.py rename to test/legacy_test/test_temporal_shift_op.py diff --git a/test/deprecated/legacy_test/test_top_k_op.py b/test/legacy_test/test_top_k_op.py similarity index 100% rename from test/deprecated/legacy_test/test_top_k_op.py rename to test/legacy_test/test_top_k_op.py diff --git a/test/deprecated/legacy_test/test_top_k_v2_op.py 
b/test/legacy_test/test_top_k_v2_op.py similarity index 100% rename from test/deprecated/legacy_test/test_top_k_v2_op.py rename to test/legacy_test/test_top_k_v2_op.py diff --git a/test/deprecated/legacy_test/test_trace_op.py b/test/legacy_test/test_trace_op.py similarity index 100% rename from test/deprecated/legacy_test/test_trace_op.py rename to test/legacy_test/test_trace_op.py diff --git a/test/deprecated/legacy_test/test_triangular_solve_op.py b/test/legacy_test/test_triangular_solve_op.py similarity index 100% rename from test/deprecated/legacy_test/test_triangular_solve_op.py rename to test/legacy_test/test_triangular_solve_op.py diff --git a/test/deprecated/legacy_test/test_trilinear_interp_op.py b/test/legacy_test/test_trilinear_interp_op.py similarity index 100% rename from test/deprecated/legacy_test/test_trilinear_interp_op.py rename to test/legacy_test/test_trilinear_interp_op.py diff --git a/test/deprecated/legacy_test/test_trilinear_interp_v2_op.py b/test/legacy_test/test_trilinear_interp_v2_op.py similarity index 100% rename from test/deprecated/legacy_test/test_trilinear_interp_v2_op.py rename to test/legacy_test/test_trilinear_interp_v2_op.py diff --git a/test/deprecated/legacy_test/test_trunc_op.py b/test/legacy_test/test_trunc_op.py similarity index 100% rename from test/deprecated/legacy_test/test_trunc_op.py rename to test/legacy_test/test_trunc_op.py diff --git a/test/deprecated/legacy_test/test_unfold_op.py b/test/legacy_test/test_unfold_op.py similarity index 100% rename from test/deprecated/legacy_test/test_unfold_op.py rename to test/legacy_test/test_unfold_op.py diff --git a/test/deprecated/legacy_test/test_unique_consecutive_op.py b/test/legacy_test/test_unique_consecutive_op.py similarity index 100% rename from test/deprecated/legacy_test/test_unique_consecutive_op.py rename to test/legacy_test/test_unique_consecutive_op.py diff --git a/test/deprecated/legacy_test/test_unpool3d_op.py b/test/legacy_test/test_unpool3d_op.py similarity 
index 100% rename from test/deprecated/legacy_test/test_unpool3d_op.py rename to test/legacy_test/test_unpool3d_op.py diff --git a/test/deprecated/legacy_test/test_unpool_op.py b/test/legacy_test/test_unpool_op.py similarity index 100% rename from test/deprecated/legacy_test/test_unpool_op.py rename to test/legacy_test/test_unpool_op.py diff --git a/test/deprecated/legacy_test/test_unstack_op.py b/test/legacy_test/test_unstack_op.py similarity index 100% rename from test/deprecated/legacy_test/test_unstack_op.py rename to test/legacy_test/test_unstack_op.py diff --git a/test/deprecated/legacy_test/test_yolov3_loss_op.py b/test/legacy_test/test_yolov3_loss_op.py similarity index 100% rename from test/deprecated/legacy_test/test_yolov3_loss_op.py rename to test/legacy_test/test_yolov3_loss_op.py diff --git a/test/mkldnn/test_batch_norm_mkldnn_op.py b/test/mkldnn/test_batch_norm_mkldnn_op.py index 99f48c65b0a4e..490021ecee6d9 100644 --- a/test/mkldnn/test_batch_norm_mkldnn_op.py +++ b/test/mkldnn/test_batch_norm_mkldnn_op.py @@ -20,8 +20,8 @@ from op_test import _set_use_system_allocator, pir_executor_guard sys.path.append("../deprecated/legacy_test") -from test_batch_norm_op import ( - TestBatchNormOpInference, +from test_batch_norm_op import TestBatchNormOpInference +from test_batch_norm_op_deprecated import ( TestBatchNormOpTraining, _reference_grad, _reference_training, diff --git a/test/prim/pir_prim/CMakeLists.txt b/test/prim/pir_prim/CMakeLists.txt index 108cc3b8b28da..c2bb0610a60d6 100644 --- a/test/prim/pir_prim/CMakeLists.txt +++ b/test/prim/pir_prim/CMakeLists.txt @@ -12,7 +12,9 @@ set(TEST_PRIM_PURE_PIR_CASES test_auto_recompute test_auto_recompute_dy2static test_prim_sub_graph_dynamic_shape - test_decompose_control_flow) + test_prim_sub_graph_backward_dynamic_shape + test_decompose_control_flow + test_decomp_whole_program) foreach(target ${TEST_PRIM_PURE_PIR_CASES}) py_test_modules( @@ -52,6 +54,7 @@ if(WITH_CINN) FLAGS_prim_check_ops=true 
FLAGS_enable_pir_api=true FLAGS_prim_enable_dynamic=true + FLAGS_prim_vjp_skip_default_ops=false FLAGS_cinn_bucket_compile=True FLAGS_pir_apply_shape_optimization_pass=1) set_tests_properties(${target} PROPERTIES LABELS "RUN_TYPE=CINN") diff --git a/test/prim/pir_prim/test_decomp_whole_program.py b/test/prim/pir_prim/test_decomp_whole_program.py index f8c58ef7c2469..7d0b28edf5dad 100644 --- a/test/prim/pir_prim/test_decomp_whole_program.py +++ b/test/prim/pir_prim/test_decomp_whole_program.py @@ -40,7 +40,8 @@ def base_net(self, flag=None): y.stop_gradient = False x1 = paddle.sin(x) y1 = paddle.cos(y) - tmp1 = paddle.matmul(x1, y1) + y3 = paddle.matmul(x1, y1) + tmp1 = paddle.concat((x1, y1, y3)) tmp2 = paddle.mean(tmp1) sum_out = paddle.sin(tmp2) gradients = grad(sum_out, (x, y)) @@ -54,17 +55,18 @@ def base_net(self, flag=None): whole_ops = [op.name() for op in main_program.global_block().ops] if flag == "prim": - assert 'pd_op.matmul_grad' not in whole_ops + assert 'pd_op.concat_grad' not in whole_ops else: - assert 'pd_op.matmul_grad' in whole_ops + assert 'pd_op.concat_grad' in whole_ops return fwd, dx, dy def test_prim_all(self): + paddle.base.core._set_prim_backward_blacklist("sin_grad", "cos_grad") res_ref = self.base_net() res = self.base_net("prim") for ref, actual in zip(res_ref, res): - np.testing.assert_allclose(ref, actual, rtol=1e-6) + np.testing.assert_allclose(ref, actual, rtol=1e-6, atol=1e-6) if __name__ == "__main__": diff --git a/test/prim/pir_prim/test_prim_sub_graph_backward_dynamic_shape.py b/test/prim/pir_prim/test_prim_sub_graph_backward_dynamic_shape.py new file mode 100644 index 0000000000000..4567139ea3c34 --- /dev/null +++ b/test/prim/pir_prim/test_prim_sub_graph_backward_dynamic_shape.py @@ -0,0 +1,210 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle.framework import core +from paddle.static import InputSpec + + +def sum_net1(x): + return paddle.sum(x, axis=1, keepdim=False) + + +def sum_net2(x): + return paddle.sum(x) + + +def sum_net3(x): + return paddle.sum(x, keepdim=True) + + +def sum_net4(x): + return paddle.sum(x, axis=-1, keepdim=False) + + +def sum_net5(x): + return paddle.sum(x, axis=[0, 2], keepdim=False) + + +def mean_net1(x): + return paddle.mean(x, axis=1, keepdim=False) + + +def mean_net2(x): + return paddle.mean(x, axis=-1, keepdim=False) + + +def mean_net3(x): + return paddle.mean(x, axis=[0, 2], keepdim=False) + + +def apply_to_static(net, use_cinn, input_spec=None): + build_strategy = paddle.static.BuildStrategy() + build_strategy.build_cinn_pass = use_cinn + return paddle.jit.to_static( + net, + input_spec=input_spec, + build_strategy=build_strategy, + full_graph=True, + ) + + +class TestPrimBaseWithGrad(unittest.TestCase): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [30, 200, 40] + self.init_x_shape = [None, None, 40] + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.net = sum_net1 + self.enable_cinn = False + self.tol = 1e-6 + + def base_net(self, flag=None): + if flag == "prim": + core._set_prim_all_enabled(True) + x = paddle.to_tensor(self.x, stop_gradient=False) + if flag == "prim": + fn = apply_to_static( + self.net, + use_cinn=self.enable_cinn, + input_spec=[ + InputSpec(shape=self.init_x_shape, dtype='float32'), + ], + ) + fn.train() + else: + fn = 
self.net + res = fn(x) + res.backward() + x_grad = x.gradient() + if flag == "prim": + core._set_prim_all_enabled(False) + return res, x_grad + + def test_prim_all_dynamic(self): + res_ref, grad_ref = self.base_net() + res, grad = self.base_net("prim") + + for ref, actual in zip(res_ref, res): + np.testing.assert_allclose( + ref, actual, rtol=self.tol, atol=self.tol + ) + + for dr, d in zip(grad_ref, grad): + np.testing.assert_allclose(dr, d, rtol=self.tol, atol=self.tol) + + +class TestPrimSumWithGrad1(TestPrimBaseWithGrad): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [1000] + self.init_x_shape = [None] + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.net = sum_net2 + self.enable_cinn = False + self.tol = 1e-6 + + +class TestPrimSumWithGrad2(TestPrimBaseWithGrad): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [30, 200, 40] + self.init_x_shape = [None, None, 40] + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.net = sum_net3 + self.enable_cinn = False + self.tol = 1e-6 + + +class TestPrimSumWithGrad3(TestPrimBaseWithGrad): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [30, 200, 40] + self.init_x_shape = [None, None, 40] + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.net = sum_net2 + self.enable_cinn = False + self.tol = 1e-6 + + +class TestPrimSumWithGrad4(TestPrimBaseWithGrad): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [30, 200, 40] + self.init_x_shape = [None, None, 40] + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.net = sum_net4 + self.enable_cinn = False + self.tol = 1e-6 + + +class TestPrimSumWithGrad5(TestPrimBaseWithGrad): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [30, 200, 40] + self.init_x_shape = [None, None, 40] + self.x = np.random.random(self.x_shape).astype(self.dtype) + 
self.net = sum_net5 + self.enable_cinn = False + self.tol = 1e-6 + + +class TestPrimMeanWithGrad(TestPrimBaseWithGrad): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [30, 200, 40] + self.init_x_shape = [None, None, 40] + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.net = mean_net1 + self.enable_cinn = False + self.tol = 1e-6 + + +class TestPrimMeanWithGrad2(TestPrimBaseWithGrad): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [30, 200, 40] + self.init_x_shape = [None, None, 40] + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.net = mean_net2 + self.enable_cinn = False + self.tol = 1e-6 + + +class TestPrimMeanWithGrad3(TestPrimBaseWithGrad): + def setUp(self): + np.random.seed(2023) + self.dtype = "float32" + self.x_shape = [30, 200, 40] + self.init_x_shape = [None, None, 40] + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.net = mean_net3 + self.enable_cinn = False + self.tol = 1e-6 + + +if __name__ == "__main__": + unittest.main() diff --git a/test/ps/download_criteo_data.sh b/test/ps/download_criteo_data.sh index 69bfd90bee050..911ba59f34d5e 100755 --- a/test/ps/download_criteo_data.sh +++ b/test/ps/download_criteo_data.sh @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/test/ps/download_data.sh b/test/ps/download_data.sh index 498d9df9c2b4a..8feb69bcb9407 100755 --- a/test/ps/download_data.sh +++ b/test/ps/download_data.sh @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/test/ps/gpubox_run.sh b/test/ps/gpubox_run.sh index a38a4498ee4c0..27316171ec667 100644 --- a/test/ps/gpubox_run.sh +++ b/test/ps/gpubox_run.sh @@ -1,13 +1,13 @@ # !/bin/bash # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/test/quantization/CMakeLists.txt b/test/quantization/CMakeLists.txt index e18f8c0a38096..5b37f83e0c28f 100644 --- a/test/quantization/CMakeLists.txt +++ b/test/quantization/CMakeLists.txt @@ -461,16 +461,9 @@ list(REMOVE_ITEM TEST_OPS test_filter_pruning) # fix if(WIN32) set(SINGLE_CARD_TEST_OPS - test_user_defined_quantization - test_quantization_scale_pass - test_quantization_pass - test_moving_average_abs_max_scale_op - test_imperative_qat_channelwise - test_imperative_qat - test_imperative_qat_lsq - test_imperative_qat_matmul - test_imperative_out_scale - test_graph) + test_imperative_qat_channelwise test_imperative_qat + test_imperative_qat_lsq test_imperative_qat_matmul + test_imperative_out_scale) list(REMOVE_ITEM TEST_OPS ${SINGLE_CARD_TEST_OPS}) foreach(src ${SINGLE_CARD_TEST_OPS}) py_test(${src} SRCS ${src}.py ENVS CUDA_VISIBLE_DEVICES=0) diff --git a/test/quantization/test_imperative_qat_lsq.py b/test/quantization/test_imperative_qat_lsq.py index c71bd02c56bbc..bd16d309b249c 100644 --- a/test/quantization/test_imperative_qat_lsq.py +++ b/test/quantization/test_imperative_qat_lsq.py @@ -213,7 +213,7 @@ def func_qat(self): print('eval_acc_top1', eval_acc_top1) self.assertTrue( eval_acc_top1 > 0.9, - msg="The test acc {%f} is less than 0.9." 
% eval_acc_top1, + msg=f"The test acc {{{eval_acc_top1:f}}} is less than 0.9.", ) def test_qat(self): diff --git a/test/sot/test_builtin_map.py b/test/sot/test_builtin_map.py index f005ec10cdbe4..bad6206f3b3bc 100644 --- a/test/sot/test_builtin_map.py +++ b/test/sot/test_builtin_map.py @@ -24,11 +24,11 @@ from paddle.jit.sot.utils import strict_mode_guard -def double_num(num: float | int): +def double_num(num: float): return num * 2 -def double_num_with_breakgraph(num: float | int): +def double_num_with_breakgraph(num: float): sot.psdb.breakgraph() return num * 2 diff --git a/test/sot/test_sot_dynamic_shape.py b/test/sot/test_sot_dynamic_shape.py index ceed37d64438a..12608d1c871e4 100644 --- a/test/sot/test_sot_dynamic_shape.py +++ b/test/sot/test_sot_dynamic_shape.py @@ -25,7 +25,7 @@ from paddle.jit.sot.utils import with_allow_dynamic_shape_guard -def foo(x): +def dynamic_shape_input_func1(x): s = x.shape[0] return x + s @@ -85,6 +85,20 @@ def test_dynamic_int_input_cache_hit_case3(self): ) self.assertEqual(ctx.translate_count, i + 1) + def test_dynamic_shape_input_cache_hit_case1(self): + with with_allow_dynamic_shape_guard( + True + ), test_instruction_translator_cache_context() as ctx: + self.assert_results( + dynamic_shape_input_func1, paddle.randn([1, 4, 5]) + ) + self.assertEqual(ctx.translate_count, 1) + for i in range(2, 6): + self.assert_results( + dynamic_shape_input_func1, paddle.randn([i, 4, 5]) + ) + self.assertEqual(ctx.translate_count, 2) + if __name__ == '__main__': unittest.main() diff --git a/test/standalone_executor/test_standalone_measure_real_op_cost.py b/test/standalone_executor/test_standalone_measure_real_op_cost.py index 9825e16e91ee6..8ee254a427d8e 100644 --- a/test/standalone_executor/test_standalone_measure_real_op_cost.py +++ b/test/standalone_executor/test_standalone_measure_real_op_cost.py @@ -112,7 +112,7 @@ def _run_op_profiling(self, place, run_profiling=True): return loss_data def _compare_loss_between(self, loss_run1, 
loss_run2): - s1, s2 = '%.6f' % loss_run1, '%.6f' % loss_run2 + s1, s2 = f'{loss_run1:.6f}', f'{loss_run2:.6f}' return s1 == s2 def test_op_profiling_cuda0(self): diff --git a/test/white_list/op_threshold_white_list.py b/test/white_list/op_threshold_white_list.py index 518980dec7de7..9809105815577 100644 --- a/test/white_list/op_threshold_white_list.py +++ b/test/white_list/op_threshold_white_list.py @@ -33,6 +33,7 @@ 'fractional_max_pool3d', 'norm', 'pool3d', + 'lp_pool2d', 'reduce_prod', 'selu', 'sigmoid_cross_entropy_with_logits', diff --git a/test/white_list/pir_op_test_white_list b/test/white_list/pir_op_test_white_list index 2daa7ddd497e4..99eb57ea4a17b 100644 --- a/test/white_list/pir_op_test_white_list +++ b/test/white_list/pir_op_test_white_list @@ -142,7 +142,7 @@ test_i0_op test_i0e_op test_i1_op test_i1e_op -test_imperative_lod_tensor_to_selected_rows +test_imperative_lod_tensor_to_selected_rows_deprecated test_index_add_op test_index_sample_op test_index_select_op diff --git a/test/xpu/test_block_multihead_attention_op_xpu.py b/test/xpu/test_block_multihead_attention_op_xpu.py new file mode 100644 index 0000000000000..6a898bbf8f26e --- /dev/null +++ b/test/xpu/test_block_multihead_attention_op_xpu.py @@ -0,0 +1,581 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +from paddle.incubate.nn.functional import block_multihead_attention_xpu + +paddle.seed(2023) +np.random.seed(2023) + + +def create_attn_mask( + mask_type, + batch_size, + seq_lens, + pre_cache_length=0, +): + max_seq_len = max(seq_lens) + mask = paddle.zeros( + [batch_size, 1, max_seq_len, max_seq_len + pre_cache_length], + dtype=mask_type, + ) + mask[:, :, :, :pre_cache_length] = 1 + for i in range(batch_size): + seq_len = seq_lens[i] + mask[i, 0, :seq_len, :seq_len] = ( + paddle.tril(paddle.ones(shape=(seq_len, seq_len), dtype=mask_type)) + - 1 + ) * 1e4 + return mask + + +def naive_attention_impl( + query, + key, + value, + cache_k=None, + cache_v=None, + pre_cache_k=None, + pre_cache_v=None, + mask=None, + scale=1.0, + cache_k_dequant_scales=None, + cache_v_dequant_scales=None, + use_cachekv_int8="None", +): + batch = query.shape[0] + heads = query.shape[1] + seq_len = query.shape[2] + head_dim = query.shape[3] + kv_head = key.shape[1] + + key = key.reshape([batch, kv_head, 1, seq_len, head_dim]) + key = paddle.tile(key, [1, 1, heads // kv_head, 1, 1]) + key = key.reshape([batch, heads, seq_len, head_dim]) + + if use_cachekv_int8 == "dynamic": + unsqueeze_shape = [2, 3] + elif use_cachekv_int8 == "static": + unsqueeze_shape = [0, 2, 3] + if pre_cache_k is not None: + key = paddle.concat([pre_cache_k, key], axis=2) + if cache_k is not None: + if cache_k_dequant_scales is not None: + dequant_cache_k = ( + (cache_k.astype('float32') - 128.0) + * cache_k_dequant_scales.unsqueeze(unsqueeze_shape) + ).astype(key.dtype) + key = paddle.concat([dequant_cache_k, key], axis=2) + else: + key = paddle.concat([cache_k, key], axis=2) + + value = value.reshape([batch, kv_head, 1, seq_len, head_dim]) + value = paddle.tile(value, [1, 1, heads // kv_head, 1, 1]) + value = value.reshape([batch, heads, seq_len, head_dim]) + if pre_cache_v is not None: + value = paddle.concat([pre_cache_v, value], axis=2) + if cache_v is not 
None: + if cache_v_dequant_scales is not None: + dequant_cache_v = ( + (cache_v.astype('float32') - 128.0) + * cache_v_dequant_scales.unsqueeze(unsqueeze_shape) + ).astype(value.dtype) + value = paddle.concat([dequant_cache_v, value], axis=2) + else: + value = paddle.concat([cache_v, value], axis=2) + qk_res = paddle.matmul(query, key, transpose_y=True) + attention = qk_res * scale + if mask is not None: + attention = attention + mask + softmax_result = paddle.nn.functional.softmax(attention, -1) + result = paddle.matmul(softmax_result, value) + return result + + +def get_padding_offset(bsz, max_seq_len, seq_lens_this_time): + cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time) + cum_offsets = paddle.zeros(shape=(bsz + 1), dtype="int32") + cum_offsets[1:] = cum_offsets_now + token_num = paddle.sum(seq_lens_this_time) + padding_offsets = paddle.zeros(shape=(token_num), dtype="int32") + cu_seqlens_q = paddle.zeros(shape=(bsz + 1), dtype="int32") + cu_seqlens_k = paddle.zeros(shape=(bsz + 1), dtype="int32") + for i in range(bsz): + seq_len_now = seq_lens_this_time[i] + cum_offset = cum_offsets[i] + for j in range(seq_len_now): + padding_offsets[i * max_seq_len - cum_offset + j] = cum_offset + cum_seq_len = (i + 1) * max_seq_len - cum_offsets[i + 1] + cu_seqlens_q[i + 1] = cum_seq_len + cu_seqlens_k[i + 1] = cum_seq_len + return padding_offsets, cum_offsets[:-1], cu_seqlens_q, cu_seqlens_k + + +class RopeEmbedding: + def _rotary_position_embedding(self, seq_len, head_dim, dtype): + pos_seq = paddle.arange(0, seq_len, 1, dtype=dtype) + indices = paddle.arange(0, head_dim, 2, dtype=dtype) + indices = 1 / 10000 ** (indices / head_dim) + + sinusoid_inp = pos_seq.unsqueeze(1) * indices.unsqueeze(0) + pos_emb = paddle.concat( + [paddle.sin(sinusoid_inp), paddle.cos(sinusoid_inp)], axis=-1 + ) + pos_emb = paddle.reshape(pos_emb, (1, 1, seq_len, head_dim)) + pos_emb.stop_gradient = True + return pos_emb + + def _apply_rope(self, rp, q, k, v=None): + # sin 
[sequence_length, embed_size_per_head//2] + # cos [sequence_length, embed_size_per_head//2] + sin, cos = paddle.chunk(rp, 2, axis=-1) + # sin [θ0,θ1,θ2......θd/2-1] -> sin_pos [θ0,θ0,θ1,θ1,θ2,θ2......θd/2-1,θd/2-1] + sin_pos = paddle.reshape(paddle.stack([sin, sin], axis=-1), rp.shape) + # cos [θ0,θ1,θ2......θd/2-1] -> cos_pos [θ0,θ0,θ1,θ1,θ2,θ2......θd/2-1,θd/2-1] + cos_pos = paddle.reshape(paddle.stack([cos, cos], axis=-1), rp.shape) + # rotate_half_query_layer [-q1,q0,-q3,q2......,-qd-1,qd-2] + rotate_half_q = paddle.reshape( + paddle.stack([-q[:, :, :, 1::2], q[:, :, :, 0::2]], axis=-1), + paddle.shape(q), + ) + query = paddle.add( + paddle.multiply(q, cos_pos), paddle.multiply(rotate_half_q, sin_pos) + ) + # rotate_half_key_layer [-k1,k0,-k3,k2......,-kd-1,kd-2] + rotate_half_k = paddle.reshape( + paddle.stack([-k[:, :, :, 1::2], k[:, :, :, 0::2]], axis=-1), + paddle.shape(k), + ) + key = paddle.add( + paddle.multiply(k, cos_pos), paddle.multiply(rotate_half_k, sin_pos) + ) + if v is not None: + # rotate_half_value_layer [-v1,v0,-v3,v2......,-vd-1,vd-2] + rotate_half_v = paddle.reshape( + paddle.stack([-v[:, :, :, 1::2], v[:, :, :, 0::2]], axis=-1), + paddle.shape(v), + ) + value = paddle.add( + paddle.multiply(v, cos_pos), + paddle.multiply(rotate_half_v, sin_pos), + ) + return query, key, value + return query, key + + def _apply_neox_rope(self, rp, q, k, v=None): + # sin [bs, sequence_length, embed_size_per_head//2] + # cos [bs, sequence_length, embed_size_per_head//2] + sin, cos = paddle.chunk(rp, 2, axis=-1) + + # sin [θ0,θ1,θ2......θd/2-1] -> sin_pos [θ0,θ1,θ2......θd/2-1, θ0,θ1,θ2......θd/2-1] + sin_pos = paddle.concat([sin, sin], axis=-1).squeeze(0).unsqueeze(1) + # cos [θ0,θ1,θ2......θd/2-1] -> cos_pos [θ0,θ1,θ2......θd/2-1, θ0,θ1,θ2......θd/2-1] + cos_pos = paddle.concat([cos, cos], axis=-1).squeeze(0).unsqueeze(1) + rotate_half_q = paddle.reshape( + paddle.concat( + [-q[:, :, :, sin.shape[-1] :], q[:, :, :, 0 : sin.shape[-1]]], + axis=-1, + ), + 
paddle.shape(q), + ) + query = paddle.add( + paddle.multiply(q, cos_pos), paddle.multiply(rotate_half_q, sin_pos) + ) + rotate_half_k = paddle.reshape( + paddle.concat( + [-k[:, :, :, sin.shape[-1] :], k[:, :, :, 0 : sin.shape[-1]]], + axis=-1, + ), + paddle.shape(k), + ) + key = paddle.add( + paddle.multiply(k, cos_pos), paddle.multiply(rotate_half_k, sin_pos) + ) + if v is not None: + rotate_half_v = paddle.reshape( + paddle.concat( + [ + -v[:, :, :, sin.shape[-1] :], + v[:, :, :, 0 : sin.shape[-1]], + ], + axis=-1, + ), + paddle.shape(v), + ) + value = paddle.add( + paddle.multiply(v, cos_pos), + paddle.multiply(rotate_half_v, sin_pos), + ) + return query, key, value + return query, key + + +def remove_padding(seq_lens, cu_seq_lens, inputs, token_num): + bsz, num_head, seq_len, dim_head = inputs.shape + output = paddle.zeros( + shape=[token_num, num_head * dim_head], dtype=inputs.dtype + ) + inputs = inputs.transpose([0, 2, 1, 3]).reshape([bsz, seq_len, -1]) + for i in range(bsz): + seq_len_now = seq_lens[i] + start_idx = cu_seq_lens[i] + end_idx = cu_seq_lens[i + 1] + output[start_idx:end_idx, :] = inputs[i, :seq_len_now, :] + return output + + +def block_cache_to_naive_cache( + cache_k, cache_v, bsz, block_tables, cache_seq_len +): + _, num_head, blocksize, dim_head = cache_k.shape + out_cache_k = paddle.zeros( + shape=[bsz, num_head, cache_seq_len, dim_head], dtype=cache_k.dtype + ) + out_cache_v = paddle.zeros( + shape=[bsz, num_head, cache_seq_len, dim_head], dtype=cache_v.dtype + ) + for i in range(bsz): + for j in range(cache_seq_len): + out_cache_k[i, :, j, :] = cache_k[ + block_tables[i, j // blocksize], :, j % blocksize, : + ] + out_cache_v[i, :, j, :] = cache_v[ + block_tables[i, j // blocksize], :, j % blocksize, : + ] + return out_cache_k, out_cache_v + + +class TestBlockMultiHeadAttnRoPEXPU(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.name = "TestBlockMultiHeadAttnRoPE" + self.place = paddle.XPUPlace(0) + self.batch_size 
= 2 + self.num_head = 8 + self.seq_len = 64 + self.max_dec_len = 64 + self.dim_head = 64 + self.hid_dim = self.num_head * self.dim_head + self.blocksize = 64 + self.block_num_per_seq = ( + self.seq_len + self.max_dec_len + self.blocksize - 1 + ) // self.blocksize + self.rope = RopeEmbedding() + self.max_block_num = self.block_num_per_seq * self.batch_size + self.free_list = list(range(self.max_block_num - 1, -1, -1)) + self.seq_lens_encoder = paddle.to_tensor( + [ + self.seq_len, + ] + * self.batch_size, + "int32", + ) + self.seq_lens_decoder = paddle.to_tensor( + [ + 0, + ] + * self.batch_size, + "int32", + ) + self.seq_lens_this_time = paddle.to_tensor( + [ + self.seq_len, + ] + * self.batch_size, + "int32", + ) + self.shape = ( + self.batch_size, + self.num_head, + self.seq_len, + self.dim_head, + ) + self.cache_shape = ( + self.max_block_num, + self.num_head, + self.blocksize, + self.dim_head, + ) + self.dtype = 'float16' + self.attention_mask = create_attn_mask( + self.dtype, + self.batch_size, + [ + self.seq_len, + ] + * self.batch_size, + ) + self.scale = 1.0 / np.sqrt(self.shape[-1]) + self.cache_k = paddle.zeros(shape=self.cache_shape, dtype=self.dtype) + self.cache_v = paddle.zeros(shape=self.cache_shape, dtype=self.dtype) + self.block_tables = paddle.zeros( + shape=(self.batch_size, self.block_num_per_seq), dtype="int32" + ) + self.cache_k_per_batch_maxs = paddle.zeros( + [self.batch_size, 6], dtype="float32" + ) + self.cache_v_per_batch_maxs = paddle.zeros( + [self.batch_size, 6], dtype="float32" + ) + for i in range(self.batch_size): + need_block_num = ( + self.seq_len + self.max_dec_len + self.blocksize - 1 + ) // self.blocksize + for j in range(need_block_num): + self.block_tables[i, j] = self.free_list.pop() + ( + self.padding_offset, + self.cum_offset, + self.cu_seqlens_q, + self.cu_seqlens_k, + ) = get_padding_offset( + self.batch_size, self.seq_len, self.seq_lens_this_time + ) + self.token_num = self.padding_offset.shape[0] + + def 
get_rotary_position_embedding(self, position_ids, head_dim): + bsz, max_seq_len = position_ids.shape[:2] + rot_emb = paddle.zeros( + (2, bsz, max_seq_len, 1, head_dim), dtype="float32" + ) + inv_freq = 10000 ** ( + -paddle.arange(0, head_dim, 2, dtype="float32") / head_dim + ) + + # shape: [B, S, D/2] + freqs = paddle.einsum( + "ij,k->ijk", position_ids.cast("float32"), inv_freq + ) + # shape: [B, S, D] + emb = paddle.concat([freqs, freqs], axis=-1).reshape( + (bsz, max_seq_len, head_dim) + ) + # emb = paddle.stack([freqs], axis=-1).reshape( + # (bsz, max_seq_len, head_dim // 2) + # ) + # shape: [B, S, 1, D] + emb = paddle.unsqueeze(emb, 2) + + rot_emb[0] = paddle.cos(emb) + rot_emb[1] = paddle.sin(emb) + return rot_emb + + def test_all(self): + paddle.disable_static() + tmp_position_ids = paddle.arange( + self.seq_len + self.max_dec_len + ).reshape((1, -1)) + self.rope_emb = self.get_rotary_position_embedding( + tmp_position_ids, self.dim_head + ) + # encoder + query = np.random.uniform(-1, 1, self.shape) + q = paddle.to_tensor( + query, place=self.place, dtype=self.dtype, stop_gradient=False + ) + key = np.random.uniform(-1, 1, self.shape) + k = paddle.to_tensor( + key, place=self.place, dtype=self.dtype, stop_gradient=False + ) + value = np.random.uniform(-1, 1, self.shape) + v = paddle.to_tensor( + value, place=self.place, dtype=self.dtype, stop_gradient=False + ) + qkv = paddle.stack( + [ + q.transpose([0, 2, 1, 3]).reshape( + [self.token_num, self.hid_dim] + ), + k.transpose([0, 2, 1, 3]).reshape( + [self.token_num, self.hid_dim] + ), + v.transpose([0, 2, 1, 3]).reshape( + [self.token_num, self.hid_dim] + ), + ], + axis=1, + ).reshape([self.token_num, -1]) + sinusoidal_pos = self.rope._rotary_position_embedding( + self.seq_len, self.dim_head, "float32" + ) + q, k = self.rope._apply_neox_rope( + sinusoidal_pos.astype("float16"), q, k + ) + + out_ = naive_attention_impl( + q, k, v, None, None, None, None, self.attention_mask, self.scale + ) + out_ = 
remove_padding( + self.seq_lens_this_time, self.cu_seqlens_q, out_, self.token_num + ) + out = block_multihead_attention_xpu( + qkv, + self.cache_k, + self.cache_v, + self.seq_lens_encoder, + self.seq_lens_decoder, + self.seq_lens_this_time, + self.padding_offset, + self.cum_offset, + self.cu_seqlens_q, + self.cu_seqlens_k, + self.block_tables, + self.cache_k_per_batch_maxs, + self.cache_v_per_batch_maxs, + None, # pre_key_cache + None, # pre_value_cache + None, # cache_k_quant_scales + None, # cache_v_quant_scales + None, # cache_k_dequant_scales + None, # cache_v_dequant_scales + None, # qkv_out_scale + None, # qkv_bias + None, # out_shift + None, # out_smooth + None, # max_enc_len_this_time + None, # max_dec_len_this_time + self.rope_emb, # rotary_embs + None, # attn_mask + None, # tgt_mask + self.seq_len, + self.blocksize, + True, # use_neox_rotary_style + )[0] + np.testing.assert_allclose( + out.numpy(), + out_.numpy(), + rtol=5e-02, + atol=1e-03, + ) + # decoder + naive_cache_k, naive_cache_v = block_cache_to_naive_cache( + self.cache_k, + self.cache_v, + self.batch_size, + self.block_tables, + self.seq_len, + ) + + self.seq_lens_decoder = self.seq_lens_encoder.clone() + self.seq_lens_encoder[:] = paddle.zeros_like(self.seq_lens_encoder) + self.seq_lens_this_time[:] = 1 + self.shape = ( + self.batch_size, + self.num_head, + 1, + self.dim_head, + ) + query = np.random.uniform(-1, 1, self.shape) + q = paddle.to_tensor( + query, place=self.place, dtype=self.dtype, stop_gradient=False + ) + key = np.random.uniform(-1, 1, self.shape) + k = paddle.to_tensor( + key, place=self.place, dtype=self.dtype, stop_gradient=False + ) + value = np.random.uniform(-1, 1, self.shape) + v = paddle.to_tensor( + value, place=self.place, dtype=self.dtype, stop_gradient=False + ) + + qkv = paddle.stack( + [ + q.transpose([0, 2, 1, 3]).reshape( + [self.batch_size, self.hid_dim] + ), + k.transpose([0, 2, 1, 3]).reshape( + [self.batch_size, self.hid_dim] + ), + v.transpose([0, 2, 1, 
3]).reshape( + [self.batch_size, self.hid_dim] + ), + ], + axis=1, + ).reshape([self.batch_size, -1]) + + sinusoidal_pos = self.rope._rotary_position_embedding( + self.seq_len + 1, self.dim_head, "float32" + )[:, :, -1:, :] + q, k = self.rope._apply_neox_rope( + sinusoidal_pos.astype("float16"), q, k + ) + ( + self.padding_offset, + self.cum_offset, + self.cu_seqlens_q, + self.cu_seqlens_k, + ) = get_padding_offset(self.batch_size, 1, self.seq_lens_this_time) + out_ = ( + naive_attention_impl( + q, + k, + v, + naive_cache_k, + naive_cache_v, + None, + None, + None, + self.scale, + ) + .transpose([0, 2, 1, 3]) + .reshape([self.batch_size, -1]) + ) + out = block_multihead_attention_xpu( + qkv, + self.cache_k, + self.cache_v, + self.seq_lens_encoder, + self.seq_lens_decoder, + self.seq_lens_this_time, + self.padding_offset, + self.cum_offset, + self.cu_seqlens_q, + self.cu_seqlens_k, + self.block_tables, + self.cache_k_per_batch_maxs, + self.cache_v_per_batch_maxs, + None, # pre_key_cache + None, # pre_value_cache + None, # cache_k_quant_scales + None, # cache_v_quant_scales + None, # cache_k_dequant_scales + None, # cache_v_dequant_scales + None, # qkv_out_scale + None, # qkv_bias + None, # out_shift + None, # out_smooth + None, # max_enc_len_this_time + None, # max_dec_len_this_time + self.rope_emb, # rotary_embs + None, # attn_mask + None, # tgt_mask + self.seq_len + self.max_dec_len, # seq_len, + self.blocksize, + True, # use_neox_rotary_style + )[0] + # NOTE: The diff of decoder is a little big + np.testing.assert_allclose( + out.numpy(), + out_.numpy(), + rtol=5e-02, + atol=5e-02, + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/xpu/test_collective_api_base.py b/test/xpu/test_collective_api_base.py index 0c3d710a06335..c94061d5fc6d1 100644 --- a/test/xpu/test_collective_api_base.py +++ b/test/xpu/test_collective_api_base.py @@ -202,7 +202,7 @@ def setUp(self): self._trainers = 2 self._ps_endpoints = 
f"127.0.0.1:{self._find_free_port()},127.0.0.1:{self._find_free_port()}" self._python_interp = sys.executable - self._master_endpoints = "127.0.0.1:%s" % (self._find_free_port()) + self._master_endpoints = f"127.0.0.1:{self._find_free_port()}" self.temp_dir = tempfile.TemporaryDirectory() @@ -300,15 +300,15 @@ def _run_cluster(self, model_file, envs): tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() - sys.stderr.write('trainer 0 stderr: %s\n' % tr0_err) - sys.stderr.write('trainer 1 stderr: %s\n' % tr1_err) + sys.stderr.write(f'trainer 0 stderr: {tr0_err}\n') + sys.stderr.write(f'trainer 1 stderr: {tr1_err}\n') # close trainer file tr0_pipe.close() tr1_pipe.close() with open(path0, "r") as f: - sys.stderr.write('trainer 0 stderr file: %s\n' % f.read()) + sys.stderr.write(f'trainer 0 stderr file: {f.read()}\n') with open(path1, "r") as f: - sys.stderr.write('trainer 1 stderr file: %s\n' % f.read()) + sys.stderr.write(f'trainer 1 stderr file: {f.read()}\n') def load_and_remove(path): with open(path, 'rb') as f: diff --git a/test/xpu/test_collective_base_xpu.py b/test/xpu/test_collective_base_xpu.py index 8a3289f0eb02a..c6cd081b498d7 100644 --- a/test/xpu/test_collective_base_xpu.py +++ b/test/xpu/test_collective_base_xpu.py @@ -244,8 +244,8 @@ def _run_cluster(self, model_file, envs): tr0_out, tr0_err = tr0_proc.communicate() tr1_out, tr1_err = tr1_proc.communicate() - sys.stderr.write('trainer 0 stderr: %s\n' % tr0_err) - sys.stderr.write('trainer 1 stderr: %s\n' % tr1_err) + sys.stderr.write(f'trainer 0 stderr: {tr0_err}\n') + sys.stderr.write(f'trainer 1 stderr: {tr1_err}\n') # close trainer file tr0_pipe.close() tr1_pipe.close() diff --git a/test/xpu/test_conv2d_op_xpu.py b/test/xpu/test_conv2d_op_xpu.py index df36f226408eb..4c7419ae9e5fd 100644 --- a/test/xpu/test_conv2d_op_xpu.py +++ b/test/xpu/test_conv2d_op_xpu.py @@ -36,14 +36,14 @@ def conv2d_forward_naive( ): if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: 
raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if data_format not in ["NCHW", "NHWC"]: raise ValueError( - "Unknown Attr(data_format): '%s' ." - "It can only be 'NCHW' or 'NHWC'." % str(data_format) + f"Unknown Attr(data_format): '{str(data_format)}' ." + "It can only be 'NCHW' or 'NHWC'." ) channel_last = data_format == "NHWC" diff --git a/test/xpu/test_conv2d_transpose_op_xpu.py b/test/xpu/test_conv2d_transpose_op_xpu.py index 57c564335fbc1..1728889827992 100644 --- a/test/xpu/test_conv2d_transpose_op_xpu.py +++ b/test/xpu/test_conv2d_transpose_op_xpu.py @@ -31,8 +31,8 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): padding_algorithm = attrs['padding_algorithm'] if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if attrs['data_format'] == 'NHWC': diff --git a/test/xpu/test_conv3d_op_xpu.py b/test/xpu/test_conv3d_op_xpu.py index 021c57821c12d..26582b4e1b2c5 100644 --- a/test/xpu/test_conv3d_op_xpu.py +++ b/test/xpu/test_conv3d_op_xpu.py @@ -31,14 +31,14 @@ def conv3d_forward_naive( ): if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if data_format not in ["NCDHW", "NDHWC"]: raise ValueError( - "Unknown Attr(data_format): '%s' ." - "It can only be 'NCDHW' or 'NDHWC'." % str(data_format) + f"Unknown Attr(data_format): '{str(data_format)}' ." + "It can only be 'NCDHW' or 'NDHWC'." 
) channel_last = data_format == "NDHWC" diff --git a/test/xpu/test_depthwise_conv2d_transpose_op_xpu.py b/test/xpu/test_depthwise_conv2d_transpose_op_xpu.py index 96077ae8c83d0..878519fbd507d 100644 --- a/test/xpu/test_depthwise_conv2d_transpose_op_xpu.py +++ b/test/xpu/test_depthwise_conv2d_transpose_op_xpu.py @@ -31,8 +31,8 @@ def depthwiseconv2dtranspose_forward_naive(input_, filter_, attrs): padding_algorithm = attrs['padding_algorithm'] if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if attrs['data_format'] == 'NHWC': diff --git a/test/xpu/test_parallel_dygraph_dataparallel.py b/test/xpu/test_parallel_dygraph_dataparallel.py index 0070f8ade9802..3eed21553b7a5 100644 --- a/test/xpu/test_parallel_dygraph_dataparallel.py +++ b/test/xpu/test_parallel_dygraph_dataparallel.py @@ -73,9 +73,11 @@ def start_local_trainers( for t in pod.trainers: proc_env = { "PADDLE_DISTRI_BACKEND": "bkcl", - "FLAGS_selected_xpus": "%s" % ",".join([str(g) for g in t.gpus]), + "FLAGS_selected_xpus": "{}".format( + ",".join([str(g) for g in t.gpus]) + ), "PADDLE_TRAINER_ID": "%d" % t.rank, - "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint, + "PADDLE_CURRENT_ENDPOINT": f"{t.endpoint}", "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(), "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()), } diff --git a/test/xpu/test_pool2d_op_xpu.py b/test/xpu/test_pool2d_op_xpu.py index f62ffb4fc45a6..1d3c1def63bfb 100644 --- a/test/xpu/test_pool2d_op_xpu.py +++ b/test/xpu/test_pool2d_op_xpu.py @@ -172,8 +172,8 @@ def _get_padding_with_SAME(input_shape, pool_size, pool_stride): padding_algorithm = padding_algorithm.upper() if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. 
" - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if padding_algorithm == "VALID": diff --git a/test/xpu/test_pool3d_op_xpu.py b/test/xpu/test_pool3d_op_xpu.py index 865029ad0d07d..01dd6d77b2b86 100644 --- a/test/xpu/test_pool3d_op_xpu.py +++ b/test/xpu/test_pool3d_op_xpu.py @@ -68,8 +68,8 @@ def _get_padding_with_SAME(input_shape, pool_size, pool_stride): padding_algorithm = padding_algorithm.upper() if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: raise ValueError( - "Unknown Attr(padding_algorithm): '%s'. " - "It can only be 'SAME' or 'VALID'." % str(padding_algorithm) + f"Unknown Attr(padding_algorithm): '{str(padding_algorithm)}'. " + "It can only be 'SAME' or 'VALID'." ) if padding_algorithm == "VALID": diff --git a/test/xpu/test_swiglu_op_xpu.py b/test/xpu/test_swiglu_op_xpu.py new file mode 100644 index 0000000000000..35d8350c85e26 --- /dev/null +++ b/test/xpu/test_swiglu_op_xpu.py @@ -0,0 +1,172 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np + +import paddle +import paddle.nn.functional as F +from paddle.incubate.nn.functional import swiglu as fused_swiglu_impl + + +def swiglu(x, y, out_grad): + if isinstance(x, np.ndarray): + x = paddle.to_tensor(x) + y = paddle.to_tensor(y) + out_grad = paddle.to_tensor(out_grad) + + origin_x = x.detach().clone() + origin_x.stop_gradient = False + x = origin_x + + origin_y = y.detach().clone() + origin_y.stop_gradient = False + y = origin_y + + dtype = x.dtype + need_convert = False + assert dtype == y.dtype + output_dtype = dtype + + out = F.silu(x) * y + if need_convert: + out = out.astype(dtype) + out.backward(out_grad) + ret = [ + out.astype(output_dtype), + origin_x.grad.astype(output_dtype), + origin_y.grad.astype(output_dtype), + ] + return ret + + +def fused_swiglu(x, y, out_grad): + x = x.detach().clone() + x.stop_gradient = False + if y is not None: + y = y.detach().clone() + y.stop_gradient = False + out = fused_swiglu_impl(x, y) + out.backward(out_grad) + + output_dtype = x.dtype + ret = [ + out.astype(output_dtype), + ] + if y is not None: + x_grad, y_grad = x.grad, y.grad + else: + x_grad, y_grad = paddle.split(x.grad, 2, axis=-1) + + ret.append(x_grad.astype(output_dtype)) + ret.append(y_grad.astype(output_dtype)) + return ret + + +tol_map = { + paddle.float64: [1e-8, 1e-8], + paddle.float32: [1e-6, 1e-6], + paddle.float16: [1e-3, 1e-3], + paddle.bfloat16: [1e-2, 1e-2], +} + + +class TestSwiGLUDygraph(unittest.TestCase): + def setUp(self): + self.init_case() + self.seed = 1234 + + def init_case(self): + self.shape = [] + self.shape.append([8, 100]) + self.shape.append([4, 102]) + + def check_dygraph_impl(self, device, shape, dtype): + x = paddle.randn(shape, dtype=dtype) + y = paddle.randn(shape, dtype=dtype) + out_grad = paddle.randn(shape, dtype=dtype) + + ret1 = swiglu(x, y, out_grad) + ret2 = fused_swiglu(x, y, out_grad) + ret3 = fused_swiglu(paddle.concat([x, y], axis=-1), None, out_grad) + + atol, rtol = 
tol_map[dtype] + err_msg = ( + f"Failed when device = {device}, dtype = {dtype}, shape = {shape}" + ) + for t1, t2, t3 in zip(ret1, ret2, ret3): + t1, t2, t3 = t1.numpy(), t2.numpy(), t3.numpy() + np.testing.assert_allclose( + t1, t2, atol=atol, rtol=rtol, err_msg=err_msg + ) + np.testing.assert_equal(t2, t3, err_msg=err_msg) + + def check_dygraph(self, shape): + metas = [] + metas.append(('xpu', paddle.float32)) + metas.append(('xpu', paddle.float64)) + # Enable in KL3 + # metas.append(('xpu', paddle.float16)) + # metas.append(('xpu', paddle.bfloat16)) + + for device, dtype in metas: + origin_device = paddle.get_device() + paddle.set_device(device) + for with_split in [True]: + self.check_dygraph_impl(device, shape, dtype) + paddle.set_device(origin_device) + + def check_static_graph(self, shape, dtype="float32"): + x = paddle.static.data(name='x', shape=shape, dtype=dtype) + y = paddle.static.data(name='y', shape=shape, dtype=dtype) + concated_x = paddle.static.data( + name='concated_x', + shape=list(shape[:-1]) + [shape[-1] * 2], + dtype=dtype, + ) + out1 = fused_swiglu_impl(x, y) + out2 = fused_swiglu_impl(concated_x) + + concated_x_np = np.random.random(concated_x.shape).astype(dtype) + x_np, y_np = np.split(concated_x_np, 2, axis=-1) + + exe = paddle.static.Executor() + t1, t2 = exe.run( + feed={'x': x_np, 'y': y_np, 'concated_x': concated_x_np}, + fetch_list=[out1, out2], + ) + np.testing.assert_equal(out1, out2) + + def check_main(self, shape): + self.check_dygraph(shape) + paddle.enable_static() + with paddle.static.program_guard( + paddle.static.Program(), paddle.static.Program() + ): + self.check_static_graph(shape) + paddle.disable_static() + + def test_main(self): + for i in self.shape: + self.check_main(i) + + +class TestSwigluOp(TestSwiGLUDygraph): + def init_case(self): + self.shape = [[1, 4096, 1376], [1, 4096, 11008]] + + +if __name__ == "__main__": + unittest.main() diff --git a/test/xpu/test_zero_dim_tensor_xpu.py 
b/test/xpu/test_zero_dim_tensor_xpu.py index 133c9b1302013..ac5e2df75b46f 100644 --- a/test/xpu/test_zero_dim_tensor_xpu.py +++ b/test/xpu/test_zero_dim_tensor_xpu.py @@ -345,7 +345,7 @@ def test_dygraph_binary(self): # 1) x is 0D, y is 0D x_np = np.random.randint(-10, 10, []) y_np = np.random.randint(-10, 10, []) - out_np = eval('np.%s(x_np, y_np)' % api.__name__) + out_np = eval(f'np.{api.__name__}(x_np, y_np)') x = paddle.to_tensor(x_np) y = paddle.to_tensor(y_np) @@ -357,7 +357,7 @@ def test_dygraph_binary(self): # 2) x is ND, y is 0D x_np = np.random.randint(-10, 10, [3, 5]) y_np = np.random.randint(-10, 10, []) - out_np = eval('np.%s(x_np, y_np)' % api.__name__) + out_np = eval(f'np.{api.__name__}(x_np, y_np)') x = paddle.to_tensor(x_np) y = paddle.to_tensor(y_np) @@ -369,7 +369,7 @@ def test_dygraph_binary(self): # 3) x is 0D , y is ND x_np = np.random.randint(-10, 10, []) y_np = np.random.randint(-10, 10, [3, 5]) - out_np = eval('np.%s(x_np, y_np)' % api.__name__) + out_np = eval(f'np.{api.__name__}(x_np, y_np)') x = paddle.to_tensor(x_np) y = paddle.to_tensor(y_np) diff --git a/third_party/onednn b/third_party/onednn index 01204edbda1c2..0fb7e6ed4f32e 160000 --- a/third_party/onednn +++ b/third_party/onednn @@ -1 +1 @@ -Subproject commit 01204edbda1c2a4ff0cccd40476ed6bd2fb62d56 +Subproject commit 0fb7e6ed4f32e5d89832b2bd742bbf834cd296ed diff --git a/tools/CheckPRTemplate.py b/tools/CheckPRTemplate.py index 1cc601dba0a29..a3a350d107af6 100644 --- a/tools/CheckPRTemplate.py +++ b/tools/CheckPRTemplate.py @@ -79,7 +79,7 @@ def parameter_accuracy(body): for i in value: i = i.strip().lower() if i not in test_list_lower: - single_mess += '%s.' % i + single_mess += f'{i}.' if len(single_mess) != 0: message += f'{key} should be in {test_list}. but now is [{single_mess}].' 
return message diff --git a/tools/CrossStackProfiler/CspFileReader.py b/tools/CrossStackProfiler/CspFileReader.py index 28038b5c76d3b..5802edc965cca 100755 --- a/tools/CrossStackProfiler/CspFileReader.py +++ b/tools/CrossStackProfiler/CspFileReader.py @@ -108,7 +108,7 @@ def printArgs(self): def _checkArgsKey(self, key, type): if key not in self._args: - raise KeyError("args should has key [%s]!" % key) + raise KeyError(f"args should has key [{key}]!") if not isinstance(self._args[key], type): raise TypeError( @@ -130,17 +130,14 @@ def _checkArgs(self): or self._organizeForm == FILEORGANIZEFORM_BYOTHER ): raise NotImplementedError( - "we have not known how to process this form of file [%s]!" - % self._organizeForm + f"we have not known how to process this form of file [{self._organizeForm}]!" ) self._checkArgsKey("gpuPerTrainer", int) self._checkArgsKey("dataPath", str) if not os.path.exists(self._dataPath): - raise OSError( - "input data path [%s] not existed!" % (self._dataPath) - ) + raise OSError(f"input data path [{self._dataPath}] not existed!") self._checkArgsKey("groupSize", int) self._checkArgsKey("displaySize", int) @@ -183,8 +180,7 @@ def _getFileList(self): newFileList.append(file) else: raise NotImplementedError( - "[%s] is repeated by id, we don not how to process it!" - % file + f"[{file}] is repeated by id, we don not how to process it!" ) if not self._fileList: @@ -201,7 +197,7 @@ def _sortBySuffix(elem): if not self._fileList: self._logger.warning( - "we can not find any file in dir [%s]!" % self._dataPath + f"we can not find any file in dir [{self._dataPath}]!" ) else: self._logger.info( @@ -215,12 +211,11 @@ def _sortBySuffix(elem): def _getId(self, fileName, organizeForm, sed="."): if self._organizeForm != organizeForm: raise TypeError( - "Can not get rank id when organizer form is not %s!" - % organizeForm + f"Can not get rank id when organizer form is not {organizeForm}!" 
) if not os.path.isfile(fileName): - raise OSError("[%s] is not a valid file!" % (fileName)) + raise OSError(f"[{fileName}] is not a valid file!") try: prefix_str = fileName.split(sed)[-1] @@ -228,13 +223,12 @@ def _getId(self, fileName, organizeForm, sed="."): return int(prefix_str) except ValueError as e: print(e) - raise TypeError("invalid fileName [%s]" % fileName) + raise TypeError(f"invalid fileName [{fileName}]") except IndexError as e: print(e) raise TypeError( - "invalid fileName [%s], the prefix should be a number!" - % fileName + f"invalid fileName [{fileName}], the prefix should be a number!" ) def getRankId(self, fileName, sed="."): @@ -298,19 +292,15 @@ def getDcgmInfoDict(self, groupId, gpuId, tmpPath="./tmp"): def getDict(self, name, groupId, gpuId, tmpPath="./tmp"): fileName = self.getFileName(name, groupId, gpuId, tmpPath) if not os.path.isfile(fileName): - raise OSError("[%s] is not existed!" % fileName) + raise OSError(f"[{fileName}] is not existed!") data = {} with open(fileName, "r") as rf: try: data = json.load(rf) except Exception: - self._logger.error( - "read [%s] error. not a json file!" % (fileName) - ) - raise TypeError( - "read [%s] error. not a json file!" % (fileName) - ) + self._logger.error(f"read [{fileName}] error. not a json file!") + raise TypeError(f"read [{fileName}] error. not a json file!") return data def dumpOpInfoDict( @@ -344,7 +334,7 @@ def dumpDict( fileObject = open(fileName, 'w') fileObject.write(jsObj) fileObject.close() - self._logger.info("dump [%s] successfully!" 
% fileName) + self._logger.info(f"dump [{fileName}] successfully!") def getLogger(): diff --git a/tools/CrossStackProfiler/DCGMFileReader.py b/tools/CrossStackProfiler/DCGMFileReader.py index f462ce5c9ad5e..eb31ad7820a78 100755 --- a/tools/CrossStackProfiler/DCGMFileReader.py +++ b/tools/CrossStackProfiler/DCGMFileReader.py @@ -88,7 +88,7 @@ def parseFileByGroup(self, groupId, processNum=8): def _parseTask(self, taskList, q=None): is_first = True for fileName in taskList: - self._logger.info("I am processing %s!" % fileName) + self._logger.info(f"I am processing {fileName}!") tmp_data = self._parseSingleFile(fileName) if tmp_data is None: continue @@ -103,7 +103,7 @@ def _parseTask(self, taskList, q=None): dcgm_data = dcgm_data.dropna() if q is not None: q.put(dcgm_data) - self._logger.info("I finish processing %s!" % fileName) + self._logger.info(f"I finish processing {fileName}!") return dcgm_data def _parseSingleFile(self, fileName): @@ -192,7 +192,7 @@ def _getDCGMTraceInfoByGpuId( di = {} # name = "%s_%d" % (metric, trainerId) - name = "%s" % (metric) + name = f"{metric}" di['name'] = name di['pid'] = pid_map[metric] di['ts'] = self._align_ts(int(row['ts'])) diff --git a/tools/CrossStackProfiler/ProfileFileReader.py b/tools/CrossStackProfiler/ProfileFileReader.py index af955bd6652c4..266e9e5cf706d 100755 --- a/tools/CrossStackProfiler/ProfileFileReader.py +++ b/tools/CrossStackProfiler/ProfileFileReader.py @@ -46,7 +46,7 @@ def _parseTask(self, taskList, q=None): profile_dict["trainerRank.%03d" % (rankId)] = self._parseSingleFile( fileName ) - self._logger.info("I finish processing %s!" 
% fileName) + self._logger.info(f"I finish processing {fileName}!") if q is not None: q.put(profile_dict) diff --git a/tools/analysisPyXml.py b/tools/analysisPyXml.py index 2f2d8b472c566..9d9ec062180cb 100644 --- a/tools/analysisPyXml.py +++ b/tools/analysisPyXml.py @@ -31,7 +31,7 @@ def analysisPyXml(rootPath, ut): for clazz in root.findall('packages/package/classes/class'): clazz_filename = clazz.attrib.get('filename') if not clazz_filename.startswith('/paddle'): - clazz_filename = '/paddle/%s' % clazz_filename + clazz_filename = f'/paddle/{clazz_filename}' for line in clazz.findall('lines/line'): line_hits = int(line.attrib.get('hits')) if line_hits != 0: diff --git a/tools/analysis_build_time.py b/tools/analysis_build_time.py index 6ae3ee6bbacc1..ae340a1bcfe03 100644 --- a/tools/analysis_build_time.py +++ b/tools/analysis_build_time.py @@ -33,10 +33,10 @@ def getUsefulBuildTimeFile(filename): def analysisBuildTime(): - filename = '%s/build/build-time' % root_path + filename = f'{root_path}/build/build-time' getUsefulBuildTimeFile(filename) - os.system('rm -rf %s/tools/tempbuildTime.txt' % root_path) - with open('%s/tools/analysis_build_time' % root_path, 'r') as f: + os.system(f'rm -rf {root_path}/tools/tempbuildTime.txt') + with open(f'{root_path}/tools/analysis_build_time', 'r') as f: lines = f.readlines() for line in lines: try: diff --git a/tools/auto_parallel/ci_auto_parallel.sh b/tools/auto_parallel/ci_auto_parallel.sh index ab7a3c60c5874..2fbb47ec37112 100644 --- a/tools/auto_parallel/ci_auto_parallel.sh +++ b/tools/auto_parallel/ci_auto_parallel.sh @@ -34,7 +34,7 @@ cd ${paddle_dir} # get the location of "test/auto_parallel" in target_lists_for_semi_auto_ci count=0 for element in "${target_lists_for_semi_auto_ci[@]}";do - if [[ "$element" == "test/auto_parallel" ]]; then + if [[ "$element" == "test/auto_parallel" ]]; then test_auto_num=$count break fi @@ -43,7 +43,7 @@ done # get the location of "test/collective/hybrid_strategy" in 
target_lists_for_dygraph_ci count=0 for element in "${target_lists_for_dygraph_ci[@]}";do - if [[ "$element" == "test/collective/hybrid_strategy" ]]; then + if [[ "$element" == "test/collective/hybrid_strategy" ]]; then test_dygraph_num=$count break fi @@ -64,7 +64,7 @@ for file_name in `git diff --numstat upstream/${AGILE_COMPILE_BRANCH} |awk '{pri elif [[ ${file_name##*.} == "md" ]] || [[ ${file_name##*.} == "rst" ]] || [[ ${dir1} == "docs" ]];then continue else - # The most auto unittests have been monitored in PR-CI-Distribute-stable, + # The most auto unittests have been monitored in PR-CI-Distribute-stable, # while the other tests of llama model will be executed in PR-CI-Auto-Parallel. for ((i=0; i<${#target_lists_for_semi_auto_ci[@]}; i++)); do if [[ $i != ${test_auto_num} ]] && [[ ${file_item} == *${target_lists_for_semi_auto_ci[i]}* ]];then @@ -122,7 +122,7 @@ get_diff_TO_case #################### if [[ "${case_list[*]}" == *"gpt-3_auto"* ]] && [[ "${case_list[*]}" == *"gpt-3_auto_pir"* ]]; then - echo "同时命中gpt-3_auto 和 gpt-3_auto_pir, 只执行新ir, 不执行旧ir" + echo "同时命中gpt-3_auto 和 gpt-3_auto_pir, 只执行新ir, 不执行旧ir" case_list=("${case_list[@]/*gpt-3_auto_pir*/}") case_list=("${case_list[@]/*gpt-3_auto*/}") case_list[${#case_list[*]}]=gpt-3_auto_pir @@ -135,7 +135,7 @@ if [[ ${#case_list[*]} -ne 0 ]];then echo -e "\033[31m ---- case_list length: ${#case_list[*]}, cases: ${case_list[*]} \033" echo -e "\033[31m ============================= \033" set +e - + # Install paddle install_paddle case_num=1 diff --git a/tools/check_added_ut.sh b/tools/check_added_ut.sh index b036c08e1d93e..6d422774d12ed 100644 --- a/tools/check_added_ut.sh +++ b/tools/check_added_ut.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -32,7 +32,7 @@ if [[ "$SYSTEM" == "Linux" ]] || [[ "$SYSTEM" == "Darwin" ]];then cp $PADDLE_ROOT/paddle/scripts/paddle_build.sh $PADDLE_ROOT/paddle/scripts/paddle_build_pre.sh elif [[ "$SYSTEM" == "Windows_NT" ]];then git remote | grep upstream - if [ $? != 0 ]; then + if [ $? != 0 ]; then git remote add upstream https://github.com/PaddlePaddle/Paddle.git fi git fetch upstream ${BRANCH} diff --git a/tools/check_api_approvals.sh b/tools/check_api_approvals.sh index 4a8e7cf708994..7819072687da7 100644 --- a/tools/check_api_approvals.sh +++ b/tools/check_api_approvals.sh @@ -40,12 +40,18 @@ function add_failed(){ api_params_diff=`python ${PADDLE_ROOT}/tools/check_api_compatible.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec ${PADDLE_ROOT}/paddle/fluid/API_PR.spec` api_spec_diff=`python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec.api ${PADDLE_ROOT}/paddle/fluid/API_PR.spec.api` +api_annotation_diff=`python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec.annotations ${PADDLE_ROOT}/paddle/fluid/API_PR.spec.annotations` if [ "$api_spec_diff" != "" -o "${api_params_diff}" != "" ]; then echo_line="You must have one RD (XiaoguangHu01, jeff41404, lanxianghit or qingqing01) approval for API change.\n" check_approval 1 XiaoguangHu01 jeff41404 lanxianghit qingqing01 fi +if [ "$api_annotation_diff" != "" ]; then + echo_line="You must have one member of Typing group (SigureMo, megemini, zrr1999, sunzhongkai588, luotao1) approval for API annotation change.\n" + check_approval 1 SigureMo megemini zrr1999 sunzhongkai588 luotao1 +fi + api_yaml_diff=`python ${PADDLE_ROOT}/tools/check_api_yaml_same.py 
${PADDLE_ROOT}/paddle/fluid/API_DEV.spec ${PADDLE_ROOT}/paddle/fluid/API_PR.spec ${BRANCH} ${PADDLE_ROOT}` if [ "$api_yaml_diff" != "" ]; then echo_line="API's name and params should be consistent with op's name and params in yaml. @@ -133,7 +139,7 @@ if [ -n "${echo_list}" ];then echo "**************************************************************" # L40 L48 L62 has fetch the result out, but there are splitted. - if [ "${api_spec_diff}" != "" -o "${api_doc_spec_diff}" != "" ] ; then + if [ "${api_spec_diff}" != "" -o "${api_annotation_diff}" != "" ] ; then python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec ${PADDLE_ROOT}/paddle/fluid/API_PR.spec fi if [ "${api_params_diff}" != "" ] ; then diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index d637c4f0c3b82..c844c09565da3 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -21,11 +21,12 @@ fi PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../" && pwd )" # If you want to add monitoring file modifications, please perform the. github/CODEOWNERS operation -API_FILES=("tools/print_signatures.py" - "tools/sampcd_processor.py" - "tools/check_pr_approval.py" - "tools/checkout_api_compatible.py" - ) +API_FILES=( + "tools/print_signatures.py" + "tools/sampcd_processor.py" + "tools/check_pr_approval.py" + "tools/checkout_api_compatible.py" +) approval_line=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000` git_files=`git diff --numstat upstream/$BRANCH| wc -l` diff --git a/tools/check_op_benchmark_result.py b/tools/check_op_benchmark_result.py index ca3df4bb99eef..82f7967133576 100644 --- a/tools/check_op_benchmark_result.py +++ b/tools/check_op_benchmark_result.py @@ -21,7 +21,7 @@ def check_path_exists(path): """Assert whether file/directory exists.""" - assert os.path.exists(path), "%s does not exist." 
% path + assert os.path.exists(path), f"{path} does not exist." def parse_case_name(log_file_name): @@ -48,7 +48,7 @@ def parse_log_file(log_file): pass # do nothing if result is None: - logging.warning("Parse %s fail!" % log_file) + logging.warning(f"Parse {log_file} fail!") return result @@ -81,29 +81,29 @@ def check_speed_result(case_name, develop_data, pr_data, pr_result): develop_total_time = develop_data.get("total") total_time_diff = (pr_total_time - develop_total_time) / develop_total_time - logging.info("------ OP: %s ------" % case_name) + logging.info(f"------ OP: {case_name} ------") logging.info( f"GPU time change: {gpu_time_diff_str} (develop: {develop_gpu_time:.7f} -> PR: {pr_gpu_time:.7f})" ) logging.info( f"Total time change: {total_time_diff * 100:.5f}% (develop: {develop_total_time:.7f} -> PR: {pr_total_time:.7f})" ) - logging.info("backward: %s" % pr_result.get("backward")) + logging.info("backward: {}".format(pr_result.get("backward"))) logging.info("parameters:") for line in pr_result.get("parameters").strip().split("\n"): - logging.info("\t%s" % line) + logging.info(f"\t{line}") return gpu_time_diff > 0.05 def check_accuracy_result(case_name, pr_result): """Check accuracy result.""" - logging.info("------ OP: %s ------" % case_name) - logging.info("Accuracy diff: %s" % pr_result.get("diff")) - logging.info("backward: %s" % pr_result.get("backward")) + logging.info(f"------ OP: {case_name} ------") + logging.info("Accuracy diff: {}".format(pr_result.get("diff"))) + logging.info("backward: {}".format(pr_result.get("backward"))) logging.info("parameters:") for line in pr_result.get("parameters").strip().split("\n"): - logging.info("\t%s" % line) + logging.info(f"\t{line}") return not pr_result.get("consistent") @@ -154,11 +154,11 @@ def update_api_info_file(fail_case_list, api_info_file): def summary_results(check_results, api_info_file): """Summary results and return sys.exit code.""" for case_name in check_results["speed"]: - 
logging.error("Check speed result with case \"%s\" failed." % case_name) + logging.error(f"Check speed result with case \"{case_name}\" failed.") for case_name in check_results["accuracy"]: logging.error( - "Check accuracy result with case \"%s\" failed." % case_name + f"Check accuracy result with case \"{case_name}\" failed." ) if len(check_results["speed"]) and api_info_file: diff --git a/tools/check_sequence_op.sh b/tools/check_sequence_op.sh index 35357476a3224..51a482c3e9306 100644 --- a/tools/check_sequence_op.sh +++ b/tools/check_sequence_op.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/ci_op_benchmark.sh b/tools/ci_op_benchmark.sh index 58e327327e6ad..93eb52a4f16aa 100644 --- a/tools/ci_op_benchmark.sh +++ b/tools/ci_op_benchmark.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -333,6 +333,6 @@ fi case $1 in run_op_benchmark) prepare_env - gpu_op_benchmark + gpu_op_benchmark ;; esac diff --git a/tools/cinn/ci_build.sh b/tools/cinn/ci_build.sh index 19aef611d7158..18e133fb1bfe6 100755 --- a/tools/cinn/ci_build.sh +++ b/tools/cinn/ci_build.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 CINN Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/cinn/docker/Dockerfile b/tools/cinn/docker/Dockerfile index fcbe406ea46af..180e8ff78dd38 100644 --- a/tools/cinn/docker/Dockerfile +++ b/tools/cinn/docker/Dockerfile @@ -16,12 +16,12 @@ ENV HOME /root RUN apt-get update && \ apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa && \ apt-get update && \ - apt-get install -y curl wget vim git unzip unrar tar xz-utils bzip2 gzip \ + apt-get install -y curl wget vim git unzip unrar tar xz-utils bzip2 gzip \ coreutils ntp language-pack-zh-hans python-qt4 libsm6 libxext6 libxrender-dev # Downgrade gcc&&g++ -WORKDIR /usr/bin +WORKDIR /usr/bin RUN apt-get update --fix-missing COPY script_build /script_build RUN bash /script_build/install_gcc.sh gcc82 && rm -rf /script_build && \ @@ -30,7 +30,7 @@ RUN bash /script_build/install_gcc.sh gcc82 && rm -rf /script_build && \ ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ && \ ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc && \ ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++ -ENV PATH=/usr/local/gcc-8.2/bin:$PATH +ENV PATH=/usr/local/gcc-8.2/bin:$PATH RUN apt-get update && \ apt-get install -y python3.6 python3.6-dev python3.6-venv && 
\ @@ -43,8 +43,8 @@ RUN wget -q https://cmake.org/files/v3.20/cmake-3.20.0-linux-x86_64.tar.gz && ta ENV PATH=/home/cmake-3.20.0-linux-x86_64/bin:$PATH # remove them when apt-get support 2.27 and higher version -RUN wget -q https://ftp.gnu.org/gnu/binutils/binutils-2.33.1.tar.gz && \ - tar -xzf binutils-2.33.1.tar.gz && \ +RUN wget -q https://ftp.gnu.org/gnu/binutils/binutils-2.33.1.tar.gz && \ + tar -xzf binutils-2.33.1.tar.gz && \ cd binutils-2.33.1 && \ ./configure && make -j && make install && cd .. && rm -rf binutils-2.33.1 binutils-2.33.1.tar.gz @@ -99,7 +99,7 @@ RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \ make -j8 && make install && \ ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache -# For CINN environment +# For CINN environment RUN apt update --fix-missing && \ apt install autoconf autogen libtool zlib1g-dev sudo libginac-dev clang cmake -y && \ apt remove python3-six python-six -y && \ diff --git a/tools/cinn/docker/Dockerfile.ci b/tools/cinn/docker/Dockerfile.ci index 53e1bbf64ec51..c91ecbb3641d5 100644 --- a/tools/cinn/docker/Dockerfile.ci +++ b/tools/cinn/docker/Dockerfile.ci @@ -1,5 +1,5 @@ # Use SHA to specify the docker image to prevent the use of old cache images -FROM registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82 +FROM registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82 # NVIDIA update GPG key on 04/29/2022. Fetch the public key for CI machine # Reference: https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/ diff --git a/tools/cinn/docker/script_build/install_gcc.sh b/tools/cinn/docker/script_build/install_gcc.sh index e744e9ddac66e..46470b179ad88 100644 --- a/tools/cinn/docker/script_build/install_gcc.sh +++ b/tools/cinn/docker/script_build/install_gcc.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -30,7 +30,7 @@ else fi if [ "$1" == "gcc82" ]; then - wget -q https://paddle-ci.gz.bcebos.com/gcc-8.2.0.tar.xz + wget -q https://paddle-ci.gz.bcebos.com/gcc-8.2.0.tar.xz tar -xvf gcc-8.2.0.tar.xz && \ cd gcc-8.2.0 && \ unset LIBRARY_PATH CPATH C_INCLUDE_PATH PKG_CONFIG_PATH CPLUS_INCLUDE_PATH INCLUDE && \ @@ -39,12 +39,12 @@ if [ "$1" == "gcc82" ]; then ../gcc-8.2.0/configure --prefix=/usr/local/gcc-8.2 --enable-threads=posix --disable-checking --disable-multilib && \ make -j8 && make install cd .. && rm -rf temp_gcc82 - cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} && + cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} && ln -s /usr/local/gcc-8.2/lib64/libgfortran.so.5 ${lib_so_5} && \ ln -s /usr/local/gcc-8.2/lib64/libstdc++.so.6 ${lib_so_6} && \ cp /usr/local/gcc-8.2/lib64/libstdc++.so.6.0.25 ${lib_path} elif [ "$1" == "gcc54" ]; then - wget -q http://ftp.tsukuba.wide.ad.jp/software/gcc/releases/gcc-5.4.0/gcc-5.4.0.tar.bz2 + wget -q http://ftp.tsukuba.wide.ad.jp/software/gcc/releases/gcc-5.4.0/gcc-5.4.0.tar.bz2 tar -xvf gcc-5.4.0.tar.bz2 && \ cd gcc-5.4.0 && \ unset LIBRARY_PATH CPATH C_INCLUDE_PATH PKG_CONFIG_PATH CPLUS_INCLUDE_PATH INCLUDE && \ @@ -53,7 +53,7 @@ elif [ "$1" == "gcc54" ]; then ../gcc-5.4.0/configure --prefix=/usr/local/gcc-5.4 --enable-checking=release --enable-languages=c,c++ --disable-multilib && \ make -j8 && make install cd .. 
&& rm -rf temp_gcc54 - cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} && + cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} && ln -s /usr/local/gcc-5.4/lib64/libgfortran.so.5 ${lib_so_5} && \ ln -s /usr/local/gcc-5.4/lib64/libstdc++.so.6 ${lib_so_6} && \ cp /usr/local/gcc-5.4/lib64/libstdc++.so.6.0.21 ${lib_path} diff --git a/tools/cinn/gen_c++_tutorial.py b/tools/cinn/gen_c++_tutorial.py index 97e6d16fef088..be391b44ef730 100644 --- a/tools/cinn/gen_c++_tutorial.py +++ b/tools/cinn/gen_c++_tutorial.py @@ -59,13 +59,13 @@ def code_block(self, lang: str, block: List[str]): break else: tail_valid_offset += 1 - logging.warning("block0: %s" % block) + logging.warning(f"block0: {block}") block = ( block[pre_valid_offset:-tail_valid_offset] if tail_valid_offset > 0 else block[pre_valid_offset:] ) - logging.warning("block1: %s" % block) + logging.warning(f"block1: {block}") if not block: return @@ -189,7 +189,7 @@ def eat_roc(self, header: str, content: ContentGenerator) -> None: code_block.append(line) line: str = content.get_line() - logging.warning("DOC content: %s" % code_block) + logging.warning(f"DOC content: {code_block}") self.doc.code_block(lang, code_block) diff --git a/tools/codestyle/clang-tidy.py b/tools/codestyle/clang-tidy.py index 404413b9b9945..7fe5029cd1823 100644 --- a/tools/codestyle/clang-tidy.py +++ b/tools/codestyle/clang-tidy.py @@ -166,9 +166,9 @@ def get_tidy_invocation( os.close(handle) start.append(name) for arg in extra_arg: - start.append('-extra-arg=%s' % arg) + start.append(f'-extra-arg={arg}') for arg in extra_arg_before: - start.append('-extra-arg-before=%s' % arg) + start.append(f'-extra-arg-before={arg}') start.append('-p=' + build_path) if quiet: start.append('-quiet') diff --git a/tools/continuous_integration/bisect.py b/tools/continuous_integration/bisect.py index c4b31bb6e8729..2feaf7be5ec6e 100644 --- a/tools/continuous_integration/bisect.py +++ b/tools/continuous_integration/bisect.py @@ -84,11 +84,11 @@ def 
print_arguments(): [f'git rev-list --first-parent {args.good_commit}...{args.bad_commit}'], shell=True, ) -sys.stdout.write('commits found:\n%s\n' % ret) +sys.stdout.write(f'commits found:\n{ret}\n') commits = ret.strip().split('\n') os.chdir(args.build_dir) # Clean up previous logs. -subprocess.check_output(['echo "" > %s' % args.log_file], shell=True) +subprocess.check_output([f'echo "" > {args.log_file}'], shell=True) last_culprit = '' while True: @@ -96,8 +96,7 @@ def print_arguments(): os.chdir(args.git_dir) subprocess.check_output( [ - 'git checkout %s && git clean -fd && git checkout .' - % args.bisect_branch + f'git checkout {args.bisect_branch} && git clean -fd && git checkout .' ], shell=True, ) @@ -109,7 +108,7 @@ def print_arguments(): pick_idx = len(commits) / 2 pick = commits[pick_idx] os.chdir(args.git_dir) - subprocess.check_output(['git checkout %s' % pick], shell=True) + subprocess.check_output([f'git checkout {pick}'], shell=True) # Clean builds and compile. # We assume mainline commits should always compile. 
@@ -120,7 +119,7 @@ def print_arguments(): 'rm -rf * && ' f'cmake -DWITH_TESTING=ON {args.git_dir} >> {args.log_file} && make -j{args.build_parallel} >> {args.log_file}' ) - sys.stdout.write('cmd: %s\n' % cmd) + sys.stdout.write(f'cmd: {cmd}\n') try: subprocess.check_output([cmd], shell=True) except subprocess.CalledProcessError as e: @@ -130,7 +129,7 @@ def print_arguments(): passed = True try: cmd = f'ctest --repeat-until-fail {args.test_times} -R {args.test_target} >> {args.log_file}' - sys.stdout.write('cmd: %s\n' % cmd) + sys.stdout.write(f'cmd: {cmd}\n') subprocess.check_output([cmd], shell=True) except subprocess.CalledProcessError as e: passed = False @@ -145,4 +144,4 @@ def print_arguments(): break commits = commits[pick_idx + 1 :] -sys.stdout.write('Culprit commit: %s\n' % last_culprit) +sys.stdout.write(f'Culprit commit: {last_culprit}\n') diff --git a/tools/dockerfile/Dockerfile.centos b/tools/dockerfile/Dockerfile.centos index c9ae968c920c5..be2a97b036191 100644 --- a/tools/dockerfile/Dockerfile.centos +++ b/tools/dockerfile/Dockerfile.centos @@ -16,8 +16,8 @@ ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig RUN yum install -y bzip2 gettext-devel sqlite-devel zlib-devel openssl-devel pcre-devel vim tk-devel tkinter libtool xz graphviz wget curl-devel patch COPY build_scripts /build_scripts RUN bash build_scripts/build.sh -#RUN bash build_scripts/install_nccl2.sh -RUN bash build_scripts/install_trt.sh +#RUN bash build_scripts/install_nccl2.sh +RUN bash build_scripts/install_trt.sh RUN rm -rf build_scripts RUN ln -s /usr/local/ssl/include/openssl /usr/include @@ -26,7 +26,7 @@ RUN wget -q https://paddle-ci.gz.bcebos.com/git-2.17.1.tar.gz && \ tar -xvf git-2.17.1.tar.gz && \ cd git-2.17.1 && \ ./configure --with-openssl --prefix=/usr/local && \ - make -j8 && make install + make -j8 && make install ENV SSL_CERT_FILE=/opt/_internal/certs.pem ENV GOROOT=/usr/local/go GOPATH=/root/gopath @@ -43,7 +43,7 @@ RUN wget --no-check-certificate -qO- 
https://paddle-ci.gz.bcebos.com/go1.15.12.l # protobuf 3.6.1 -RUN cd /opt && wget -q --no-check-certificate https://paddle-ci.cdn.bcebos.com/protobuf-cpp-3.6.1.tar.gz && \ +RUN cd /opt && wget -q --no-check-certificate https://paddle-ci.cdn.bcebos.com/protobuf-cpp-3.6.1.tar.gz && \ tar xzf protobuf-cpp-3.6.1.tar.gz && \ cd protobuf-3.6.1 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.6.1.tar.gz diff --git a/tools/dockerfile/Dockerfile.ipu b/tools/dockerfile/Dockerfile.ipu index 7b9e15bbf5ff1..ef55b8920559f 100644 --- a/tools/dockerfile/Dockerfile.ipu +++ b/tools/dockerfile/Dockerfile.ipu @@ -27,14 +27,14 @@ RUN apt-get update && apt-get install -y rdma-core librdmacm1 # Downgrade gcc&&g++ WORKDIR /usr/bin -COPY tools/dockerfile/build_scripts /build_scripts -RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts -RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++ -RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc -RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ -RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc -RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++ -ENV PATH=/usr/local/gcc-8.2/bin:$PATH +COPY tools/dockerfile/build_scripts /build_scripts +RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts +RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++ +RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc +RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ +RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc +RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++ +ENV PATH=/usr/local/gcc-8.2/bin:$PATH # install cmake WORKDIR /home diff --git a/tools/dockerfile/Dockerfile.release.ubuntu20 b/tools/dockerfile/Dockerfile.release.ubuntu20 index 7a14eb6534afa..397ca3cdfce96 100644 --- a/tools/dockerfile/Dockerfile.release.ubuntu20 +++ b/tools/dockerfile/Dockerfile.release.ubuntu20 @@ -27,26 +27,26 @@ RUN apt-get update --allow-unauthenticated && \ apt-get install -y 
software-properties-common && \ add-apt-repository ppa:deadsnakes/ppa && \ apt-get update && \ - apt-get install -y curl wget vim git unzip unrar tar xz-utils libssl-dev bzip2 gzip \ + apt-get install -y curl wget vim git unzip unrar tar xz-utils libssl-dev bzip2 gzip \ coreutils ntp language-pack-zh-hans libsm6 libxext6 libxrender-dev libgl1-mesa-glx \ bison graphviz libjpeg-dev zlib1g-dev automake locales swig net-tools libtool kmod <install_cpu_package> # Downgrade gcc&&g++ -WORKDIR /usr/bin -COPY tools/dockerfile/build_scripts /build_scripts +WORKDIR /usr/bin +COPY tools/dockerfile/build_scripts /build_scripts RUN bash /build_scripts/install_trt.sh # Older versions of patchelf limited the size of the files being processed and were fixed in this pr. # # https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa # # So install a newer version here. RUN bash /build_scripts/install_patchelf.sh RUN bash /build_scripts/install_gcc.sh gcc121 -RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++ -RUN ln -s /usr/local/gcc-12.1/bin/gcc /usr/local/bin/gcc -RUN ln -s /usr/local/gcc-12.1/bin/g++ /usr/local/bin/g++ -RUN ln -s /usr/local/gcc-12.1/bin/gcc /usr/bin/gcc -RUN ln -s /usr/local/gcc-12.1/bin/g++ /usr/bin/g++ -ENV PATH=/usr/local/gcc-12.1/bin:$PATH +RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++ +RUN ln -s /usr/local/gcc-12.1/bin/gcc /usr/local/bin/gcc +RUN ln -s /usr/local/gcc-12.1/bin/g++ /usr/local/bin/g++ +RUN ln -s /usr/local/gcc-12.1/bin/gcc /usr/bin/gcc +RUN ln -s /usr/local/gcc-12.1/bin/g++ /usr/bin/g++ +ENV PATH=/usr/local/gcc-12.1/bin:$PATH RUN bash /build_scripts/install_cudnn.sh cudnn841 ENV CUDNN_VERSION=8.4.1 @@ -79,8 +79,8 @@ RUN rm setuptools-68.2.2.tar.gz pip-23.3.1.tar.gz && \ rm -r setuptools-68.2.2 pip-23.3.1 # remove them when apt-get support 2.27 and higher version -RUN wget -q https://ftp.gnu.org/gnu/binutils/binutils-2.33.1.tar.gz && \ - tar -xzf binutils-2.33.1.tar.gz && \ +RUN wget -q 
https://ftp.gnu.org/gnu/binutils/binutils-2.33.1.tar.gz && \ + tar -xzf binutils-2.33.1.tar.gz && \ cd binutils-2.33.1 && \ ./configure && make -j && make install && cd .. && rm -rf binutils-2.33.1 binutils-2.33.1.tar.gz diff --git a/tools/dockerfile/build_scripts/build.sh b/tools/dockerfile/build_scripts/build.sh index cb17a76a1dd05..402111b38e163 100644 --- a/tools/dockerfile/build_scripts/build.sh +++ b/tools/dockerfile/build_scripts/build.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/dockerfile/build_scripts/build_utils.sh b/tools/dockerfile/build_scripts/build_utils.sh index 10088cd2c5b02..2d5d35754551c 100755 --- a/tools/dockerfile/build_scripts/build_utils.sh +++ b/tools/dockerfile/build_scripts/build_utils.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -144,7 +144,7 @@ function do_openssl_build { ./config -fPIC --prefix=/usr/local/ssl > /dev/null make > /dev/null make install > /dev/null - + } diff --git a/tools/dockerfile/build_scripts/install_cudnn.sh b/tools/dockerfile/build_scripts/install_cudnn.sh index 78f03766c6fcf..402122dc205de 100644 --- a/tools/dockerfile/build_scripts/install_cudnn.sh +++ b/tools/dockerfile/build_scripts/install_cudnn.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/dockerfile/build_scripts/install_gcc.sh b/tools/dockerfile/build_scripts/install_gcc.sh index 4451e2783bb6b..5adbdd0faa2ac 100644 --- a/tools/dockerfile/build_scripts/install_gcc.sh +++ b/tools/dockerfile/build_scripts/install_gcc.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -30,7 +30,7 @@ else fi if [ "$1" == "gcc82" ]; then - wget -q --no-proxy https://paddle-ci.gz.bcebos.com/gcc-8.2.0.tar.xz + wget -q --no-proxy https://paddle-ci.gz.bcebos.com/gcc-8.2.0.tar.xz tar -xf gcc-8.2.0.tar.xz && \ cd gcc-8.2.0 && \ wget -q --no-proxy https://paddle-ci.gz.bcebos.com/sanitizer_platform_limits_posix.cc.patch @@ -44,7 +44,7 @@ if [ "$1" == "gcc82" ]; then make -j8 && make install cd .. && rm -rf temp_gcc82 gcc-8.2.0 gcc-8.2.0.tar.xz if [ -f "/etc/redhat-release" ];then - cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} && + cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} && ln -s /usr/local/gcc-8.2/lib64/libgfortran.so.5 ${lib_so_5} && \ ln -s /usr/local/gcc-8.2/lib64/libstdc++.so.6 ${lib_so_6} && \ cp /usr/local/gcc-8.2/lib64/libstdc++.so.6.0.25 ${lib_path} diff --git a/tools/dockerfile/build_scripts/install_nccl2.sh b/tools/dockerfile/build_scripts/install_nccl2.sh index 2680910834023..b279e0e6f094d 100644 --- a/tools/dockerfile/build_scripts/install_nccl2.sh +++ b/tools/dockerfile/build_scripts/install_nccl2.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/dockerfile/build_scripts/install_patchelf.sh b/tools/dockerfile/build_scripts/install_patchelf.sh index ef6b05ec02468..bdcebff0f3690 100644 --- a/tools/dockerfile/build_scripts/install_patchelf.sh +++ b/tools/dockerfile/build_scripts/install_patchelf.sh @@ -1,11 +1,11 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -34,4 +34,4 @@ make make install cd .. -rm -rf "$TMP_DIR" +rm -rf "$TMP_DIR" diff --git a/tools/dockerfile/build_scripts/install_trt.sh b/tools/dockerfile/build_scripts/install_trt.sh index 0fdadc8be8d70..6a35d1bfdce38 100644 --- a/tools/dockerfile/build_scripts/install_trt.sh +++ b/tools/dockerfile/build_scripts/install_trt.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -30,7 +30,7 @@ if [[ "$1" == "trt8034" && "$VERSION" == "11.2" ]];then wget -q --no-proxy https://paddle-ci.gz.bcebos.com/TRT/TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz --no-check-certificate tar -zxf TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz -C /usr/local cp -rf /usr/local/TensorRT-8.0.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-8.0.3.4/lib/* /usr/lib/ - rm TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz + rm TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz elif [[ "$1" == "trt8424" ]];then wget https://paddle-qa.bj.bcebos.com/nvidia/trt/TensorRT-8.4.2.4.tgz --no-check-certificate tar -zxf TensorRT-8.4.2.4.tgz -C /usr/local diff --git a/tools/dockerfile/centos7_manylinux.sh b/tools/dockerfile/centos7_manylinux.sh index 09793d8843226..38c443bde9cb8 100755 --- a/tools/dockerfile/centos7_manylinux.sh +++ b/tools/dockerfile/centos7_manylinux.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -61,11 +61,11 @@ function make_cuda123cudnn900trt8616() { } function main() { - local CMD=$1 + local CMD=$1 case $CMD in cuda112cudnn821trt8034) make_cuda112cudnn821trt8034 - ;; + ;; cuda116cudnn840trt8406) make_cuda116cudnn840trt8406 ;; diff --git a/tools/dockerfile/ci_dockerfile.sh b/tools/dockerfile/ci_dockerfile.sh index f1a1db3773b91..cdae7c0c2fe66 100644 --- a/tools/dockerfile/ci_dockerfile.sh +++ b/tools/dockerfile/ci_dockerfile.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. 
All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -55,7 +55,7 @@ function make_ce_framework_dockcerfile(){ sed -i 's#<install_cpu_package>##g' ${dockerfile_name} sed -i "7i RUN chmod 777 /tmp" ${dockerfile_name} sed -i "${dockerfile_line}i RUN wget --no-check-certificate -q https://paddle-edl.bj.bcebos.com/hadoop-2.7.7.tar.gz \&\& \ - tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/" ${dockerfile_name} + tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/" ${dockerfile_name} sed -i "${dockerfile_line}i RUN apt-get update && apt install -y zstd pigz libcurl4-openssl-dev gettext ninja-build" ${dockerfile_name} sed -i "${dockerfile_line}i RUN pip3.10 install wheel distro" ${dockerfile_name} sed -i "${dockerfile_line}i RUN pip3.10 install nvidia-cuda-cupti-cu11==11.8.87 nvidia-cuda-runtime-cu11==11.8.89 nvidia-cudnn-cu11==8.7.0.84 nvidia-cublas-cu11==11.11.3.6 nvidia-cufft-cu11==10.9.0.58 nvidia-curand-cu11==10.3.0.86 nvidia-cusolver-cu11==11.4.1.48 nvidia-cusparse-cu11==11.7.5.86 nvidia-nccl-cu11==2.19.3" ${dockerfile_name} diff --git a/tools/dockerfile/ubuntu20_dev.sh b/tools/dockerfile/ubuntu20_dev.sh index 27fe1694287df..ec9d9d9f97e3f 100755 --- a/tools/dockerfile/ubuntu20_dev.sh +++ b/tools/dockerfile/ubuntu20_dev.sh @@ -5,9 +5,9 @@ # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/dockerfile/ubuntu20_release.sh b/tools/dockerfile/ubuntu20_release.sh index 8fa08f5326025..e870649e3a695 100755 --- a/tools/dockerfile/ubuntu20_release.sh +++ b/tools/dockerfile/ubuntu20_release.sh @@ -5,9 +5,9 @@ # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/document_preview.sh b/tools/document_preview.sh index 47c5207074046..97c01ee96d03b 100755 --- a/tools/document_preview.sh +++ b/tools/document_preview.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -45,7 +45,7 @@ function get_docs_pr_num_from_paddle_pr_info(){ } # Attention: -# 1. /FluidDoc will be used as the workspace of PaddlePaddle/docs. +# 1. /FluidDoc will be used as the workspace of PaddlePaddle/docs. # 2. And /docs is used as the output of doc-build process. # 3. 
If conflicted with yours, please modify the definition of FLUIDDOCDIR and # OUTPUTDIR in the subsequent codes. diff --git a/tools/enforce/count_enforce_by_dir.sh b/tools/enforce/count_enforce_by_dir.sh index 77ffe9c158c7d..ba419f77f2bc1 100644 --- a/tools/enforce/count_enforce_by_dir.sh +++ b/tools/enforce/count_enforce_by_dir.sh @@ -15,10 +15,10 @@ # limitations under the License. # This script is used to count detail PADDLE checks in the paddle/fluid directory, -# contains the number of PADDLE checks under each folder, the statistical data +# contains the number of PADDLE checks under each folder, the statistical data # does not include subdirectories, only covers all files under the current directory. -# -# The three columns of data are: total number, valid number, invalid number. +# +# The three columns of data are: total number, valid number, invalid number. # The output format is easy to display as a markdown table. # Usage: bash count_enforce_by_dir.sh (run in tools directory) @@ -70,8 +70,8 @@ function count_dir_independently(){ enforce_count $1"/"$file dir_total_check_cnt dir_valid_check_cnt sub_dir_total_check_cnt=$(($sub_dir_total_check_cnt+$dir_total_check_cnt)) sub_dir_valid_check_cnt=$(($sub_dir_valid_check_cnt+$dir_valid_check_cnt)) - - count_dir_independently $1"/"$file $dir_total_check_cnt $dir_valid_check_cnt + + count_dir_independently $1"/"$file $dir_total_check_cnt $dir_valid_check_cnt fi done total_check_cnt=$(($2-$sub_dir_total_check_cnt)) diff --git a/tools/enforce/count_enforce_by_file.sh b/tools/enforce/count_enforce_by_file.sh index c79d486c62838..b06514a4e03bb 100644 --- a/tools/enforce/count_enforce_by_file.sh +++ b/tools/enforce/count_enforce_by_file.sh @@ -16,8 +16,8 @@ # This script is used to count PADDLE checks by files in the paddle/fluid/operators directory, # contains the number of PADDLE checks under each file. -# -# The three columns of data are: total number, valid number, invalid number. 
+# +# The three columns of data are: total number, valid number, invalid number. # The output format is easy to display as a markdown table. # Usage: bash count_enforce_by_file.sh [target directory or file] (run in tools directory) diff --git a/tools/externalError/start.sh b/tools/externalError/start.sh index d60a26d157cce..057a67ef46a41 100644 --- a/tools/externalError/start.sh +++ b/tools/externalError/start.sh @@ -1,13 +1,13 @@ #!/usr/bin/env bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/final_ut_parallel_rule.py b/tools/final_ut_parallel_rule.py index e0fc86c19a8cc..2dbfdd39c1a2c 100644 --- a/tools/final_ut_parallel_rule.py +++ b/tools/final_ut_parallel_rule.py @@ -19,7 +19,7 @@ def classify_cases_by_mem(rootPath): """classify cases by mem""" - case_filename = '%s/build/classify_case_by_cardNum.txt' % rootPath + case_filename = f'{rootPath}/build/classify_case_by_cardNum.txt' case_exec_100 = [ 'test_conv_eltwiseadd_bn_fuse_pass', 'test_trt_convert_pool2d', @@ -124,14 +124,14 @@ def classify_cases_by_mem(rootPath): else: case_mem_1[case] = new_lastest_mem[case]["mem_nvidia"] - with open('/pre_test/%s_mem0' % cardType, 'w') as f: + with open(f'/pre_test/{cardType}_mem0', 'w') as f: f.write(case_mem_0) f.close() case_mem_1_sort = sorted(case_mem_1.items(), key=lambda x: x[1]) case_mem_1_line = '^job$' mem_1_sum = 0 - with open('/pre_test/%s' % cardType, 'w') as f_not_0: + with open(f'/pre_test/{cardType}', 'w') as f_not_0: for index in case_mem_1_sort: if mem_1_sum < 14 * 1024 * 
2: mem_1_sum += index[1] @@ -150,7 +150,7 @@ def classify_cases_by_mem(rootPath): f_not_0.write(case_mem_1_line + '\n') f_not_0.close() - os.system('cp %s/build/nightly_case /pre_test/' % rootPath) + os.system(f'cp {rootPath}/build/nightly_case /pre_test/') if __name__ == '__main__': diff --git a/tools/gen_alias_mapping.sh b/tools/gen_alias_mapping.sh index 3ab1e68b37557..c57f3f6bba2b1 100755 --- a/tools/gen_alias_mapping.sh +++ b/tools/gen_alias_mapping.sh @@ -17,16 +17,16 @@ # Brief: # This code is used for generating the mapping list of Paddle API alias. # Only the APIs set with the `DEFINE_ALIAS` flag is enable. -# +# # Arguments: # None -# +# # Usage: -# Go into the `Paddle` folder and just run `./tools/gen_alias_mapping.sh` +# Go into the `Paddle` folder and just run `./tools/gen_alias_mapping.sh` # # Returns: # succ: 0 -# +# # Will also print the mapping list to stdout. The format of each line is as below: # <real API implement>\t<API recommend>,<API other alias name1>,<API other alias name2>,... @@ -38,7 +38,7 @@ find ${PADDLE_ROOT}/python/ -name '*.py' \ | grep 'DEFINE_ALIAS' \ | perl -ne ' if (/\/python\/(.*):from (\.*)(\w.*) import (.*?)\s+#DEFINE_ALIAS\s+$/) { - my @arr = split(", ", $4); + my @arr = split(", ", $4); foreach $i (@arr) { printf "%s|%s|%s|%d\n", $3, $i, substr($1, 0, -3), length($2); } @@ -66,7 +66,7 @@ find ${PADDLE_ROOT}/python/ -name '*.py' \ } key = key""new; n2o[key] = val; - } + } END { for (new in n2o) { old = n2o[new] in n2o ? n2o[n2o[new]] : n2o[new]; @@ -78,7 +78,7 @@ find ${PADDLE_ROOT}/python/ -name '*.py' \ { o2n[$1] = o2n[$1] ? 
o2n[$1]","$3 : $3; } - END { + END { for (i in o2n) { print i"\t"o2n[i]; } diff --git a/tools/gen_tensor_stub.py b/tools/gen_tensor_stub.py index 00c7fb0c2e50c..422b3004f5266 100644 --- a/tools/gen_tensor_stub.py +++ b/tools/gen_tensor_stub.py @@ -15,17 +15,18 @@ from __future__ import annotations import argparse +import importlib import inspect import logging import re +import sys +import types from dataclasses import dataclass from functools import cached_property, lru_cache from typing import Any, Callable, Literal from typing_extensions import TypeAlias -import paddle - logging.basicConfig(style="{", format="{message}", level=logging.INFO) logger = logging.getLogger("Generating stub file for paddle.Tensor") logger.setLevel(logging.INFO) @@ -102,7 +103,6 @@ def find_apis(self, api_name: str) -> list[dict[str, tuple[str, int, int]]]: api = [] for mo in pattern.finditer(self._template): _indent = mo.group('indent') - _def_api = mo.group('def_api') _signature = mo.group('signature') _docstring = mo.group('docstring') _ellipsis = mo.group('ellipsis') @@ -110,26 +110,15 @@ def find_apis(self, api_name: str) -> list[dict[str, tuple[str, int, int]]]: _comment = '' if _comment is None else _comment _start_index, _end_index = mo.span() - - _start_indent = _start_index - _end_indent = _start_indent + len(_indent) - - _start_def_api = _end_indent - _end_def_api = _start_def_api + len(_def_api) - - _start_signature = _end_def_api - _end_signature = _start_signature + len(_signature) - - _start_docstring = _end_signature - _end_docstring = _start_docstring + len(_docstring) - - _start_ellipsis = _end_docstring - _end_ellipsis = _start_ellipsis + len(_ellipsis) - + _start_indent, _end_indent = mo.span('indent') + _start_signature, _end_signature = mo.span('signature') + _start_docstring, _end_docstring = mo.span('docstring') + _start_ellipsis, _end_ellipsis = mo.span('ellipsis') _start_comment = _end_ellipsis _end_comment = _start_comment + len(_comment) - assert _end_index 
== _end_comment + assert _start_index == _start_indent + assert _end_comment == _end_index _api = { 'indent': (_indent, _start_indent, _end_indent), @@ -216,7 +205,10 @@ def add_doc(self, doc: str): self.insert_template(docstring, _end_index, _end_index) def codegen(self) -> str: - return self._template + header = ( + '# This file is auto generated by `tools/gen_tensor_stub.py`.\n\n' + ) + return header + self._template def is_inherited_member(name: str, cls: type) -> bool: @@ -336,7 +328,27 @@ def func_doc_to_method_doc(func_doc: str) -> str: return method_doc +def try_import_paddle() -> types.ModuleType | None: + try: + return importlib.import_module('paddle') + except ModuleNotFoundError: + sys.stderr.write( + '''ERROR: Can NOT import paddle. + We could import paddle without installation, with all libs (.dll or .so) copied into dir `paddle/libs`, + or path already been set for the system. + ''' + ) + + def get_tensor_members(): + paddle = try_import_paddle() + if not paddle: + raise ( + ModuleNotFoundError( + 'Can NOT import paddle from tools/gen_tensor_stub.py.' 
+ ) + ) + tensor_class = paddle.Tensor members: dict[int, Member] = {} @@ -433,7 +445,7 @@ def get_tensor_template(path: str) -> str: return ''.join(f.readlines()) -def main(): +def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( @@ -442,7 +454,6 @@ def main(): type=str, default="python/paddle/tensor/tensor.prototype.pyi", ) - parser.add_argument( "-o", "--output-file", @@ -452,12 +463,16 @@ def main(): args = parser.parse_args() + return args + + +def generate_stub_file(input_file=None, output_file=None): # Get members of Tensor tensor_members = get_tensor_members() logging.debug(f'total members in Tensor: {len(tensor_members)}') # Get tensor template - tensor_template = get_tensor_template(args.input_file) + tensor_template = get_tensor_template(input_file) # Generate the Tensor stub tensor_gen = TensorGen(tensor_template) @@ -473,9 +488,14 @@ def main(): tensor_gen.add_doc(member.doc) # Write to target file - with open(args.output_file, "w", encoding="utf-8") as f: + with open(output_file, "w", encoding="utf-8") as f: f.write(tensor_gen.codegen()) +def main(): + args = parse_args() + generate_stub_file(args.input_file, args.output_file) + + if __name__ == "__main__": main() diff --git a/tools/get_build_time.sh b/tools/get_build_time.sh index 496c8c12d6ca3..85100bb50c761 100755 --- a/tools/get_build_time.sh +++ b/tools/get_build_time.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/tools/get_cpu_info.sh b/tools/get_cpu_info.sh index bce338a8619e6..b7ec2e77a3a84 100755 --- a/tools/get_cpu_info.sh +++ b/tools/get_cpu_info.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -54,7 +54,7 @@ echo "OS Version : `uname -o`" echo "Kernel Release Version : `uname -r`" echo "Kernel Patch Version : `uname -v`" echo "GCC Version :`gcc --version | head -n 1|awk -F '\\\(GCC\\\)' '{print $2}'`" -if command -v cmake >/dev/null 2>&1; then +if command -v cmake >/dev/null 2>&1; then cmake_ver=`cmake --version | head -n 1 | awk -F 'version' '{print $2}'` else cmake_ver=" Not installed" diff --git a/tools/get_op_list.sh b/tools/get_op_list.sh index 2e4cad13582df..2b5d7f419b1d2 100644 --- a/tools/get_op_list.sh +++ b/tools/get_op_list.sh @@ -1,11 +1,11 @@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/tools/get_ut_file_map.py b/tools/get_ut_file_map.py index bf469eab98747..42b1c251f19a1 100644 --- a/tools/get_ut_file_map.py +++ b/tools/get_ut_file_map.py @@ -19,8 +19,8 @@ def get_all_paddle_file(rootPath): """get all file in Paddle repo: paddle/fluild, python""" - traverse_files = ['%s' % rootPath] - all_file_paddle = '%s/build/all_file_paddle' % rootPath + traverse_files = [f'{rootPath}'] + all_file_paddle = f'{rootPath}/build/all_file_paddle' all_file_paddle_list = [] with open(all_file_paddle, 'w') as f: for filename in traverse_files: @@ -32,7 +32,7 @@ def get_all_paddle_file(rootPath): def get_all_uts(rootPath): - all_uts_paddle = '%s/build/all_uts_paddle' % rootPath + all_uts_paddle = f'{rootPath}/build/all_uts_paddle' os.system( fr'cd {rootPath}/build && ctest -N -V | grep -Ei "Test[ \t]+#" | grep -oEi "\w+$" > {all_uts_paddle}' ) @@ -42,28 +42,28 @@ def remove_useless_file(rootPath): """remove useless file in ut_file_map.json""" all_file_paddle_list = get_all_paddle_file(rootPath) ut_file_map_new = {} - ut_file_map = "%s/build/ut_file_map.json" % rootPath + ut_file_map = f"{rootPath}/build/ut_file_map.json" with open(ut_file_map, 'r') as load_f: load_dict = json.load(load_f) for key in load_dict: if key in all_file_paddle_list: ut_file_map_new[key] = load_dict[key] - with open("%s/build/ut_file_map.json" % rootPath, "w") as f: + with open(f"{rootPath}/build/ut_file_map.json", "w") as f: json.dump(ut_file_map_new, f, indent=4) print("remove_useless_file ut_file_map success!!") def handle_ut_file_map(rootPath): utNotSuccess_list = [] - ut_map_path = "%s/build/ut_map" % rootPath + ut_map_path = f"{rootPath}/build/ut_map" files = os.listdir(ut_map_path) ut_file_map = {} count = 0 - not_success_file = open("%s/build/prec_delta" % rootPath, 'w') + not_success_file = open(f"{rootPath}/build/prec_delta", 'w') # if testdir is not made,write the test into prec_delta get_all_uts(rootPath) - all_ut = '%s/build/all_uts_paddle' % rootPath + all_ut = 
f'{rootPath}/build/all_uts_paddle' with open(all_ut, 'r') as f: all_ut_list = [] for ut in f.readlines(): @@ -73,7 +73,7 @@ def handle_ut_file_map(rootPath): for ut in all_ut_list: filedir = f'{rootPath}/build/ut_map/{ut}' if not os.path.exists(filedir): - not_success_file.write('%s\n' % ut) + not_success_file.write(f'{ut}\n') utNotSuccess_list.append(ut) # if fnda.tmp not exists,write the test into prec_delta for ut in files: @@ -108,7 +108,7 @@ def handle_ut_file_map(rootPath): ut_file_map[source_file].append(ut) f.close() else: - not_success_file.write('%s\n' % ut) + not_success_file.write(f'{ut}\n') utNotSuccess_list.append(ut) not_success_file.close() @@ -135,13 +135,13 @@ def handle_ut_file_map(rootPath): if source_file not in ut_file_map: ut_file_map[source_file] = [] f.close() - with open("%s/build/ut_file_map.json" % rootPath, "w") as f: + with open(f"{rootPath}/build/ut_file_map.json", "w") as f: json.dump(ut_file_map, f, indent=4) def notsuccessfuc(rootPath): utNotSuccess = '' - ut_map_path = "%s/build/ut_map" % rootPath + ut_map_path = f"{rootPath}/build/ut_map" files = os.listdir(ut_map_path) count = 0 @@ -154,7 +154,7 @@ def notsuccessfuc(rootPath): pass else: count = count + 1 - utNotSuccess = utNotSuccess + '^%s$|' % ut + utNotSuccess = utNotSuccess + f'^{ut}$|' # ut not exec @@ -166,7 +166,7 @@ def notsuccessfuc(rootPath): if ut not in files: print(ut) count = count + 1 - utNotSuccess = utNotSuccess + '^%s$|' % ut + utNotSuccess = utNotSuccess + f'^{ut}$|' if utNotSuccess != '': print("utNotSuccess count: %s" % count) @@ -176,18 +176,17 @@ def notsuccessfuc(rootPath): def ut_file_map_supplement(rootPath): - ut_file_map_new = "%s/build/ut_file_map.json" % rootPath + ut_file_map_new = f"{rootPath}/build/ut_file_map.json" precision_test_map_store_dir = "/precision_test_map_store" - os.system('mkdir %s' % precision_test_map_store_dir) + os.system(f'mkdir {precision_test_map_store_dir}') os.system( - 'cd %s && wget --no-proxy 
https://paddle-docker-tar.bj.bcebos.com/tmp_test/ut_file_map.json --no-check-certificate' - % precision_test_map_store_dir + f'cd {precision_test_map_store_dir} && wget --no-proxy https://paddle-docker-tar.bj.bcebos.com/tmp_test/ut_file_map.json --no-check-certificate' ) - ut_file_map_old = "%s/ut_file_map.json" % precision_test_map_store_dir + ut_file_map_old = f"{precision_test_map_store_dir}/ut_file_map.json" with open(ut_file_map_new, 'r') as load_f: load_dict_new = json.load(load_f) - all_uts_paddle = '%s/build/all_uts_paddle' % rootPath + all_uts_paddle = f'{rootPath}/build/all_uts_paddle' with open(all_uts_paddle, 'r') as f: all_uts_paddle_list = [] @@ -195,15 +194,14 @@ def ut_file_map_supplement(rootPath): all_uts_paddle_list.append(ut.strip()) f.close() - with open("%s/ut_file_map.json" % precision_test_map_store_dir, "w") as f: + with open(f"{precision_test_map_store_dir}/ut_file_map.json", "w") as f: json.dump(load_dict_new, f, indent=4) print("load_dict_new success!!") os.system( - 'cd %s && wget --no-proxy https://paddle-docker-tar.bj.bcebos.com/tmp_test/prec_delta --no-check-certificate' - % precision_test_map_store_dir + f'cd {precision_test_map_store_dir} && wget --no-proxy https://paddle-docker-tar.bj.bcebos.com/tmp_test/prec_delta --no-check-certificate' ) - prec_delta_new = "%s/build/prec_delta" % rootPath + prec_delta_new = f"{rootPath}/build/prec_delta" with open(prec_delta_new, 'r') as f: prec_delta_new_list = [] for ut in f.readlines(): @@ -212,7 +210,7 @@ def ut_file_map_supplement(rootPath): prec_delta_new_list.append( 'test_py_reader_error_msg' ) # add a python case for pycoverage - prec_delta_file = open("%s/prec_delta" % precision_test_map_store_dir, 'w') + prec_delta_file = open(f"{precision_test_map_store_dir}/prec_delta", 'w') for ut in prec_delta_new_list: prec_delta_file.write(ut + '\n') print("prec_delta_file success!!") @@ -220,7 +218,7 @@ def ut_file_map_supplement(rootPath): def utmap_analysis(rootPath): - ut_file_map_new = 
"%s/build/ut_file_map.json" % rootPath + ut_file_map_new = f"{rootPath}/build/ut_file_map.json" with open(ut_file_map_new, 'r') as load_f: load_dict_new = json.load(load_f) print(len(load_dict_new)) diff --git a/tools/gpups_test.sh b/tools/gpups_test.sh index 07122405a21d7..e0669fb85e658 100644 --- a/tools/gpups_test.sh +++ b/tools/gpups_test.sh @@ -1,11 +1,11 @@ # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -40,7 +40,7 @@ function get_quickly_disable_ut() { fi } -# disable test: +# disable test: # test_dygraph_dataparallel_bf16 # test_dygraph_sharding_stage2_bf16 # test_dygraph_sharding_stage3_bf16 diff --git a/tools/group_case_for_parallel.py b/tools/group_case_for_parallel.py index 66187ca4b0607..9af5e084bada2 100644 --- a/tools/group_case_for_parallel.py +++ b/tools/group_case_for_parallel.py @@ -40,29 +40,29 @@ def group_case_for_parallel(rootPath): ) # get nightly tests - nightly_tests_file = open('%s/tools/nightly_case' % rootPath, 'r') + nightly_tests_file = open(f'{rootPath}/tools/nightly_case', 'r') nightly_tests = nightly_tests_file.read().strip().split('\n') nightly_tests_file.close() parallel_case_file_list = [ - '%s/tools/single_card_tests_mem0' % rootPath, - '%s/tools/single_card_tests' % rootPath, - '%s/tools/multiple_card_tests_mem0' % rootPath, - '%s/tools/multiple_card_tests' % rootPath, - '%s/tools/exclusive_card_tests_mem0' % rootPath, - '%s/tools/exclusive_card_tests' % rootPath, + f'{rootPath}/tools/single_card_tests_mem0', + f'{rootPath}/tools/single_card_tests', + 
f'{rootPath}/tools/multiple_card_tests_mem0', + f'{rootPath}/tools/multiple_card_tests', + f'{rootPath}/tools/exclusive_card_tests_mem0', + f'{rootPath}/tools/exclusive_card_tests', ] - case_file = '%s/build/ut_list' % rootPath + case_file = f'{rootPath}/build/ut_list' if os.path.exists(case_file): f = open(case_file, 'r') all_need_run_cases = f.read().strip().split('\n') if len(all_need_run_cases) == 1 and all_need_run_cases[0] == '': f.close() - case_file = '%s/build/all_ut_list' % rootPath + case_file = f'{rootPath}/build/all_ut_list' f = open(case_file, 'r') all_need_run_cases = f.read().strip().split('\n') else: - case_file = '%s/build/all_ut_list' % rootPath + case_file = f'{rootPath}/build/all_ut_list' f = open(case_file, 'r') all_need_run_cases = f.read().strip().split('\n') @@ -71,7 +71,7 @@ def group_case_for_parallel(rootPath): all_group_case = [] for filename in parallel_case_file_list: fi = open(filename, 'r') - new_f = open('%s_new' % filename, 'w') + new_f = open(f'{filename}_new', 'w') lines = fi.readlines() new_case_file_list = [] for line in lines: @@ -88,7 +88,7 @@ def group_case_for_parallel(rootPath): for line in new_case_file_list: cases = '$|^'.join(case for case in line) - cases = '^job$|^%s$' % cases + cases = f'^job$|^{cases}$' new_f.write(cases + '\n') fi.close() new_f.close() @@ -98,10 +98,10 @@ def group_case_for_parallel(rootPath): if len(all_need_run_cases) != 0: for case in all_need_run_cases: if case not in nightly_tests: - cases = cases + '$|^%s' % case - cases = '%s$' % cases + cases = cases + f'$|^{case}' + cases = f'{cases}$' - new_f = open('%s/tools/no_parallel_case_file' % rootPath, 'w') + new_f = open(f'{rootPath}/tools/no_parallel_case_file', 'w') new_f.write(cases + '\n') new_f.close() f.close() diff --git a/tools/handle_h_cu_file.py b/tools/handle_h_cu_file.py index 86458045d3de8..656e47fdba896 100644 --- a/tools/handle_h_cu_file.py +++ b/tools/handle_h_cu_file.py @@ -43,7 +43,7 @@ def threadPool(threadPoolNum): def 
get_h_file_md5(rootPath): - h_cu_files = '%s/tools/h_cu_files.log' % rootPath + h_cu_files = f'{rootPath}/tools/h_cu_files.log' f = open(h_cu_files) lines = f.readlines() for line in lines: @@ -52,7 +52,7 @@ def get_h_file_md5(rootPath): def insert_pile_to_h_file(rootPath): - h_cu_files = '%s/tools/h_cu_files.log' % rootPath + h_cu_files = f'{rootPath}/tools/h_cu_files.log' f = open(h_cu_files) lines = f.readlines() for line in lines: @@ -60,7 +60,7 @@ def insert_pile_to_h_file(rootPath): func = line.replace('/', '_').replace('.', '_') os.system(f'echo "\n#ifndef _PRECISE{func.upper()}_\n" >> {line}') os.system(f'echo "#define _PRECISE{func.upper()}_" >> {line}') - os.system('echo "\n#include <cstdio>\n" >> %s' % line) + os.system(f'echo "\n#include <cstdio>\n" >> {line}') os.system( f'echo "__attribute__((constructor)) static void calledFirst{func}()\n{{" >> {line}' ) @@ -68,43 +68,40 @@ def insert_pile_to_h_file(rootPath): 'echo \' fprintf(stderr,"precise test map fileeee: %%s\\\\n", __FILE__);\n}\' >> %s' % line ) - os.system('echo "\n#endif" >> %s' % line) + os.system(f'echo "\n#endif" >> {line}') def add_simple_cxx_test(rootPath): - variant_test_path = '%s/paddle/utils/variant_test.cc' % rootPath - variant_test_cmakeflie_path = '%s/paddle/utils/CMakeLists.txt' % rootPath + variant_test_path = f'{rootPath}/paddle/utils/variant_test.cc' + variant_test_cmakeflie_path = f'{rootPath}/paddle/utils/CMakeLists.txt' if os.path.exists(variant_test_path) and os.path.exists( variant_test_cmakeflie_path ): - simple_test_path = '%s/paddle/utils/simple_precision_test.cc' % rootPath - os.system('touch %s' % simple_test_path) + simple_test_path = f'{rootPath}/paddle/utils/simple_precision_test.cc' + os.system(f'touch {simple_test_path}') + os.system(f"echo '#include \"gtest/gtest.h\"\n' >> {simple_test_path}") os.system( - "echo '#include \"gtest/gtest.h\"\n' >> %s" % simple_test_path - ) - os.system( - 'echo "TEST(interface_test, type) { }\n" >> %s' % simple_test_path + 
f'echo "TEST(interface_test, type) {{ }}\n" >> {simple_test_path}' ) os.system('echo "cc_test(" >> %s' % variant_test_cmakeflie_path) os.system( - 'echo " simple_precision_test" >> %s' % variant_test_cmakeflie_path + f'echo " simple_precision_test" >> {variant_test_cmakeflie_path}' ) os.system( - 'echo " SRCS simple_precision_test.cc" >> %s' - % variant_test_cmakeflie_path + f'echo " SRCS simple_precision_test.cc" >> {variant_test_cmakeflie_path}' ) - os.system('echo " DEPS gtest)\n" >> %s' % variant_test_cmakeflie_path) + os.system(f'echo " DEPS gtest)\n" >> {variant_test_cmakeflie_path}') def remove_pile_from_h_file(rootPath): - h_cu_files = '%s/tools/h_cu_files.log' % rootPath + h_cu_files = f'{rootPath}/tools/h_cu_files.log' f = open(h_cu_files) lines = f.readlines() count = 12 for line in lines: line = line.strip() while count > 0: - os.system("sed -i '$d' %s" % line) + os.system(f"sed -i '$d' {line}") count = count - 1 count = 12 diff --git a/tools/nvcc_lazy.sh b/tools/nvcc_lazy.sh index 31e1a44540133..bb851c11df6db 100755 --- a/tools/nvcc_lazy.sh +++ b/tools/nvcc_lazy.sh @@ -17,7 +17,7 @@ echo "#!/usr/bin/env bash" >> $1 echo "unset GREP_OPTIONS" >> $1 echo "set -e" >> $1 -echo -e >> $1 +echo -e >> $1 echo "# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved." >> $1 echo "#" >> $1 echo "# Licensed under the Apache License, Version 2.0 (the \"License\");" >> $1 @@ -25,7 +25,7 @@ echo "# you may not use this file except in compliance with the License." >> $1 echo "# You may obtain a copy of the License at" >> $1 echo "#" >> $1 echo "# http://www.apache.org/licenses/LICENSE-2.0" >> $1 -echo "#" >> $1 +echo "#" >> $1 echo "# Unless required by applicable law or agreed to in writing, software" >> $1 echo "# distributed under the License is distributed on an \"AS IS\" BASIS," >> $1 echo "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." 
>> $1 diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 646a40b30dc0c..23fae5e53097d 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -66,7 +66,7 @@ 'test_paddle_inference_api', 'test_reference_count_pass_last_lived_ops', 'test_op_support_gpu', - 'test_conditional_block', + 'test_conditional_block_deprecated', 'test_fleet_rolemaker_init', 'test_pybind_interface', 'test_io_save_load', @@ -228,7 +228,7 @@ 'test_analyzer_capi_exp_pd_threads', 'test_selected_rows', 'test_fleet_sharding_meta_optimizer', - 'test_inference_api', + 'test_inference_api_deprecated', 'test_data_generator', 'test_deprecated_memory_optimize_interfaces', 'test_ir_skip_layernorm_pass', @@ -445,7 +445,7 @@ 'op_version_registry_test', 'test_cudnn_placement_pass', 'cipher_utils_test', - 'test_program_code', + 'test_program_code_deprecated', 'test_save_model_without_var', 'program_utils_test', 'test_fleet_distributed_strategy', @@ -503,8 +503,8 @@ 'test_dist_fleet_heter_program', 'test_dist_fleet_ctr', 'test_collective_allreduce_api', - 'test_dataloader_unkeep_order', - 'test_dataloader_keep_order', + 'test_dataloader_unkeep_order_deprecated', + 'test_dataloader_keep_order_deprecated', 'test_dist_se_resnext_sync', 'test_dist_fleet_ps6', 'test_dist_fleet_a_sync_optimizer_auto_async', @@ -553,7 +553,7 @@ 'test_sync_batch_norm_op', 'test_dist_mnist_batch_merge', 'test_fleet_launch_ps', - 'test_dist_sparse_tensor_load_sgd', + 'test_dist_sparse_tensor_load_sgd_deprecated', 'test_dist_fleet_a_sync_optimizer_auto_geo', 'test_dist_lookup_sparse_table_fuse_ops', 'test_dist_fleet_a_sync_optimizer_geo_deprecated', @@ -728,7 +728,7 @@ 'test_cyclic_cifar_dataset', 'test_dyn_rnn', 'test_multiclass_nms_op', - 'test_communicator_geo', + 'test_communicator_geo_deprecated', 'test_quant_int8_mobilenetv2_mkldnn', 'test_analyzer_seq_pool1', 'test_analyzer_transformer', @@ -851,7 +851,7 @@ 'test_distribution', 'test_box_clip_op', 'custom_tensor_test', - 
'test_dataloader_early_reset', + 'test_dataloader_early_reset_deprecated', 'test_gather_nd_op', 'test_tensor_register_hook', 'test_retain_graph', @@ -918,7 +918,7 @@ 'test_nanmedian', 'test_linear', 'test_imperative_qat_amp', - 'test_truncated_gaussian_random_op', + 'test_truncated_gaussian_random_op_deprecated', 'test_lstm_cudnn_op', 'copy_same_tensor_test', 'test_squeeze2_op', @@ -942,6 +942,7 @@ 'test_sign_op', 'selected_rows_functor_gpu_test', 'test_fleet_base', + 'test_fleet_base_deprecated', 'test_logsumexp', 'test_detection', 'test_image_classification_fp16', @@ -1080,7 +1081,7 @@ 'test_prelu_op', 'test_l1_norm_op', 'test_rmsprop_op', - 'test_fuse_bn_act_pass', + 'test_fuse_bn_act_pass_deprecated', 'test_inplace_addto_strategy', 'test_paddle_save_load', 'test_prelu_mkldnn_op', @@ -1232,7 +1233,7 @@ 'test_memory_reuse_exclude_feed_var', 'test_polygon_box_transform', 'math_function_gpu_test', - 'test_program_prune_backward', + 'test_program_prune_backward_deprecated', 'test_ema_fleet', 'test_fleet_amp_init', 'test_normalize', @@ -1265,7 +1266,7 @@ 'test_nn_margin_rank_loss', 'test_arg_min_max_v2_op', 'test_variance_layer', - 'test_quantization_scale_pass', + 'test_quantization_scale_pass_deprecated', 'test_segment_ops', 'test_layers', 'test_imperative_qat_channelwise', @@ -1274,7 +1275,7 @@ 'test_l1_loss', 'test_ifelse', 'test_cache_program', - 'test_ir_fc_fuse_pass', + 'test_ir_fc_fuse_pass_deprecated', 'test_kldiv_loss_op', 'test_switch_case', 'test_unique', @@ -1332,6 +1333,7 @@ 'test_callbacks', 'test_imperative_recurrent_usage', 'test_deform_conv2d', + 'test_deform_conv2d_deprecated', 'test_coalesce_tensor_op', 'test_tsm', 'test_fused_multihead_matmul_op', @@ -1382,7 +1384,7 @@ 'test_deformable_psroi_pooling', 'test_multi_precision_fp16_train', 'test_adam_op_multi_thread', - 'test_decoupled_py_reader', + 'test_decoupled_py_reader_deprecated', 'test_distribute_fpn_proposals_op', 'transform_test', 'test_nan_inf', @@ -1467,11 +1469,11 @@ 'test_trt_matmul', 
'test_trt_fc_fuse_pass', 'test_trt_pad_op', - 'test_imperative_lod_tensor_to_selected_rows', + 'test_imperative_lod_tensor_to_selected_rows_deprecated', 'test_gru_unit_op', 'test_amp_check_finite_and_scale_op', 'test_imperative_selected_rows_to_lod_tensor', - 'test_add_reader_dependency', + 'test_add_reader_dependency_deprecated', 'test_imperative_transformer_sorted_gradient', 'test_bicubic_interp_v2_op', 'test_rank_attention_op', @@ -1693,7 +1695,7 @@ 'test_protobuf', 'test_progressbar', 'test_program_to_string', - 'test_program_code', + 'test_program_code_deprecated', 'test_program', 'test_precision_recall_op', 'test_post_training_quantization_resnet50', @@ -1755,7 +1757,7 @@ 'test_infer_shape', 'test_infer_no_need_buffer_slots', 'test_inference_model_io', - 'test_inference_api', + 'test_inference_api_deprecated', 'test_imperative_signal_handler', 'test_imperative_numpy_bridge', 'test_imperative_group', @@ -1880,7 +1882,7 @@ 'test_conv2d_bf16_mkldnn_op', 'test_context_manager', 'test_const_value', - 'test_conditional_block', + 'test_conditional_block_deprecated', 'test_concat_int8_mkldnn_op', 'test_concat_bf16_mkldnn_op', 'test_compat', @@ -1996,7 +1998,7 @@ 'test_dist_fleet_ps_gpu_ctr', 'test_dist_mnist_backward_deps', 'test_dist_fleet_heter_base', - 'test_dist_sparse_tensor_load_sgd', + 'test_dist_sparse_tensor_load_sgd_deprecated', 'test_new_group', 'test_dist_mnist_with_program', 'test_dist_mnist_pg', @@ -2087,7 +2089,7 @@ 'test_fake_init_op', 'brpc_service_sparse_sgd_test', 'test_tf32_cudnn', - 'test_communicator_geo', + 'test_communicator_geo_deprecated', 'test_fleet_dgc_meta_optimizer', 'test_fc_fuse_pass_cc', 'test_communicator_sync', @@ -2230,6 +2232,7 @@ 'test_analyzer_bert', 'test_analyzer_googlenet', 'test_fleet_base', + 'test_fleet_base_deprecated', 'test_dgc_momentum_op', 'test_memcpy_op', 'test_dgc_op', @@ -2258,7 +2261,7 @@ 'test_reshape_op', 'test_fused_transformer_encoder_layer', 'test_eager_deletion_while_op', - 'test_dataloader_unkeep_order', 
+ 'test_dataloader_unkeep_order_deprecated', 'test_correlation', 'test_moving_average_abs_max_scale_op', 'test_flatten_contiguous_range_op', @@ -2291,7 +2294,7 @@ 'test_imperative_trace_non_persistable_inputs', 'test_executor_return_tensor_not_overwriting', 'test_density_prior_box_op', - 'test_dataloader_keep_order', + 'test_dataloader_keep_order_deprecated', 'test_bce_loss', 'test_fetch_lod_tensor_array', 'test_smooth_l1_loss', @@ -2347,6 +2350,7 @@ 'test_diagflat', 'test_determinant_op', 'test_deform_conv2d', + 'test_deform_conv2d_deprecated', 'test_conv_transpose_nn_grad', 'test_conj_op', 'test_complex_reshape', @@ -2527,7 +2531,7 @@ 'test_logical_op', 'test_imperative_deepcf', 'test_cholesky_op', - 'test_ir_fc_fuse_pass', + 'test_ir_fc_fuse_pass_deprecated', 'test_fleet_base_single', 'test_multiprocess_dataloader_iterable_dataset_dynamic', 'test_slice_op', @@ -2576,13 +2580,13 @@ 'test_seqconv_eltadd_relu_fuse_pass', 'test_analysis_predictor', 'test_convert_operators', - 'test_add_reader_dependency', + 'test_add_reader_dependency_deprecated', 'test_is_tensor', 'test_variable', 'test_save_model_without_var', 'test_unfold_op', 'test_conv_bn_fuse_pass', - 'test_truncated_gaussian_random_op', + 'test_truncated_gaussian_random_op_deprecated', 'test_traced_layer_err_msg', 'test_unique_with_counts', 'test_auc_single_pred_op', @@ -2689,7 +2693,7 @@ 'test_imperative_save_load', 'test_imperative_ptb_rnn_sorted_gradient', 'test_mul_op', - 'test_imperative_lod_tensor_to_selected_rows', + 'test_imperative_lod_tensor_to_selected_rows_deprecated', 'test_imperative_data_parallel', 'test_norm_nn_grad', 'test_im2sequence_op', @@ -2764,7 +2768,7 @@ 'test_dot_op', 'test_device', 'test_imperative_layer_apply', - 'test_dataloader_early_reset', + 'test_dataloader_early_reset_deprecated', 'test_imperative_selected_rows_to_lod_tensor', 'test_crop_op', 'test_linear_interp_v2_op', @@ -2815,7 +2819,7 @@ 'test_sync_batch_norm_op', 'test_static_save_load', 'test_coalesce_tensor_op', - 
'test_fuse_bn_act_pass', + 'test_fuse_bn_act_pass_deprecated', 'test_shard_index_op', 'test_cuda_random_seed', 'test_dequantize_log_op', @@ -2854,7 +2858,7 @@ 'test_eager_tensor', 'trt_split_converter_test', 'test_user_defined_quantization', - 'test_quantization_scale_pass', + 'test_quantization_scale_pass_deprecated', 'feed_forward_test', 'test_standalone_executor', 'test_imperative_qat_user_defined', @@ -2878,7 +2882,7 @@ 'test_tensor_register_hook', 'test_fused_multihead_matmul_op', 'test_uniform_random_inplace_op', - 'test_decoupled_py_reader', + 'test_decoupled_py_reader_deprecated', 'test_assign_op', 'test_trt_instance_norm_op', 'test_uniform_random_op', diff --git a/tools/print_signatures.py b/tools/print_signatures.py index d09a04abd045c..ba3e08b154541 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -15,19 +15,34 @@ Print all signature of a python module in alphabet order. Usage: - ./print_signature "paddle.base" > signature.txt + python tools/print_signature.py "paddle" > API.spec """ +from __future__ import annotations + import argparse import collections import hashlib import inspect import logging import pkgutil +import re import sys +from typing import Literal import paddle +SpecFields = Literal[ + "args", + "varargs", + "varkw", + "defaults", + "kwonlyargs", + "kwonlydefaults", + "annotations", + "document", +] + member_dict = collections.OrderedDict() visited_modules = set() @@ -61,21 +76,6 @@ def md5(doc): return md5sum -def is_primitive(instance): - int_types = (int,) - pritimitive_types = int_types + (float, str) - if isinstance(instance, pritimitive_types): - return True - elif isinstance(instance, (list, tuple, set)): - for obj in instance: - if not is_primitive(obj): - return False - - return True - else: - return False - - ErrorSet = set() IdSet = set() skiplist = [] @@ -200,9 +200,7 @@ def insert_api_into_dict(full_name, gen_doc_anno=None): if gen_doc_anno: api_info_dict[fc_id]["gen_doc_anno"] = gen_doc_anno if 
inspect.isfunction(obj): - api_info_dict[fc_id]["signature"] = repr( - inspect.getfullargspec(obj) - ).replace('FullArgSpec', 'ArgSpec', 1) + api_info_dict[fc_id]["signature"] = inspect.getfullargspec(obj) return api_info_dict[fc_id] @@ -239,85 +237,6 @@ def process_module(m, attr="__all__"): return api_counter -def check_public_api(): - modulelist = [ # npqa - paddle, - paddle.amp, - paddle.nn, - paddle.nn.functional, - paddle.nn.initializer, - paddle.nn.utils, - paddle.static, - paddle.static.nn, - paddle.io, - paddle.jit, - paddle.metric, - paddle.distribution, - paddle.optimizer, - paddle.optimizer.lr, - paddle.regularizer, - paddle.text, - paddle.utils, - paddle.utils.download, - paddle.utils.cpp_extension, - paddle.sysconfig, - paddle.vision, - paddle.vision.datasets, - paddle.vision.models, - paddle.vision.transforms, - paddle.vision.ops, - paddle.distributed, - paddle.distributed.fleet, - paddle.distributed.fleet.utils, - paddle.distributed.parallel, - paddle.distributed.utils, - paddle.callbacks, - paddle.hub, - paddle.autograd, - paddle.incubate, - paddle.inference, - paddle.onnx, - paddle.device, - paddle.audio, - paddle.audio.backends, - paddle.audio.datasets, - paddle.sparse, - paddle.sparse.nn, - paddle.sparse.nn.functional, - ] - - apinum = 0 - alldict = {} - for module in modulelist: - if hasattr(module, '__all__'): - old_all = module.__all__ - else: - old_all = [] - dirall = dir(module) - for item in dirall: - if item.startswith('__'): - continue - old_all.append(item) - apinum += len(old_all) - alldict.update({module.__name__: old_all}) - - old_all = [] - dirall = dir(paddle.Tensor) - for item in dirall: - if item.startswith('_'): - continue - old_all.append(item) - apinum += len(old_all) - alldict.update({'paddle.Tensor': old_all}) - - for module, allapi in alldict.items(): - for member_name in allapi: - cur_name = module + '.' 
+ member_name - instance = eval(cur_name) - doc_md5 = md5(instance.__doc__) - member_dict[cur_name] = f"({cur_name}, ('document', '{doc_md5}'))" - - def check_allmodule_callable(): modulelist = [paddle] for m in modulelist: @@ -326,69 +245,89 @@ def check_allmodule_callable(): return member_dict +class ApiSpecFormatter: + def __init__(self, show_fields: SpecFields): + self.show_fields = show_fields + + def format_spec(self, spec: inspect.FullArgSpec | None) -> str: + if spec is None: + return "ArgSpec()" + inner_str = ", ".join( + f"{field}={getattr(spec, field)!r}" + for field in spec._fields + if field in self.show_fields + ) + return f"ArgSpec({inner_str})" + + def format_doc(self, doc: str) -> str: + if "document" not in self.show_fields: + return "('document', '**********')" + return f"('document', '{md5(doc)}')" + + def format(self, api_name: str, spec: inspect.FullArgSpec, doc: str) -> str: + return f"{api_name} ({self.format_spec(spec)}, {self.format_doc(doc)})" + + def parse_args(): """ Parse input arguments """ parser = argparse.ArgumentParser(description='Print Apis Signatures') - parser.add_argument('--debug', dest='debug', action="store_true") + parser.add_argument('module', type=str, help='module', default='paddle') parser.add_argument( - '--method', - dest='method', + '--skipped', + dest='skipped', type=str, - default='get_all_api', - help="using get_all_api or from_modulelist", + help='Skip Checking submodules, support regex', + default=r'paddle\.base\.libpaddle\.(eager|pir)\.ops', ) parser.add_argument( - 'module', type=str, help='module', default='paddle' - ) # not used - parser.add_argument( - '--skipped', - dest='skipped', + '--show-fields', type=str, - help='Skip Checking submodules', - default='paddle.base.libpaddle.eager.ops', + default="args,varargs,varkw,defaults,kwonlyargs,kwonlydefaults,annotations,document", + help="show fields in arg spec, separated by comma, e.g. 
'args,varargs'", ) - - if len(sys.argv) == 1: - args = parser.parse_args(['paddle']) - return args - # parser.print_help() - # sys.exit(1) - args = parser.parse_args() return args +def create_api_filter(skipped_regex: str): + if not skipped_regex: + return lambda api_name: True + skipped_pattern = re.compile(skipped_regex) + + def api_filter(api_name: str) -> bool: + return not skipped_pattern.match(api_name) + + return api_filter + + if __name__ == '__main__': args = parse_args() check_allmodule_callable() - if args.method == 'from_modulelist': - check_public_api() - for name in member_dict: - print(name, member_dict[name]) - elif args.method == 'get_all_api': - get_all_api() - all_api_names_to_k = {} - for k, api_info in api_info_dict.items(): - # 1. the shortest suggested_name may be renamed; - # 2. some api's fullname is not accessable, the module name of it is overrided by the function with the same name; - api_name = sorted(api_info['all_names'])[0] - all_api_names_to_k[api_name] = k - all_api_names_sorted = sorted(all_api_names_to_k.keys()) - for api_name in all_api_names_sorted: - if args.skipped != '' and api_name.find(args.skipped) >= 0: - continue - api_info = api_info_dict[all_api_names_to_k[api_name]] - print( - "{} ({}, ('document', '{}'))".format( - api_name, - api_info['signature'] - if 'signature' in api_info - else 'ArgSpec()', - md5(api_info['docstring']), - ) + get_all_api(args.module) + api_filter = create_api_filter(args.skipped) + spec_formatter = ApiSpecFormatter(args.show_fields.split(',')) + + all_api_names_to_k = {} + for k, api_info in api_info_dict.items(): + # 1. the shortest suggested_name may be renamed; + # 2. 
some api's fullname is not accessable, the module name of it is overrided by the function with the same name; + api_name = sorted(api_info['all_names'])[0] + all_api_names_to_k[api_name] = k + all_api_names_sorted = sorted(all_api_names_to_k.keys()) + for api_name in all_api_names_sorted: + if not api_filter(api_name): + continue + api_info = api_info_dict[all_api_names_to_k[api_name]] + + print( + spec_formatter.format( + api_name, + api_info.get('signature'), + api_info['docstring'], ) + ) if len(ErrorSet) == 0: sys.exit(0) diff --git a/tools/prune_for_jetson.py b/tools/prune_for_jetson.py index 12f15a5dec6e1..d3758493d0c00 100644 --- a/tools/prune_for_jetson.py +++ b/tools/prune_for_jetson.py @@ -101,9 +101,9 @@ def prune_phi_kernels(): def apply_patches(): work_path = os.path.dirname(os.path.abspath(__file__)) + "/../" ret = os.system( - "cd %s && rm -f paddle/fluid/inference/api/tensorrt_predictor.* " + f"cd {work_path} && rm -f paddle/fluid/inference/api/tensorrt_predictor.* " " && rm -f paddle/fluid/inference/api/paddle_tensorrt_predictor.h " - " && git apply tools/infer_prune_patches/*.patch && cd -" % work_path + " && git apply tools/infer_prune_patches/*.patch && cd -" ) return ret == 0 @@ -120,7 +120,7 @@ def append_fluid_kernels(): for op in op_white_list: append_str = ( append_str - + "file(APPEND ${pybind_file} \"USE_OP__(%s);\\n\")\n" % op + + f"file(APPEND ${{pybind_file}} \"USE_OP__({op});\\n\")\n" ) with open(file_name, 'r', encoding='utf-8') as f: @@ -154,11 +154,9 @@ def append_fluid_kernels(): for op in op_white_list: patterns = { - "REGISTER_OPERATOR": r"REGISTER_OPERATOR\(\s*%s\s*," % op, - "REGISTER_OP_CPU_KERNEL": r"REGISTER_OP_CPU_KERNEL\(\s*%s\s*," - % op, - "REGISTER_OP_CUDA_KERNEL": r"REGISTER_OP_CUDA_KERNEL\(\s*%s\s*," - % op, + "REGISTER_OPERATOR": rf"REGISTER_OPERATOR\(\s*{op}\s*,", + "REGISTER_OP_CPU_KERNEL": rf"REGISTER_OP_CPU_KERNEL\(\s*{op}\s*,", + "REGISTER_OP_CUDA_KERNEL": rf"REGISTER_OP_CUDA_KERNEL\(\s*{op}\s*,", } for k, p in 
patterns.items(): matches = re.findall(p, content, flags=re.DOTALL) diff --git a/tools/sampcd_processor_utils.py b/tools/sampcd_processor_utils.py index ff6de2b598326..aaf61fcd88dc0 100644 --- a/tools/sampcd_processor_utils.py +++ b/tools/sampcd_processor_utils.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import argparse import inspect import logging @@ -48,6 +50,12 @@ API_DIFF_SPEC_FN = 'dev_pr_diff_api.spec' TEST_TIMEOUT = 10 +PAT_API_SPEC_MEMBER = re.compile(r'\((paddle[^,]+)\W*document\W*([0-9a-z]{32})') +# insert ArgSpec for changing the API's type annotation can trigger the CI +PAT_API_SPEC_SIGNATURE = re.compile( + r'^(paddle[^,]+)\s+\((ArgSpec.*),.*document\W*([0-9a-z]{32})' +) + class Result: # name/key for result @@ -66,7 +74,7 @@ class Result: order: int = 0 @classmethod - def msg(cls, count: int, env: typing.Set) -> str: + def msg(cls, count: int, env: set) -> str: """Message for logging with api `count` and running `env`.""" raise NotImplementedError @@ -85,8 +93,8 @@ class MetaResult(type): def __new__( mcs, name: str, - bases: typing.Tuple[type, ...], - namespace: typing.Dict[str, typing.Any], + bases: tuple[type, ...], + namespace: dict[str, typing.Any], ) -> type: cls = super().__new__(mcs, name, bases, namespace) if issubclass(cls, Result): @@ -104,7 +112,7 @@ def get(mcs, name: str) -> type: return mcs.__cls_map.get(name) @classmethod - def cls_map(mcs) -> typing.Dict[str, Result]: + def cls_map(mcs) -> dict[str, Result]: return mcs.__cls_map @@ -290,7 +298,7 @@ def prepare(self, test_capacity: set) -> None: """ pass - def run(self, api_name: str, docstring: str) -> typing.List[TestResult]: + def run(self, api_name: str, docstring: str) -> list[TestResult]: """Extract codeblocks from docstring, and run the test. Run only one docstring at a time. 
@@ -304,7 +312,7 @@ def run(self, api_name: str, docstring: str) -> typing.List[TestResult]: raise NotImplementedError def print_summary( - self, test_results: typing.List[TestResult], whl_error: typing.List[str] + self, test_results: list[TestResult], whl_error: list[str] ) -> None: """Post process test results and print test summary. @@ -333,17 +341,17 @@ def get_api_md5(path): API_spec = os.path.abspath(os.path.join(os.getcwd(), "..", path)) if not os.path.isfile(API_spec): return api_md5 - pat = re.compile(r'\((paddle[^,]+)\W*document\W*([0-9a-z]{32})') - patArgSpec = re.compile( - r'^(paddle[^,]+)\s+\(ArgSpec.*document\W*([0-9a-z]{32})' - ) + with open(API_spec) as f: for line in f.readlines(): - mo = pat.search(line) - if not mo: - mo = patArgSpec.search(line) + mo = PAT_API_SPEC_MEMBER.search(line) + if mo: api_md5[mo.group(1)] = mo.group(2) + else: + mo = PAT_API_SPEC_SIGNATURE.search(line) + api_md5[mo.group(1)] = f'{mo.group(2)}, {mo.group(3)}' + return api_md5 @@ -397,18 +405,6 @@ def get_full_api_from_pr_spec(): get_full_api_by_walk() -def get_full_api(): - """ - get all the apis - """ - global API_DIFF_SPEC_FN # readonly - from print_signatures import get_all_api_from_modulelist - - member_dict = get_all_api_from_modulelist() - with open(API_DIFF_SPEC_FN, 'w') as f: - f.write("\n".join(member_dict.keys())) - - def extract_code_blocks_from_docstr(docstr, google_style=True): """ extract code-blocks from the given docstring. @@ -599,9 +595,16 @@ def get_test_capacity(run_on_device="cpu"): return sample_code_test_capacity -def get_docstring(full_test=False): +def get_docstring( + full_test: bool = False, + filter_api: typing.Callable[[str], bool] | None = None, +): ''' this function will get the docstring for test. + + Args: + full_test, get all api + filter_api, a function that filter api, if `True` then skip add to `docstrings_to_test`. 
''' import paddle import paddle.static.quantization # noqa: F401 @@ -616,6 +619,9 @@ def get_docstring(full_test=False): with open(API_DIFF_SPEC_FN) as f: for line in f.readlines(): api = line.replace('\n', '') + if filter_api is not None and filter_api(api.strip()): + continue + try: api_obj = eval(api) except AttributeError: @@ -637,7 +643,7 @@ def get_docstring(full_test=False): return docstrings_to_test, whl_error -def check_old_style(docstrings_to_test: typing.Dict[str, str]): +def check_old_style(docstrings_to_test: dict[str, str]): old_style_apis = [] for api_name, raw_docstring in docstrings_to_test.items(): for codeblock in extract_code_blocks_from_docstr( @@ -715,8 +721,8 @@ def exec_gen_doc(): def get_test_results( - doctester: DocTester, docstrings_to_test: typing.Dict[str, str] -) -> typing.List[TestResult]: + doctester: DocTester, docstrings_to_test: dict[str, str] +) -> list[TestResult]: """Get test results from doctester with docstrings to test.""" _test_style = ( doctester.style diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 8bfb9caef11c2..f63bc17488e77 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -48,7 +48,7 @@ 'test_adaptive_avg_pool1d', 'test_adaptive_max_pool1d', 'test_add_position_encoding_op', - 'test_add_reader_dependency', + 'test_add_reader_dependency_deprecated', 'test_addmm_op', 'test_affine_grid_op', 'test_allclose_layer', @@ -96,7 +96,7 @@ 'test_compare_reduce_op', 'test_compiled_program', 'test_cond', - 'test_conditional_block', + 'test_conditional_block_deprecated', 'test_context_manager', 'test_conv1d_layer', 'test_conv1d_transpose_layer', @@ -122,12 +122,12 @@ 'test_cumsum_op', 'test_cvm_op', 'test_data', - 'test_dataloader_early_reset', - 'test_dataloader_keep_order', - 'test_dataloader_unkeep_order', + 'test_dataloader_early_reset_deprecated', + 'test_dataloader_keep_order_deprecated', + 'test_dataloader_unkeep_order_deprecated', 'test_debugger', 
'test_decayed_adagrad_op', - 'test_decoupled_py_reader', + 'test_decoupled_py_reader_deprecated', 'test_decoupled_py_reader_data_check', 'test_deformable_conv_v1_op', 'test_deformable_psroi_pooling', @@ -252,7 +252,7 @@ 'test_imperative_gan', 'test_imperative_gnn', 'test_imperative_load_static_param', - 'test_imperative_lod_tensor_to_selected_rows', + 'test_imperative_lod_tensor_to_selected_rows_deprecated', 'test_imperative_optimizer', 'test_imperative_ptb_rnn', 'test_imperative_ptb_rnn_sorted_gradient', @@ -372,8 +372,8 @@ 'test_prior_box_op', 'test_profiler', 'test_program', - 'test_program_code', - 'test_program_prune_backward', + 'test_program_code_deprecated', + 'test_program_prune_backward_deprecated', 'test_program_to_string', 'test_protobuf_descs', 'test_proximal_gd_op', @@ -467,7 +467,7 @@ 'test_tril_triu_op', 'test_trilinear_interp_op', 'test_trilinear_interp_v2_op', - 'test_truncated_gaussian_random_op', + 'test_truncated_gaussian_random_op_deprecated', 'test_unbind_op', 'test_unfold_op', 'test_uniform_random_bf16_op', @@ -500,7 +500,7 @@ 'test_communicator_sync', 'test_collective_optimizer', 'test_data_norm_op', - 'test_fuse_bn_act_pass', + 'test_fuse_bn_act_pass_deprecated', 'test_layers', 'test_sequence_conv', 'test_sequence_erase_op', @@ -518,7 +518,7 @@ 'test_sequence_topk_avg_pooling', 'test_sequence_unpad_op', 'test_ir_embedding_eltwise_layernorm_fuse_pass', - 'test_ir_fc_fuse_pass', + 'test_ir_fc_fuse_pass_deprecated', 'test_ir_skip_layernorm_pass', 'test_conv_bias_mkldnn_fuse_pass', 'test_conv_bn_fuse_pass', @@ -603,6 +603,7 @@ 'test_fused_multihead_matmul_op', 'test_rank_attention_op', 'test_fleet_base', + 'test_fleet_base_deprecated', 'test_fleet_meta_optimizer_base', 'test_trt_fc_fuse_pass', 'test_trt_quant_conv2d_dequant_fuse_pass', diff --git a/tools/statistics_UT_resource.sh b/tools/statistics_UT_resource.sh index a6f1f264c4cd2..f97fc6f0dc51d 100644 --- a/tools/statistics_UT_resource.sh +++ b/tools/statistics_UT_resource.sh @@ -1,11 +1,11 
@@ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/tools/test_print_signatures.py b/tools/test_print_signatures.py index 8a3bc60dcf9a7..20345d77b2566 100644 --- a/tools/test_print_signatures.py +++ b/tools/test_print_signatures.py @@ -25,7 +25,7 @@ import hashlib import unittest -from print_signatures import is_primitive, md5 +from print_signatures import md5 def func_example(param_a, param_b): @@ -62,26 +62,5 @@ def test_md5(self): self.assertEqual(digest, md5(func_example.__doc__)) -class Test_is_primitive(unittest.TestCase): - def test_single(self): - self.assertTrue(is_primitive(2)) - self.assertTrue(is_primitive(2.1)) - self.assertTrue(is_primitive("2.1.1")) - self.assertFalse(is_primitive(b"hello paddle")) - self.assertFalse(is_primitive(1j)) - self.assertTrue(is_primitive(True)) - - def test_collection(self): - self.assertTrue(is_primitive([])) - self.assertTrue(is_primitive(())) - self.assertTrue(is_primitive(set())) - self.assertTrue(is_primitive([1, 2])) - self.assertTrue(is_primitive((1.1, 2.2))) - self.assertTrue(is_primitive({1, 2.3})) - self.assertFalse(is_primitive(range(3))) - self.assertFalse(is_primitive({})) - self.assertFalse(is_primitive([1, 1j])) - - if __name__ == '__main__': unittest.main() diff --git a/tools/test_run_by_protobuf_3.py b/tools/test_run_by_protobuf_3.py index 52ce36683e380..7c353b9145c16 100644 --- a/tools/test_run_by_protobuf_3.py +++ b/tools/test_run_by_protobuf_3.py @@ -19,7 +19,7 @@ 'test_ema_fleet', 'test_fleet_base2', 'test_fleet_base3', - 
'test_communicator_geo', + 'test_communicator_geo_deprecated', 'test_communicator_async', 'test_dist_fleet_a_sync_optimizer_async', 'test_dist_fleet_a_sync_optimizer_auto', @@ -51,7 +51,7 @@ 'test_dist_sparse_tensor_load_ftrl', 'test_dist_sparse_tensor_load_momentum', 'test_dist_sparse_tensor_load_rmsprop', - 'test_dist_sparse_tensor_load_sgd', + 'test_dist_sparse_tensor_load_sgd_deprecated', 'test_communicator_sync', 'test_dist_fuse_adam_pass', 'test_dist_fuse_bn_act_pass', diff --git a/tools/test_sampcd_processor.py b/tools/test_sampcd_processor.py index 62c51a73ba8a7..c61c7e610f98c 100644 --- a/tools/test_sampcd_processor.py +++ b/tools/test_sampcd_processor.py @@ -103,19 +103,23 @@ def tearDown(self): def test_get_api_md5(self): res = get_api_md5('paddle/fluid/API_PR.spec') self.assertEqual( - "ff0f188c95030158cc6398d2a6c55one", res['paddle.one_plus_one'] + "ArgSpec(args=[], varargs=None, keywords=None, defaults=(,)), ff0f188c95030158cc6398d2a6c55one", + res['paddle.one_plus_one'], ) self.assertEqual( - "ff0f188c95030158cc6398d2a6c55two", res['paddle.two_plus_two'] + "ArgSpec(args=[], varargs=None, keywords=None, defaults=(,)), ff0f188c95030158cc6398d2a6c55two", + res['paddle.two_plus_two'], ) self.assertEqual( - "ff0f188c95030158cc6398d2a6cthree", res['paddle.three_plus_three'] + "ArgSpec(args=[], varargs=None, keywords=None, defaults=(,)), ff0f188c95030158cc6398d2a6cthree", + res['paddle.three_plus_three'], ) self.assertEqual( "ff0f188c95030158cc6398d2a6c5four", res['paddle.four_plus_four'] ) self.assertEqual( - "ff0f188c95030158cc6398d2a6c5five", res['paddle.five_plus_five'] + "ArgSpec(), ff0f188c95030158cc6398d2a6c5five", + res['paddle.five_plus_five'], ) @@ -302,8 +306,8 @@ def test_global_exec(self): >>> import paddle >>> a = paddle.to_tensor(.2) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.20000000) """, 'set_default': """ 
placeholder @@ -319,8 +323,8 @@ def test_global_exec(self): >>> paddle.set_default_dtype('float64') >>> a = paddle.to_tensor(.2) >>> print(a) - Tensor(shape=[1], dtype=float64, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True, + 0.20000000) """, 'after_set_default': """ placeholder @@ -335,8 +339,8 @@ def test_global_exec(self): >>> import paddle >>> a = paddle.to_tensor(.2) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.20000000) """, } @@ -509,10 +513,10 @@ def test_patch_xdoctest(self): >>> import paddle >>> paddle.device.set_device('gpu') >>> a = paddle.to_tensor(.2) - >>> # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [0.20000000]) + >>> # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, 0.20000000) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + 0.20000000) """, 'cpu_to_cpu': """ @@ -528,10 +532,10 @@ def test_patch_xdoctest(self): >>> import paddle >>> paddle.device.set_device('cpu') >>> a = paddle.to_tensor(.2) - >>> # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, [0.20000000]) + >>> # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, 0.20000000) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.20000000) """, 'gpu_to_cpu': """ @@ -547,10 +551,10 @@ def test_patch_xdoctest(self): >>> import paddle >>> paddle.device.set_device('gpu') >>> a = paddle.to_tensor(.2) - >>> # Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, [0.20000000]) + >>> # Tensor(shape=[], dtype=float32, 
place=Place(gpu:0), stop_gradient=True, 0.20000000) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.20000000) """, 'cpu_to_gpu': """ @@ -566,10 +570,10 @@ def test_patch_xdoctest(self): >>> import paddle >>> paddle.device.set_device('cpu') >>> a = paddle.to_tensor(.2) - >>> # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, [0.20000000]) + >>> # Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, 0.20000000) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - [0.20000000]) + Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + 0.20000000) """, 'gpu_to_cpu_array': """ placeholder @@ -701,8 +705,8 @@ def test_patch_xdoctest(self): >>> paddle.device.set_device('gpu') >>> a = paddle.to_tensor(.123456789) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, - [0.123456780]) + Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + 0.123456780) """, 'cpu_to_cpu': """ @@ -719,8 +723,8 @@ def test_patch_xdoctest(self): >>> paddle.device.set_device('cpu') >>> a = paddle.to_tensor(.123456789) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.123456780]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.123456780) """, 'gpu_to_cpu': """ @@ -737,8 +741,8 @@ def test_patch_xdoctest(self): >>> paddle.device.set_device('gpu') >>> a = paddle.to_tensor(.123456789) >>> print(a) - Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, - [0.123456780]) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 0.123456780) """, 'cpu_to_gpu': """ @@ -755,8 +759,8 @@ def test_patch_xdoctest(self): >>> paddle.device.set_device('cpu') >>> a = paddle.to_tensor(.123456789) >>> print(a) - Tensor(shape=[1], 
dtype=float32, place=Place(gpu:0), stop_gradient=True, - [0.123456780]) + Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True, + 0.123456780) """, 'gpu_to_cpu_array': """ placeholder @@ -2046,7 +2050,7 @@ def test_timeout(self): def test_bad_statements(self): docstrings_to_test = { - 'bad_fluid': """ + 'good_fluid': """ this is docstring... Examples: @@ -2191,9 +2195,9 @@ def test_bad_statements(self): tr_10, ) = test_results - self.assertIn('bad_fluid', tr_0.name) - self.assertTrue(tr_0.badstatement) - self.assertFalse(tr_0.passed) + self.assertIn('good_fluid', tr_0.name) + self.assertFalse(tr_0.badstatement) + self.assertTrue(tr_0.passed) self.assertIn('bad_fluid_from', tr_1.name) self.assertTrue(tr_1.badstatement) diff --git a/tools/test_type_checking.py b/tools/test_type_checking.py new file mode 100644 index 0000000000000..714be765ca9b5 --- /dev/null +++ b/tools/test_type_checking.py @@ -0,0 +1,630 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from tools.type_checking import MypyChecker, get_test_results + + +class TestMypyChecker(unittest.TestCase): + def test_mypy_pass(self): + docstrings_pass = { + 'simple': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import abc + >>> print(1) + 1 + """, + 'multi': """ + placeholder + + .. 
code-block:: python + :name: code-example-0 + + this is some blabla... + + >>> # doctest: +SKIP('skip') + >>> print(1+1) + 2 + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> # doctest: -REQUIRES(env:GPU) + >>> print(1-1) + 0 + + .. code-block:: python + :name: code-example-2 + + this is some blabla... + + >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED) + >>> print(1-1) + 0 + """, + } + docstrings_from_sampcd = { + 'gpu_to_gpu': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import paddle + >>> paddle.device.set_device('gpu') + >>> a = paddle.to_tensor(.123456789) + >>> print(a) + Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, + [0.123456780]) + + """, + 'cpu_to_cpu': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import paddle + >>> paddle.device.set_device('cpu') + >>> a = paddle.to_tensor(.123456789) + >>> print(a) + Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.123456780]) + + """, + 'gpu_to_cpu': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import paddle + >>> paddle.device.set_device('gpu') + >>> a = paddle.to_tensor(.123456789) + >>> print(a) + Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.123456780]) + + """, + 'cpu_to_gpu': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import paddle + >>> paddle.device.set_device('cpu') + >>> a = paddle.to_tensor(.123456789) + >>> print(a) + Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True, + [0.123456780]) + """, + 'gpu_to_cpu_array': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... 
+ + >>> import paddle + >>> paddle.device.set_device('gpu') + >>> a = paddle.to_tensor([[1.123456789 ,2,3], [2,3,4], [3,4,5]]) + >>> print(a) + Tensor(shape=[3, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[1.123456780, 2., 3.], + [2., 3., 4.], + [3., 4., 5.]]) + """, + 'cpu_to_gpu_array': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import paddle + >>> paddle.device.set_device('cpu') + >>> a = paddle.to_tensor([[1.123456789,2,3], [2,3,4], [3,4,5]]) + >>> print(a) + Tensor(shape=[3, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, + [[1.123456780, 2., 3.], + [2., 3., 4.], + [3., 4., 5.]]) + """, + 'mass_array': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import paddle + >>> paddle.device.set_device('gpu') + >>> a = paddle.to_tensor( + ... [[1.123456780, 2., -3, .3], + ... [2, 3, +4., 1.2+10.34e-5j], + ... [3, 5.e-3, 1e2, 3e-8]] + ... ) + >>> # Tensor(shape=[3, 4], dtype=complex64, place=Place(gpu:0), stop_gradient=True, + >>> # [[ (1.1234568357467651+0j) , + >>> # (2+0j) , + >>> # (-3+0j) , + >>> # (0.30000001192092896+0j) ], + >>> # [ (2+0j) , + >>> # (3+0j) , + >>> # (4+0j) , + >>> # (1.2000000476837158+0.00010340000153519213j)], + >>> # [ (3+0j) , + >>> # (0.004999999888241291+0j) , + >>> # (100+0j) , + >>> # (2.999999892949745e-08+0j) ]]) + >>> print(a) + Tensor(shape=[3, 4], dtype=complex64, place=Place(AAA), stop_gradient=True, + [[ (1.123456+0j), + (2+0j), + (-3+0j), + (0.3+0j)], + [ (2+0j), + (3+0j), + (4+0j), + (1.2+0.00010340j)], + [ (3+0j), + (0.00499999+0j), + (100+0j), + (2.999999e-08+0j)]]) + """, + 'float_array': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... 
+ + >>> import paddle + >>> paddle.device.set_device('cpu') + >>> x = [[2, 3, 4], [7, 8, 9]] + >>> x = paddle.to_tensor(x, dtype='float32') + >>> print(paddle.log(x)) + Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[0.69314718, 1.09861231, 1.38629436], + [1.94591010, 2.07944155, 2.19722462]]) + + """, + 'float_array_diff': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import paddle + >>> paddle.device.set_device('cpu') + >>> x = [[2, 3, 4], [7, 8, 9]] + >>> x = paddle.to_tensor(x, dtype='float32') + >>> print(paddle.log(x)) + Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[0.69314712, 1.09861221, 1.386294], + [1.94591032, 2.07944156, 2.1972246]]) + + """, + 'float_begin': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> print(7.0) + 7. + + """, + 'float_begin_long': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> print(7.0000023) + 7.0000024 + + """, + 'float_begin_more': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> print(7.0, 5., 6.123456) + 7.0 5.0 6.123457 + + """, + 'float_begin_more_diff': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> print(7.0, 5., 6.123456) + 7.0 5.0 6.123457 + + """, + 'float_begin_more_brief': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> print(7.0, 5., 6.123456) + 7. 5. 6.123457 + + """, + 'float_begin_fail': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... 
+ + >>> print(7.0100023) + 7.0000024 + + """, + } + doctester = MypyChecker('../pyproject.toml') + + test_results = get_test_results(doctester, docstrings_pass) + self.assertEqual(len(test_results), 3) + + for tr in test_results: + self.assertFalse(tr.fail) + + test_results = get_test_results(doctester, docstrings_from_sampcd) + self.assertEqual(len(test_results), 15) + + for tr in test_results: + print(tr.msg) + self.assertFalse(tr.fail) + + def test_mypy_fail(self): + docstrings_fail = { + 'fail_simple': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import blabla + """, + 'multi': """ + placeholder + + .. code-block:: python + :name: code-example-0 + + this is some blabla... + + >>> # doctest: +SKIP('skip') + >>> print(1+1) + 2 + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> # doctest: -REQUIRES(env:GPU) + >>> blabla + >>> print(1-1) + 0 + + .. code-block:: python + :name: code-example-2 + + this is some blabla... + + >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED) + >>> blabla + >>> print(1-1) + 0 + """, + } + + doctester = MypyChecker('../pyproject.toml') + + test_results = get_test_results(doctester, docstrings_fail) + self.assertEqual(len(test_results), 3) + + for tr in test_results: + self.assertTrue(tr.fail) + + def test_mypy_partial_fail(self): + docstrings_fail = { + 'multi': """ + placeholder + + .. code-block:: python + :name: code-example-0 + + this is some blabla... + + >>> # doctest: +SKIP('skip') + >>> print(1+1) + 2 + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> # doctest: -REQUIRES(env:GPU) + >>> blabla + >>> print(1-1) + 0 + + .. code-block:: python + :name: code-example-2 + + this is some blabla... 
+ + >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED) + >>> print(1-1) + 0 + """ + } + + doctester = MypyChecker('../pyproject.toml') + + test_results = get_test_results(doctester, docstrings_fail) + self.assertEqual(len(test_results), 2) + + tr_0, tr_1 = test_results + self.assertTrue(tr_0.fail) + self.assertFalse(tr_1.fail) + + def test_mypy_ignore(self): + docstrings_ignore = { + 'fail_simple': """ + placeholder + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> # type: ignore + >>> import blabla + """, + 'multi': """ + placeholder + + .. code-block:: python + :name: code-example-0 + + this is some blabla... + + >>> # doctest: +SKIP('skip') + >>> print(1+1) + 2 + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> # type: ignore + >>> # doctest: -REQUIRES(env:GPU) + >>> blabla + >>> print(1-1) + 0 + + .. code-block:: python + :name: code-example-2 + + this is some blabla... + + >>> # type: ignore + >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED) + >>> blabla + >>> print(1-1) + 0 + """, + } + + doctester = MypyChecker('../pyproject.toml') + + test_results = get_test_results(doctester, docstrings_ignore) + self.assertEqual(len(test_results), 3) + + for tr in test_results: + print(tr.msg) + self.assertFalse(tr.fail) + + docstrings_pass = { + 'pass': """ + placeholder + + .. code-block:: python + :name: code-example-0 + + this is some blabla... + + >>> # doctest: +SKIP('skip') + >>> print(1+1) + 2 + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> a = 1 + >>> # type: ignore + >>> # doctest: -REQUIRES(env:GPU) + >>> blabla + >>> print(1-1) + 0 + + .. code-block:: python + :name: code-example-2 + + this is some blabla... 
+ + >>> b = 2 + >>> # type: ignore + >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED) + >>> blabla + >>> print(1-1) + 0 + """, + } + + doctester = MypyChecker('../pyproject.toml') + + test_results = get_test_results(doctester, docstrings_pass) + self.assertEqual(len(test_results), 2) + + for tr in test_results: + print(tr.msg) + self.assertFalse(tr.fail) + + docstrings_fail = { + 'fail': """ + placeholder + + .. code-block:: python + :name: code-example-0 + + this is some blabla... + + >>> # doctest: +SKIP('skip') + >>> print(1+1) + 2 + + Examples: + + .. code-block:: python + :name: code-example-1 + + this is some blabla... + + >>> import blabla + >>> a = 1 + >>> # type: ignore + >>> # doctest: -REQUIRES(env:GPU) + >>> blabla + >>> print(1-1) + 0 + + .. code-block:: python + :name: code-example-2 + + this is some blabla... + + >>> import blabla + >>> # type: ignore + >>> # doctest: +REQUIRES(env:GPU, env:XPU, env: DISTRIBUTED) + >>> blabla + >>> print(1-1) + 0 + """, + } + + doctester = MypyChecker('../pyproject.toml') + + test_results = get_test_results(doctester, docstrings_fail) + self.assertEqual(len(test_results), 2) + + for tr in test_results: + print(tr.msg) + self.assertTrue(tr.fail) diff --git a/tools/timeline.py b/tools/timeline.py index ff8d0946378d7..5e16e0b9bf4f3 100644 --- a/tools/timeline.py +++ b/tools/timeline.py @@ -148,7 +148,7 @@ def _allocate_pids(self): self._devices[(k, event.device_id, "CPU")] = pid # -1 device id represents CUDA API(RunTime) call.(e.g. 
cudaLaunch, cudaMemcpy) if event.device_id == -1: - self._chrome_trace.emit_pid("%s:cuda_api" % k, pid) + self._chrome_trace.emit_pid(f"{k}:cuda_api", pid) else: self._chrome_trace.emit_pid( "%s:cpu:block:%d" % (k, event.device_id), pid diff --git a/tools/timeout_debug_help.sh b/tools/timeout_debug_help.sh index 45de2db87e853..fcc6d473e49eb 100644 --- a/tools/timeout_debug_help.sh +++ b/tools/timeout_debug_help.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,7 +17,7 @@ set +e failed_uts=$1 need_debug_ut_re='test_dist_fleet' cat_log_judge=$(echo "${failed_uts}" | grep 'Timeout' | grep -oEi "$need_debug_ut_re" ) -if [[ "$cat_log_judge" != "" ]];then +if [[ "$cat_log_judge" != "" ]];then echo "==============================================" echo "show timeout ut logs" echo "==============================================" diff --git a/tools/type_checking.py b/tools/type_checking.py new file mode 100644 index 0000000000000..78285cb87eaa4 --- /dev/null +++ b/tools/type_checking.py @@ -0,0 +1,276 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# We type-check the `Example` codes from docstring. + +from __future__ import annotations + +import argparse +import doctest +import pathlib +import re +from abc import abstractmethod +from concurrent.futures import ProcessPoolExecutor +from dataclasses import dataclass, field +from typing import Any + +from mypy import api as mypy_api +from sampcd_processor_utils import ( + extract_code_blocks_from_docstr, + get_docstring, + init_logger, + log_exit, + logger, +) + + +class TypeChecker: + style: str = 'google' + + def __init__(self, *args: Any, **kwargs: Any) -> None: + pass + + @abstractmethod + def run(self, api_name: str, codeblock: str) -> TestResult: + pass + + @abstractmethod + def print_summary( + self, test_results: list[TestResult], whl_error: list[str] + ) -> None: + pass + + +@dataclass +class TestResult: + api_name: str + msg: str + fail: bool = False + extra_info: dict[str, Any] = field(default_factory=dict) + + +class MypyChecker(TypeChecker): + def __init__( + self, config_file: str, cache_dir: str, *args: Any, **kwargs: Any + ) -> None: + self.config_file = config_file + self.cache_dir = cache_dir + super().__init__(*args, **kwargs) + + def run(self, api_name: str, codeblock: str) -> TestResult: + # skip checking when the codeblock startswith `>>> # type: ignore` + codeblock_for_checking = [] + for line in codeblock.splitlines(): + if line.strip().startswith('>>> # type: ignore'): + break + codeblock_for_checking.append(line) + codeblock_for_checking = '\n'.join(codeblock_for_checking) + + # remove `doctest` in the codeblock, or the 
module `doctest` cannot `get_examples`` correctly + codeblock_for_checking = re.sub( + r'#\s*x?doctest\s*:.*', '', codeblock_for_checking + ) + + # `get_examples` codes with `>>>` and `...` stripped + _example_code = doctest.DocTestParser().get_examples( + codeblock_for_checking + ) + example_code = '\n'.join( + [l for e in _example_code for l in e.source.splitlines()] + ) + + normal_report, error_report, exit_status = mypy_api.run( + [ + f'--config-file={self.config_file}', + f'--cache-dir={self.cache_dir}', + '-c', + example_code, + ] + ) + + logger.debug('-' * 20) + logger.debug(f'>>> Type hints with api {api_name} start ...') + logger.debug(example_code) + logger.debug('>>> Results ...') + logger.debug('>>> mypy normal_report is ...') + logger.debug(normal_report) + logger.debug('>>> mypy error_report is ...') + logger.debug(error_report) + logger.debug('>>> mypy exit_status is ...') + logger.debug(exit_status) + logger.debug(f'>>> Type hints with api {api_name} end...') + + return TestResult( + api_name=api_name, + msg='\n'.join([normal_report, error_report]), + fail=exit_status != 0, + extra_info={ + 'normal_report': normal_report, + 'error_report': error_report, + 'exit_status': exit_status, + }, + ) + + def print_summary( + self, test_results: list[TestResult], whl_error: list[str] + ) -> None: + is_fail = False + + logger.warning("----------------Check results--------------------") + + if whl_error is not None and whl_error: + logger.warning("%s is not in whl.", whl_error) + logger.warning("") + logger.warning("Please check the whl package and API_PR.spec!") + logger.warning( + "You can follow these steps in order to generate API.spec:" + ) + logger.warning("1. cd ${paddle_path}, compile paddle;") + logger.warning( + "2. pip install build/python/dist/(build whl package);" + ) + logger.warning( + "3. run 'python tools/print_signatures.py paddle > paddle/fluid/API.spec'." 
+ ) + for test_result in test_results: + if test_result.fail: + logger.error( + ">>> In addition, mistakes found in type checking: %s", + test_result.api_name, + ) + logger.error(test_result.msg) + log_exit(1) + + else: + for test_result in test_results: + if test_result.fail: + is_fail = True + + logger.error(test_result.api_name) + logger.error(test_result.msg) + + else: + logger.debug(test_result.api_name) + logger.debug(test_result.msg) + + if is_fail: + logger.error(">>> Mistakes found in type checking!") + logger.error(">>> Please recheck the type annotations.") + log_exit(1) + + logger.warning(">>> Type checking is successful!") + logger.warning("----------------End of the Check--------------------") + + +def parse_args() -> argparse.Namespace: + """ + Parse input arguments + """ + parser = argparse.ArgumentParser( + description='run Sample Code Type Checking' + ) + parser.add_argument('--debug', dest='debug', action="store_true") + parser.add_argument( + '--logf', dest='logf', type=str, default=None, help='file for logging' + ) + parser.add_argument( + '--config-file', + dest='config_file', + type=str, + default=None, + help='config file for type checker', + ) + parser.add_argument( + '--cache-dir', + dest='cache_dir', + type=str, + default=None, + help='cache dir for mypy', + ) + parser.add_argument('--full-test', dest='full_test', action="store_true") + + args = parser.parse_args() + return args + + +def get_test_results( + type_checker: TypeChecker, docstrings_to_test: dict[str, str] +) -> list[TestResult]: + _test_style = ( + type_checker.style + if type_checker.style in {'google', 'freeform'} + else 'google' + ) + google_style = _test_style == 'google' + + api_names = [] + codeblocks = [] + for api_name, raw_docstring in docstrings_to_test.items(): + # we may extract more than one codeblocks from docsting. 
+ for codeblock in extract_code_blocks_from_docstr( + raw_docstring, google_style=google_style + ): + codeblock_name = codeblock['name'] + codeblock_id = codeblock['id'] + + api_names.append(f'{api_name}:{codeblock_name or codeblock_id}') + codeblocks.append(codeblock['codes']) + + test_results = [] + with ProcessPoolExecutor() as exe: + test_results = exe.map( + type_checker.run, api_names, codeblocks, timeout=600 + ) + + return list(test_results) + + +def run_type_checker( + args: argparse.Namespace, type_checker: TypeChecker +) -> None: + # init logger + init_logger(debug=args.debug, log_file=args.logf) + + logger.info( + "----------------Codeblock Type Checking Start--------------------" + ) + + logger.info(">>> Get docstring from api ...") + filter_api = lambda api_name: 'libpaddle' in api_name + docstrings_to_test, whl_error = get_docstring( + full_test=args.full_test, filter_api=filter_api + ) + + logger.info(">>> Running type checker ...") + test_results = get_test_results(type_checker, docstrings_to_test) + + logger.info(">>> Print summary ...") + type_checker.print_summary(test_results, whl_error) + + +if __name__ == '__main__': + base_path = pathlib.Path(__file__).resolve().parent.parent + + args = parse_args() + mypy_checker = MypyChecker( + config_file=( + args.config_file + if args.config_file + else (base_path / 'pyproject.toml') + ), + cache_dir=( + args.cache_dir if args.cache_dir else (base_path / '.mypy_cache') + ), + ) + run_type_checker(args, mypy_checker) diff --git a/tools/windows/build_compile_environment.bat b/tools/windows/build_compile_environment.bat index 884cea8ca4cd0..016e2a4ff25cb 100644 --- a/tools/windows/build_compile_environment.bat +++ b/tools/windows/build_compile_environment.bat @@ -16,7 +16,7 @@ :: Build Paddle compile environment :: =============================== :: Description: -:: +:: :: Install compile environment for xly CI. 
:: :: Include: @@ -55,7 +55,7 @@ if %errorlevel% == 0 ( ) else ( echo Error***** Download wget tool failed, please download it before rerun. exit /b 1 -) +) goto :eof :: ===== end step 0: wget tool ===== @@ -296,7 +296,7 @@ goto tensorrt echo There is not sccache in this PC, will install sccache. echo Download package from https://paddle-ci.gz.bcebos.com/window_requirement/sccache.exe wget -O sccache.exe "https://paddle-ci.gz.bcebos.com/window_requirement/sccache.exe" -copy sccache.exe C:\Python38 /Y +copy sccache.exe C:\Python38 /Y goto :eof :: ===== end step 7: sccache on windows ===== diff --git a/tools/windows/check_change_of_unittest.sh b/tools/windows/check_change_of_unittest.sh index 576f0e5d238ab..25073435e3fb2 100644 --- a/tools/windows/check_change_of_unittest.sh +++ b/tools/windows/check_change_of_unittest.sh @@ -19,7 +19,7 @@ GITHUB_API_TOKEN=$GITHUB_API_TOKEN GIT_PR_ID=$AGILE_PULL_ID BRANCH=$BRANCH if [ "${GITHUB_API_TOKEN}" == "" ] || [ "${GIT_PR_ID}" == "" ];then - exit 0 + exit 0 fi unittest_spec_diff=$(cat $PADDLE_ROOT/deleted_ut | sed 's/^/ - /g') diff --git a/tools/windows/run_unittests.sh b/tools/windows/run_unittests.sh index ebca6e41296fd..5d259e101b56d 100644 --- a/tools/windows/run_unittests.sh +++ b/tools/windows/run_unittests.sh @@ -16,15 +16,15 @@ # /*================Fixed Disabled Windows CUDA10.x MKL(PR-CI-Windows) unittests===========================*/ # TODO: fix these unittest that is bound to fail disable_wingpu_test="^test_model$|\ -^test_dataloader_early_reset$|\ -^test_add_reader_dependency$|\ +^test_dataloader_early_reset_deprecated$|\ +^test_add_reader_dependency_deprecated$|\ ^test_add_reader_dependency_for_interpretercore$|\ -^test_decoupled_py_reader$|\ -^test_decoupled_py_reader_static_build$|\ +^test_decoupled_py_reader_deprecated$|\ +^test_decoupled_py_reader_deprecated_static_build$|\ ^test_generator_dataloader_deprecated$|\ ^test_parallel_dygraph_sync_batch_norm$|\ ^test_py_reader_using_executor$|\ 
-^test_program_prune_backward$|\ +^test_program_prune_backward_deprecated$|\ ^test_decoupled_py_reader_data_check$|\ ^test_fleet_base_single$|\ ^test_multiprocess_dataloader_iterable_dataset_dynamic$|\ @@ -35,11 +35,11 @@ disable_wingpu_test="^test_model$|\ ^test_imperative_se_resnext$|\ ^test_sync_batch_norm_op$|\ ^test_sync_batch_norm_op_static_build$|\ -^test_dataloader_keep_order$|\ -^test_dataloader_unkeep_order$|\ +^test_dataloader_keep_order_deprecated$|\ +^test_dataloader_unkeep_order_deprecated$|\ ^test_multiprocess_dataloader_iterable_dataset_static$|\ -^test_fuse_bn_act_pass$|\ -^test_fuse_bn_act_pass_static_build$|\ +^test_fuse_bn_act_pass_deprecated$|\ +^test_fuse_bn_act_pass_deprecated_static_build$|\ ^test_fuse_bn_add_act_pass$|\ ^test_gather_op$|\ ^test_activation_op$|\ @@ -184,7 +184,7 @@ disable_wingpu_cuda12_test="^test_cholesky_op$|\ ^test_functional_conv3d$|\ ^test_functional_conv3d_transpose$|\ ^test_imperative_layer_children$|\ -^test_inference_api$|\ +^test_inference_api_deprecated$|\ ^test_trans_layout_op$|\ ^test_pool2d_op$|\ ^test_conv3d_transpose_op$|\ @@ -211,7 +211,7 @@ disable_wingpu_cuda12_test="^test_cholesky_op$|\ ^test_callback_visualdl$|\ ^test_callback_wandb$|\ ^test_user_defined_quantization$|\ -^test_quantization_scale_pass$|\ +^test_quantization_scale_pass_deprecated$|\ ^test_quantization_pass$|\ ^test_imperative_qat$|\ ^test_graph$|\ @@ -219,24 +219,24 @@ disable_wingpu_cuda12_test="^test_cholesky_op$|\ ^test_gru_unit_op$|\ ^test_matmul_op$|\ ^test_decoupled_py_reader_data_check$|\ -^test_decoupled_py_reader$|\ +^test_decoupled_py_reader_deprecated$|\ ^test_generator_dataloader_deprecated$|\ ^test_py_reader_combination$|\ ^test_reader_reset$|\ ^test_sync_batch_norm_op$|\ ^test_sync_batch_norm_op_static_build$|\ -^test_decoupled_py_reader_static_build$|\ +^test_decoupled_py_reader_deprecated_static_build$|\ ^test_multiprocess_dataloader_iterable_dataset_dynamic$|\ ^test_multiprocess_dataloader_iterable_dataset_static$|\ 
-^test_dataloader_keep_order$|\ -^test_dataloader_unkeep_order$|\ -^test_add_reader_dependency$|\ -^test_fuse_bn_act_pass$|\ -^test_fuse_bn_act_pass_static_build$|\ +^test_dataloader_keep_order_deprecated$|\ +^test_dataloader_unkeep_order_deprecated$|\ +^test_add_reader_dependency_deprecated$|\ +^test_fuse_bn_act_pass_deprecated$|\ +^test_fuse_bn_act_pass_deprecated_static_build$|\ ^test_fuse_bn_add_act_pass$|\ ^test_model$|\ -^test_dataloader_early_reset$|\ -^test_add_reader_dependency$|\ +^test_dataloader_early_reset_deprecated$|\ +^test_add_reader_dependency_deprecated$|\ ^test_conv2d_fusion_op$|\ ^test_fused_conv2d_add_act_op$|\ ^test_analyzer_detect_functional_mkldnn$|\ @@ -351,6 +351,7 @@ disable_win_inference_test="^trt_quant_int8_yolov3_r50_test$|\ ^test_custom_relu_op_setup$|\ ^test_conv3d_transpose_part2_op$|\ ^test_deform_conv2d$|\ +^test_deform_conv2d_deprecated$|\ ^test_matmul_op$|\ ^test_matmul_op_static_build$|\ ^test_basic_api_transformation$|\ @@ -370,7 +371,7 @@ disable_win_inference_test="^trt_quant_int8_yolov3_r50_test$|\ ^test_graph_khop_sampler$|\ ^test_gru_rnn_op$|\ ^test_masked_select_op$|\ -^test_ir_fc_fuse_pass$|\ +^test_ir_fc_fuse_pass_deprecated$|\ ^test_fc_elementwise_layernorm_fuse_pass$|\ ^test_linalg_pinv_op$|\ ^test_math_op_patch_var_base$|\ @@ -395,23 +396,23 @@ disable_win_inference_test="^trt_quant_int8_yolov3_r50_test$|\ ^test_py_reader_pin_memory$|\ ^test_multiprocess_dataloader_iterable_dataset_dynamic$|\ ^test_multiprocess_dataloader_iterable_dataset_static$|\ -^test_add_reader_dependency$|\ +^test_add_reader_dependency_deprecated$|\ ^test_add_reader_dependency_for_interpretercore$|\ ^test_compat$|\ -^test_decoupled_py_reader$|\ -^test_decoupled_py_reader_static_build$|\ +^test_decoupled_py_reader_deprecated$|\ +^test_decoupled_py_reader_deprecated_static_build$|\ ^test_generator_dataloader_deprecated$|\ ^test_py_reader_using_executor$|\ -^test_dataloader_keep_order$|\ -^test_dataloader_unkeep_order$|\ 
+^test_dataloader_keep_order_deprecated$|\ +^test_dataloader_unkeep_order_deprecated$|\ ^test_sync_batch_norm_op$|\ ^test_sync_batch_norm_op_static_build$|\ -^test_fuse_bn_act_pass$|\ -^test_fuse_bn_act_pass_static_build$|\ +^test_fuse_bn_act_pass_deprecated$|\ +^test_fuse_bn_act_pass_deprecated_static_build$|\ ^test_fuse_bn_add_act_pass$|\ ^test_decoupled_py_reader_data_check$|\ ^test_parallel_dygraph_sync_batch_norm$|\ -^test_dataloader_early_reset$|\ +^test_dataloader_early_reset_deprecated$|\ ^test_fleet_base_single$|\ ^test_sequence_pool$|\ ^test_simplify_with_basic_ops_pass_autoscan$|\ @@ -436,7 +437,7 @@ disable_wincpu_test="^jit_kernel_test$|\ ^test_vision_models$|\ ^test_dygraph_multi_forward$|\ ^test_imperative_transformer_sorted_gradient$|\ -^test_program_prune_backward$|\ +^test_program_prune_backward_deprecated$|\ ^test_imperative_resnet$|\ ^test_imperative_resnet_sorted_gradient$|\ ^test_imperative_se_resnext$|\ @@ -464,7 +465,7 @@ long_time_test="^test_gru_op$|\ ^test_cross_op$|\ ^test_elementwise_nn_grad$|\ ^test_fused_elemwise_activation_op$|\ -^test_imperative_lod_tensor_to_selected_rows$|\ +^test_imperative_lod_tensor_to_selected_rows_deprecated$|\ ^test_imperative_selected_rows_to_lod_tensor$|\ ^test_layer_norm_op$|\ ^test_layer_norm_op_static_build$|\ diff --git a/tools/xpu/get_xpti_dependence.sh b/tools/xpu/get_xpti_dependence.sh index 95cc4a110ed6d..6801990933d76 100644 --- a/tools/xpu/get_xpti_dependence.sh +++ b/tools/xpu/get_xpti_dependence.sh @@ -1,13 +1,13 @@ #!/bin/bash # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.