diff --git a/.gitmodules b/.gitmodules
index 0c41450793fc2..3cdbc077ba1ec 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -70,8 +70,8 @@
 	path = third_party/xbyak
 	url = https://github.com/herumi/xbyak.git
 	ignore = dirty
-[submodule "third_party/mkldnn"]
-	path = third_party/mkldnn
+[submodule "third_party/onednn"]
+	path = third_party/onednn
 	url = https://github.com/oneapi-src/oneDNN.git
 	ignore = dirty
 [submodule "third_party/flashattn"]
@@ -118,3 +118,7 @@
 	path = third_party/cryptopp-cmake
 	url = https://github.com/noloader/cryptopp-cmake.git
 	ignore = dirty
+[submodule "third_party/nlohmann_json"]
+	path = third_party/nlohmann_json
+	url = https://github.com/nlohmann/json.git
+	ignore = dirty
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3d1ac6a170243..f1dcbd658cb35 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
     hooks:
       - id: copyright_checker
         name: copyright_checker
-        entry: python ./tools/codestyle/copyright.hook
+        entry: python ./tools/codestyle/copyright.py
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps|py|sh)$
         exclude: |
@@ -57,7 +57,7 @@ repos:
       - id: black
         files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.3.0
+    rev: v0.3.5
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix, --no-cache]
@@ -67,7 +67,7 @@ repos:
       - id: clang-format
        name: clang-format
        description: Format files with ClangFormat.
-        entry: bash ./tools/codestyle/clang_format.hook -i
+        entry: bash ./tools/codestyle/clang_format.sh -i
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps)$
 - repo: local
@@ -75,7 +75,7 @@ repos:
      - id: cpplint-cpp-source
        name: cpplint
        description: Check C++ code style using cpplint.py.
-        entry: bash ./tools/codestyle/cpplint_pre_commit.hook
+        entry: bash ./tools/codestyle/cpplint_pre_commit.sh
        language: system
        files: \.(cc|cxx|cpp|cu|h|hpp|hxx)$
        args:
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
deleted file mode 100644
index 6b2614b101108..0000000000000
--- a/ISSUE_TEMPLATE.md
+++ /dev/null
@@ -1,14 +0,0 @@
-Thank you for contributing to PaddlePaddle. Submitting an issue is a great help for us.
-Both Chinese and English issues are welcome.
-
-It's hard to solve a problem when important details are missing.
-Before submitting the issue, look over the following criteria before handing your request in.
-
-- [ ] Was there a similar issue submitted or resolved before ? You could search issue in the github.
-- [ ] Did you retrieve your issue from widespread search engines ?
-- [ ] Is my description of the issue clear enough to reproduce this problem?
-   * If some errors occurred, we need details about `how do you run your code?`, `what system do you use?`, `Are you using GPU or not?`, etc.
-   * If you use an recording [asciinema](https://asciinema.org/) to show what you are doing to make it happen, that's awesome! We could help you solve the problem more quickly.
-- [ ] Is my description of the issue use the github markdown correctly?
-   * Please use the proper markdown syntaxes for styling all forms of writing, e.g, source code, error information, etc.
-   * Check out [this page](https://guides.github.com/features/mastering-markdown/) to find out much more about markdown.
diff --git a/cmake/cinn.cmake b/cmake/cinn.cmake
index 05210fd578365..a8627b6f70fd0 100644
--- a/cmake/cinn.cmake
+++ b/cmake/cinn.cmake
@@ -59,7 +59,7 @@ if(WITH_MKL)
   add_dependencies(cinn_mklml ${MKLML_PROJECT})
   add_definitions(-DCINN_WITH_MKL_CBLAS)
 endif()
-if(WITH_MKLDNN)
+if(WITH_ONEDNN)
   add_definitions(-DCINN_WITH_DNNL)
 endif()
 
@@ -164,6 +164,8 @@ cinn_cc_library(
   isl
   ginac
   pybind
+  op_fusion
+  cinn_op_dialect
   ${jitify_deps})
 add_dependencies(cinnapi GEN_LLVM_RUNTIME_IR_HEADER ZLIB::ZLIB)
 add_dependencies(cinnapi GEN_LLVM_RUNTIME_IR_HEADER ${core_deps})
@@ -175,9 +177,9 @@ target_link_libraries(cinnapi ${PYTHON_LIBRARIES})
 if(WITH_MKL)
   target_link_libraries(cinnapi cinn_mklml)
   add_dependencies(cinnapi cinn_mklml)
-  if(WITH_MKLDNN)
+  if(WITH_ONEDNN)
     target_link_libraries(cinnapi ${MKLDNN_LIB})
-    add_dependencies(cinnapi ${MKLDNN_PROJECT})
+    add_dependencies(cinnapi ${ONEDNN_PROJECT})
   endif()
 endif()
 
@@ -220,21 +222,25 @@ function(gen_cinncore LINKTYPE)
     schedule_desc_proto
     absl
     isl
-    ginac)
+    ginac
+    pybind
+    op_fusion
+    cinn_op_dialect
+    ${jitify_deps})
   add_dependencies(${CINNCORE_TARGET} GEN_LLVM_RUNTIME_IR_HEADER ZLIB::ZLIB)
   add_dependencies(${CINNCORE_TARGET} GEN_LLVM_RUNTIME_IR_HEADER ${core_deps})
   target_link_libraries(${CINNCORE_TARGET} op_dialect pir phi)
   add_dependencies(${CINNCORE_TARGET} op_dialect pir phi)
-  add_dependencies(${CINNCORE_TARGET} pybind)
+  # add_dependencies(${CINNCORE_TARGET} pybind)
   target_link_libraries(${CINNCORE_TARGET} ${PYTHON_LIBRARIES})
 
   if(WITH_MKL)
     target_link_libraries(${CINNCORE_TARGET} cinn_mklml)
     add_dependencies(${CINNCORE_TARGET} cinn_mklml)
-    if(WITH_MKLDNN)
+    if(WITH_ONEDNN)
       target_link_libraries(${CINNCORE_TARGET} ${MKLDNN_LIB})
-      add_dependencies(${CINNCORE_TARGET} ${MKLDNN_PROJECT})
+      add_dependencies(${CINNCORE_TARGET} ${ONEDNN_PROJECT})
     endif()
   endif()
 
@@ -247,16 +253,16 @@ function(gen_cinncore LINKTYPE)
       ${CUBLAS}
       ${CUDNN}
       ${CURAND}
-      ${CUSOLVER}
-      ${jitify_deps})
+      ${CUSOLVER})
+    # ${jitify_deps})
     if(NVTX_FOUND)
       target_link_libraries(${CINNCORE_TARGET} ${CUDA_NVTX_LIB})
     endif()
   endif()
 
   if(WITH_CUTLASS)
-    target_link_libraries(cinnapi cutlass)
-    add_dependencies(cinnapi cutlass)
+    target_link_libraries(${CINNCORE_TARGET} cutlass)
+    add_dependencies(${CINNCORE_TARGET} cutlass)
   endif()
 endfunction()
diff --git a/cmake/external/json.cmake b/cmake/external/json.cmake
new file mode 100644
index 0000000000000..b219e60cb9950
--- /dev/null
+++ b/cmake/external/json.cmake
@@ -0,0 +1,43 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+include(ExternalProject)
+
+set(JSON_PREFIX_DIR ${THIRD_PARTY_PATH}/nlohmann_json)
+set(JSON_INCLUDE_DIR ${JSON_PREFIX_DIR}/include)
+
+set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/nlohmann_json)
+set(SOURCE_INCLUDE_DIR ${SOURCE_DIR}/include)
+
+include_directories(${JSON_INCLUDE_DIR})
+
+set(JSON_BuildTests
+    OFF
+    CACHE INTERNAL "")
+
+ExternalProject_Add(
+  extern_json
+  ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
+  SOURCE_DIR ${SOURCE_DIR}
+  PREFIX ${JSON_PREFIX_DIR}
+  UPDATE_COMMAND ""
+  CONFIGURE_COMMAND ""
+  BUILD_IN_SOURCE 1
+  BUILD_COMMAND ""
+  INSTALL_COMMAND ""
+  TEST_COMMAND "")
+
+add_library(json INTERFACE)
+#target_include_directories(json PRIVATE ${JSON_INCLUDE_DIR})
+add_dependencies(json extern_json)
diff --git a/cmake/external/lite.cmake b/cmake/external/lite.cmake
index 515952eae88cd..c350f79945163 100644
--- a/cmake/external/lite.cmake
+++ b/cmake/external/lite.cmake
@@ -88,7 +88,7 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
     set(LITE_OPTIONAL_ARGS
         -DWITH_MKL=OFF
         -DLITE_WITH_CUDA=OFF
-        -DWITH_MKLDNN=OFF
+        -DWITH_ONEDNN=OFF
         -DLITE_WITH_X86=OFF
         -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON
         -DLITE_WITH_PROFILE=OFF
@@ -141,7 +141,7 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
     set(LITE_OPTIONAL_ARGS
         -DWITH_MKL=ON
         -DLITE_WITH_CUDA=OFF
-        -DWITH_MKLDNN=OFF
+        -DWITH_ONEDNN=OFF
         -DLITE_WITH_X86=ON
         -DLITE_WITH_PROFILE=OFF
         -DWITH_LITE=OFF
diff --git a/cmake/external/mkldnn.cmake b/cmake/external/onednn.cmake
similarity index 68%
rename from cmake/external/mkldnn.cmake
rename to cmake/external/onednn.cmake
index 650a2a4196c86..8b1969f87b5a2 100644
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/onednn.cmake
@@ -14,13 +14,13 @@
 
 include(ExternalProject)
 
-set(MKLDNN_PROJECT "extern_mkldnn")
-set(MKLDNN_PREFIX_DIR ${THIRD_PARTY_PATH}/mkldnn)
-set(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
+set(ONEDNN_PROJECT "extern_onednn")
+set(ONEDNN_PREFIX_DIR ${THIRD_PARTY_PATH}/onednn)
+set(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onednn)
 set(MKLDNN_INC_DIR
     "${MKLDNN_INSTALL_DIR}/include"
-    CACHE PATH "mkldnn include directory." FORCE)
-set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/mkldnn)
+    CACHE PATH "oneDNN include directory." FORCE)
+set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/onednn)
 
 # Introduce variables:
 # * CMAKE_INSTALL_LIBDIR
@@ -36,28 +36,28 @@ set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}"
                         "${MKLDNN_INSTALL_DIR}/${LIBDIR}")
 
 include_directories(${MKLDNN_INC_DIR}
-)# For MKLDNN code to include internal headers.
+)# For oneDNN code to include internal headers.
 
 if(NOT WIN32)
-  set(MKLDNN_FLAG
+  set(ONEDNN_FLAG
       "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds"
   )
-  set(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
-  set(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
-  set(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
-  set(MKLDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
-  set(MKLDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
+  set(ONEDNN_FLAG "${ONEDNN_FLAG} -Wno-unused-result -Wno-unused-value")
+  set(ONEDNN_CFLAG "${CMAKE_C_FLAGS} ${ONEDNN_FLAG}")
+  set(ONEDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${ONEDNN_FLAG}")
+  set(ONEDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
+  set(ONEDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
   set(MKLDNN_LIB
       "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libdnnl.so"
-      CACHE FILEPATH "mkldnn library." FORCE)
+      CACHE FILEPATH "oneDNN library." FORCE)
 else()
-  set(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc")
-  set(MKLDNN_CFLAG "${CMAKE_C_FLAGS}")
-  string(REPLACE "/O2 " "" MKLDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
-  string(REPLACE "/O2 " "" MKLDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
+  set(ONEDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc")
+  set(ONEDNN_CFLAG "${CMAKE_C_FLAGS}")
+  string(REPLACE "/O2 " "" ONEDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
+  string(REPLACE "/O2 " "" ONEDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
   set(MKLDNN_LIB
       "${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib"
-      CACHE FILEPATH "mkldnn library." FORCE)
+      CACHE FILEPATH "oneDNN library." FORCE)
 endif()
 
 if(LINUX)
@@ -67,21 +67,21 @@ else()
 endif()
 
 ExternalProject_Add(
-  ${MKLDNN_PROJECT}
+  ${ONEDNN_PROJECT}
   ${EXTERNAL_PROJECT_LOG_ARGS}
   SOURCE_DIR ${SOURCE_DIR}
-  DEPENDS ${MKLDNN_DEPENDS}
-  PREFIX ${MKLDNN_PREFIX_DIR}
+  DEPENDS ${ONEDNN_DEPENDS}
+  PREFIX ${ONEDNN_PREFIX_DIR}
   UPDATE_COMMAND ""
   #BUILD_ALWAYS 1
   CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
              -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-             -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
-             -DCMAKE_CXX_FLAGS_RELEASE=${MKLDNN_CXXFLAG_RELEASE}
+             -DCMAKE_CXX_FLAGS=${ONEDNN_CXXFLAG}
+             -DCMAKE_CXX_FLAGS_RELEASE=${ONEDNN_CXXFLAG_RELEASE}
              -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-             -DCMAKE_C_FLAGS=${MKLDNN_CFLAG}
+             -DCMAKE_C_FLAGS=${ONEDNN_CFLAG}
              -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-             -DCMAKE_C_FLAGS_RELEASE=${MKLDNN_CFLAG_RELEASE}
+             -DCMAKE_C_FLAGS_RELEASE=${ONEDNN_CFLAG_RELEASE}
              -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
              -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
              -DCMAKE_POSITION_INDEPENDENT_CODE=ON
@@ -90,7 +90,7 @@ ExternalProject_Add(
   CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
   BUILD_BYPRODUCTS ${BUILD_BYPRODUCTS_ARGS})
 
-message(STATUS "MKLDNN library: ${MKLDNN_LIB}")
+message(STATUS "OneDNN library: ${MKLDNN_LIB}")
 add_definitions(-DPADDLE_WITH_DNNL)
 # copy the real so.0 lib to install dir
 # it can be directly contained in wheel or capi
@@ -123,21 +123,21 @@ if(WIN32)
     COMMAND
       lib /def:${MKLDNN_INSTALL_DIR}/bin/mkldnn.def /out:${MKLDNN_LIB}
       /machine:x64
     COMMENT "Generate mkldnn.lib manually--->"
-    DEPENDS ${MKLDNN_PROJECT}
+    DEPENDS ${ONEDNN_PROJECT}
     VERBATIM)
-  add_custom_target(mkldnn_cmd ALL DEPENDS ${MKLDNN_LIB})
+  add_custom_target(onednn_cmd ALL DEPENDS ${MKLDNN_LIB})
 else()
   set(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libdnnl.so.3)
   add_custom_command(
     OUTPUT ${MKLDNN_SHARED_LIB}
     COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
-    DEPENDS ${MKLDNN_PROJECT})
-  add_custom_target(mkldnn_cmd ALL DEPENDS ${MKLDNN_SHARED_LIB})
+    DEPENDS ${ONEDNN_PROJECT})
+  add_custom_target(onednn_cmd ALL DEPENDS ${MKLDNN_SHARED_LIB})
 endif()
 
-# generate a static dummy target to track mkldnn dependencies
-# for cc_library(xxx SRCS xxx.c DEPS mkldnn)
-generate_dummy_static_lib(LIB_NAME "mkldnn" GENERATOR "mkldnn.cmake")
+# generate a static dummy target to track onednn dependencies
+# for cc_library(xxx SRCS xxx.c DEPS onednn)
+generate_dummy_static_lib(LIB_NAME "onednn" GENERATOR "onednn.cmake")
 
-target_link_libraries(mkldnn ${MKLDNN_LIB} ${MKLML_IOMP_LIB})
-add_dependencies(mkldnn ${MKLDNN_PROJECT} mkldnn_cmd)
+target_link_libraries(onednn ${MKLDNN_LIB} ${MKLML_IOMP_LIB})
+add_dependencies(onednn ${ONEDNN_PROJECT} onednn_cmd)
diff --git a/cmake/external/rocksdb.cmake b/cmake/external/rocksdb.cmake
index 072658e54705a..28179cbf1ca20 100644
--- a/cmake/external/rocksdb.cmake
+++ b/cmake/external/rocksdb.cmake
@@ -14,8 +14,6 @@
 
 include(ExternalProject)
 
-# find_package(jemalloc REQUIRED)
-
 set(ROCKSDB_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/rocksdb)
 set(ROCKSDB_TAG 6.19.fb)
 
@@ -32,28 +30,10 @@ set(ROCKSDB_INCLUDE_DIR
 set(ROCKSDB_LIBRARIES
     "${ROCKSDB_INSTALL_DIR}/lib/librocksdb.a"
     CACHE FILEPATH "rocksdb library." FORCE)
-set(ROCKSDB_COMMON_FLAGS
-    "-g -pipe -O2 -W -Wall -Wno-unused-parameter -fPIC -fno-builtin-memcmp -fno-omit-frame-pointer"
-)
-set(ROCKSDB_FLAGS
-    "-DNDEBUG -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DOS_LINUX -DROCKSDB_FALLOCATE_PRESENT -DHAVE_PCLMUL -DZLIB -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX -DROCKSDB_BACKTRACE -DROCKSDB_SUPPORT_THREAD_LOCAL -DROCKSDB_USE_RTTI -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_AUXV_GETAUXVAL_PRESENT"
-)
-set(ROCKSDB_CMAKE_CXX_FLAGS
-    "${ROCKSDB_COMMON_FLAGS} -DROCKSDB_LIBAIO_PRESENT ${ROCKSDB_FLAGS} -fPIC -I${JEMALLOC_INCLUDE_DIR}"
-)
-if(NOT WITH_ARM)
-  set(ROCKSDB_FLAGS "${ROCKSDB_FLAGS} -DHAVE_SSE42")
-  set(ROCKSDB_CMAKE_CXX_FLAGS
-      "${ROCKSDB_CMAKE_CXX_FLAGS} -msse -msse4.2 -mpclmul")
-endif()
-set(ROCKSDB_CMAKE_C_FLAGS
-    "${ROCKSDB_COMMON_FLAGS} ${ROCKSDB_FLAGS} -DROCKSDB_LIBAIO_PRESENT -fPIC -I${JEMALLOC_INCLUDE_DIR}"
-)
-include_directories(${ROCKSDB_INCLUDE_DIR})
-set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread")
-
-set(ROCKSDB_CMAKE_SHARED_LINKER_FLAGS "-ldl -lrt -lz")
+set(ROCKSDB_CXX_FLAGS
+    "${CMAKE_CXX_FLAGS} -DROCKSDB_LIBAIO_PRESENT -I${JEMALLOC_INCLUDE_DIR}")
+set(ROCKSDB_SHARED_LINKER_FLAGS "-Wl,--no-as-needed -ldl")
 
 if(WITH_ARM)
   file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/rocksdb/libaio.h.patch
@@ -62,6 +42,7 @@ if(WITH_ARM)
     git checkout -- . && git checkout ${ROCKSDB_TAG} && patch -Nd
     ${PADDLE_SOURCE_DIR}/third_party/rocksdb/env/ < ${native_src})
 endif()
+
 ExternalProject_Add(
   extern_rocksdb
   ${EXTERNAL_PROJECT_LOG_ARGS}
@@ -76,25 +57,23 @@ ExternalProject_Add(
     -DWITH_GFLAGS=OFF
     -DWITH_TESTS=OFF
     -DWITH_JEMALLOC=ON
-    -DWITH_BENCHMARK_TOOLS=OFF
-    -DFAIL_ON_WARNINGS=OFF # For Clang compatibility
     -DJeMalloc_LIBRARIES=${JEMALLOC_LIBRARIES}
     -DJeMalloc_INCLUDE_DIRS=${JEMALLOC_INCLUDE_DIR}
-    -DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS}
-    -DCMAKE_C_FLAGS=${ROCKSDB_CMAKE_C_FLAGS}
-    -DCMAKE_SHARED_LINKER_FLAGS=${ROCKSDB_CMAKE_SHARED_LINKER_FLAGS}
-  INSTALL_COMMAND
-    mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp
-    ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb-build/librocksdb.a
-    ${ROCKSDB_LIBRARIES} && cp -r ${ROCKSDB_SOURCE_DIR}/include
-    ${ROCKSDB_INSTALL_DIR}/
+    -DWITH_BENCHMARK_TOOLS=OFF
+    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+    -DCMAKE_CXX_FLAGS=${ROCKSDB_CXX_FLAGS}
+    -DCMAKE_SHARED_LINKER_FLAGS=${ROCKSDB_SHARED_LINKER_FLAGS}
+  CMAKE_CACHE_ARGS
+    -DCMAKE_INSTALL_PREFIX:PATH=${ROCKSDB_INSTALL_DIR}
+    -DCMAKE_INSTALL_LIBDIR:PATH=${ROCKSDB_INSTALL_DIR}/lib
+    -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+    -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
   BUILD_BYPRODUCTS ${ROCKSDB_LIBRARIES})
 
+add_dependencies(extern_rocksdb snappy extern_jemalloc)
 add_library(rocksdb STATIC IMPORTED GLOBAL)
-
-add_dependencies(extern_rocksdb snappy)
-add_dependencies(extern_rocksdb extern_jemalloc)
 set_property(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES})
+include_directories(${ROCKSDB_INCLUDE_DIR})
 add_dependencies(rocksdb extern_rocksdb)
 
 list(APPEND external_project_dependencies rocksdb)
diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index 5b8dd6e0ffe59..940e3804559ef 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -28,8 +28,11 @@ set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so")
 if(NOT DEFINED XPU_BASE_DATE)
   set(XPU_BASE_DATE "20240104")
 endif()
+if(NOT DEFINED XPU_XDNN_BASE_DATE)
+  set(XPU_XDNN_BASE_DATE "20240327")
+endif()
 if(NOT DEFINED XPU_XHPC_BASE_DATE)
-  set(XPU_XHPC_BASE_DATE "20240328")
+  set(XPU_XHPC_BASE_DATE "20240413")
 endif()
 set(XPU_XCCL_BASE_VERSION "1.1.8.1")
 if(NOT DEFINED XPU_XFT_BASE_VERSION)
@@ -45,6 +48,10 @@ else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
 
+set(XPU_XDNN_BASE_URL
+    "https://klx-sdk-release-public.su.bcebos.com/xdnn/stable/${XPU_XDNN_BASE_DATE}"
+)
+
 set(XPU_XCCL_BASE_URL
     "https://klx-sdk-release-public.su.bcebos.com/xccl/release/${XPU_XCCL_BASE_VERSION}"
 )
@@ -105,7 +112,7 @@ set(XPU_XRE_URL
     "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
     CACHE STRING "" FORCE)
 set(XPU_XDNN_URL
-    "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
+    "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
     CACHE STRING "" FORCE)
 set(XPU_XCCL_URL
     "${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz"
@@ -229,7 +236,7 @@ if(WITH_XPTI)
 endif()
 
 if(WITH_XPU_XHPC)
-  target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_XBLAS_LIB}
+  target_link_libraries(xpulib ${XPU_RT_LIB} ${XPU_XBLAS_LIB} ${XPU_API_LIB}
                         ${XPU_XFA_LIB})
 endif()
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index 23f7ff529fe7a..8279f83369ca8 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -167,6 +167,7 @@ if(NOT WIN32)
   if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
     set(COMMON_FLAGS
         ${COMMON_FLAGS}
+        -Wno-error=unknown-warning-option # For some unknown warning options in lower version clang
        -Wno-error=unused-private-field
        -Wno-error=unused-const-variable
        -Wno-error=deprecated-copy-with-user-provided-copy # For three/five/zeros rule, clang
@@ -211,6 +212,11 @@ if(NOT WIN32)
      -Wno-error=unused-function # Warnings in Numpy Header.
      -Wno-error=array-bounds # Warnings in Eigen::array
  )
+
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set(GPU_COMMON_FLAGS -ccbin=${CMAKE_CXX_COMPILER} ${GPU_COMMON_FLAGS})
+  endif()
+
   if(NOT WITH_NV_JETSON
      AND NOT WITH_ARM
      AND NOT WITH_SW
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index d618c9667de83..4c8819e438a2f 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -598,9 +598,9 @@ function(paddle_test_build TARGET_NAME)
                           ${paddle_test_DEPS} common paddle_gtest_main_new)
     add_dependencies(${TARGET_NAME} ${paddle_lib} ${paddle_test_DEPS} common
                      paddle_gtest_main_new)
-    if(WITH_MKLDNN)
-      target_link_libraries(${TARGET_NAME} mkldnn)
-      add_dependencies(${TARGET_NAME} mkldnn)
+    if(WITH_ONEDNN)
+      target_link_libraries(${TARGET_NAME} onednn)
+      add_dependencies(${TARGET_NAME} onednn)
     endif()
     if(WITH_SHARED_PHI)
       target_link_libraries(${TARGET_NAME} $<TARGET_LINKER_FILE:phi>)
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index 3005da8aea125..3b81733d279d7 100755
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -114,8 +114,8 @@ function(copy_part_of_third_party TARGET DST)
     endif()
   endif()
 
-  if(WITH_MKLDNN)
-    set(dst_dir "${DST}/third_party/install/mkldnn")
+  if(WITH_ONEDNN)
+    set(dst_dir "${DST}/third_party/install/onednn")
     if(WIN32)
       copy(
         ${TARGET}
diff --git a/cmake/operators.cmake b/cmake/operators.cmake
index c7dfb4ac641d2..f089f6e55b17b 100644
--- a/cmake/operators.cmake
+++ b/cmake/operators.cmake
@@ -102,42 +102,42 @@ function(register_cu_kernel TARGET)
   endforeach()
 endfunction()
 
-# Just for those mkldnn kernels locating at "fluid/operators/mkldnn/", such as 'layer_norm_mkldnn_op.cc'.
+# Just for those onednn kernels locating at "fluid/operators/onednn/", such as 'layer_norm_onednn_op.cc'.
 # Add other file modes if need in the future.
-function(register_mkldnn_kernel TARGET)
+function(register_onednn_kernel TARGET)
   set(options "")
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS)
-  cmake_parse_arguments(register_mkldnn_kernel "${options}" "${oneValueArgs}"
+  cmake_parse_arguments(register_onednn_kernel "${options}" "${oneValueArgs}"
                         "${multiValueArgs}" ${ARGN})
-  set(mkldnn_cc_srcs)
+  set(onednn_cc_srcs)
   set(op_common_deps operator op_registry phi layer
                      common_infer_shape_functions)
-  foreach(mkldnn_src ${register_mkldnn_kernel_SRCS})
-    if(${mkldnn_src} MATCHES ".*_mkldnn_op.cc$")
-      list(APPEND mkldnn_cc_srcs mkldnn/${mkldnn_src})
+  foreach(onednn_src ${register_onednn_kernel_SRCS})
+    if(${onednn_src} MATCHES ".*_onednn_op.cc$")
+      list(APPEND onednn_cc_srcs onednn/${onednn_src})
     endif()
   endforeach()
-  list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
-  if(${mkldnn_cc_srcs_len} EQUAL 0)
+  list(LENGTH onednn_cc_srcs onednn_cc_srcs_len)
+  if(${onednn_cc_srcs_len} EQUAL 0)
     message(
       FATAL_ERROR
-        "The MKLDNN kernel file of ${TARGET} should contains at least one *.*_mkldnn_op.cc file"
+        "The MKLDNN kernel file of ${TARGET} should contains at least one *.*_onednn_op.cc file"
     )
   endif()
-  if(WITH_MKLDNN)
+  if(WITH_ONEDNN)
     cc_library(
       ${TARGET}
-      SRCS ${mkldnn_cc_srcs}
+      SRCS ${onednn_cc_srcs}
       DEPS ${op_library_DEPS} ${op_common_deps})
   endif()
   set(OP_LIBRARY
       ${TARGET} ${OP_LIBRARY}
       CACHE INTERNAL "op libs")
-  foreach(mkldnn_src ${mkldnn_cc_srcs})
+  foreach(onednn_src ${onednn_cc_srcs})
     set(op_name "")
-    find_register(${mkldnn_src} "REGISTER_OP_KERNEL" op_name)
+    find_register(${onednn_src} "REGISTER_OP_KERNEL" op_name)
     if(NOT ${op_name} EQUAL "")
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n")
     endif()
@@ -161,7 +161,7 @@ function(op_library TARGET)
   set(miopen_cu_srcs)
   set(CUDNN_FILE)
   set(MIOPEN_FILE)
-  set(mkldnn_cc_srcs)
+  set(onednn_cc_srcs)
   set(MKLDNN_FILE)
   set(op_common_deps operator op_registry phi layer
                      common_infer_shape_functions)
@@ -237,10 +237,10 @@ function(op_library TARGET)
       list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu)
     endif()
   endif()
-  if(WITH_MKLDNN)
-    string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}")
-    if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/${MKLDNN_FILE}.cc)
-      list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc)
+  if(WITH_ONEDNN)
+    string(REPLACE "_op" "_onednn_op" MKLDNN_FILE "${TARGET}")
+    if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/onednn/${MKLDNN_FILE}.cc)
+      list(APPEND onednn_cc_srcs onednn/${MKLDNN_FILE}.cc)
     endif()
   endif()
   if(WITH_XPU)
@@ -275,8 +275,8 @@ function(op_library TARGET)
         list(APPEND cudnn_cu_cc_srcs ${src})
       elseif(WITH_GPU AND ${src} MATCHES ".*\\.cu.cc$")
         list(APPEND cu_cc_srcs ${src})
-      elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$")
-        list(APPEND mkldnn_cc_srcs ${src})
+      elseif(WITH_ONEDNN AND ${src} MATCHES ".*_onednn_op.cc$")
+        list(APPEND onednn_cc_srcs ${src})
       elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$")
         list(APPEND xpu_cc_srcs ${src})
       elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.xpu$")
@@ -349,7 +349,7 @@ function(op_library TARGET)
   if(WITH_UNITY_BUILD AND op_library_UNITY)
     # Combine the cc and cu source files.
     compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${cu_cc_srcs}
-                                 ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs})
+                                 ${cudnn_cu_cc_srcs} ${onednn_cc_srcs})
     compose_unity_target_sources(${UNITY_TARGET} cu ${cudnn_cu_srcs}
                                  ${cu_srcs})
     if(TARGET ${UNITY_TARGET})
@@ -369,7 +369,7 @@ function(op_library TARGET)
       nv_library(
         ${TARGET}
         SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs}
-             ${mkldnn_cc_srcs} ${cu_srcs}
+             ${onednn_cc_srcs} ${cu_srcs}
         DEPS ${op_library_DEPS} ${op_common_deps})
     endif()
   elseif(WITH_ROCM)
@@ -389,19 +389,19 @@ function(op_library TARGET)
     hip_library(
       ${TARGET}
       SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs}
-           ${mkldnn_cc_srcs} ${hip_srcs}
+           ${onednn_cc_srcs} ${hip_srcs}
       DEPS ${op_library_DEPS} ${op_common_deps})
   elseif(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
     xpu_library(
       ${TARGET}
-      SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs}
+      SRCS ${cc_srcs} ${onednn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs}
       DEPS ${op_library_DEPS} ${op_common_deps})
   else()
     # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
     if(WITH_UNITY_BUILD AND op_library_UNITY)
       # Combine the cc source files.
       compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs}
-                                   ${mkldnn_cc_srcs} ${xpu_cc_srcs})
+                                   ${onednn_cc_srcs} ${xpu_cc_srcs})
       if(TARGET ${UNITY_TARGET})
         # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
         target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources})
@@ -417,7 +417,7 @@ function(op_library TARGET)
     else()
       cc_library(
         ${TARGET}
-        SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs}
+        SRCS ${cc_srcs} ${onednn_cc_srcs} ${xpu_cc_srcs}
         DEPS ${op_library_DEPS} ${op_common_deps})
     endif()
   endif()
@@ -426,7 +426,7 @@ function(op_library TARGET)
   list(LENGTH hip_srcs hip_srcs_len)
   list(LENGTH cu_cc_srcs cu_cc_srcs_len)
   list(LENGTH hip_cc_srcs hip_cc_srcs_len)
-  list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
+  list(LENGTH onednn_cc_srcs onednn_cc_srcs_len)
   list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
   list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
 
@@ -463,7 +463,7 @@ function(op_library TARGET)
       find_register(${cc_src} "REGISTER_OPERATOR" op_name)
       if(NOT ${op_name} EQUAL "")
         file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
-        # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
+        # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in onednn
         set(TARGET ${op_name})
         set(pybind_flag 1)
       endif()
@@ -474,7 +474,7 @@ function(op_library TARGET)
       find_register(${cc_src} "REGISTER_ACTIVATION_OP" op_name)
      if(NOT ${op_name} EQUAL "")
        file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
-        # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
+        # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in onednn
        set(TARGET ${op_name})
        set(pybind_flag 1)
      endif()
@@ -483,7 +483,7 @@ function(op_library TARGET)
      find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name)
      if(NOT ${op_name} EQUAL "")
        file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
-        # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
+        # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in onednn
        set(TARGET ${op_name})
        set(pybind_flag 1)
      endif()
@@ -496,8 +496,8 @@ function(op_library TARGET)
      # why change TARGET here?
      # when building paddle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py)
      # in elementwise_op.cc, it will find REGISTER_OPERATOR(grad_add) and set TARGET to grad_add
-      # and, in the following "mkldnn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h
-      # however, grad_add has no mkldnn kernel.
+      # and, in the following "onednn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h
+      # however, grad_add has no onednn kernel.
      set(TARGET ${op_name})
      set(pybind_flag 1)
    endif()
@@ -520,16 +520,16 @@ function(op_library TARGET)
    endif()
  endforeach()
 
-  # pybind USE_OP_DEVICE_KERNEL for operators/mkldnn/*
-  list(APPEND mkldnn_srcs ${mkldnn_cc_srcs})
-  foreach(mkldnn_src ${mkldnn_srcs})
+  # pybind USE_OP_DEVICE_KERNEL for operators/onednn/*
+  list(APPEND onednn_srcs ${onednn_cc_srcs})
+  foreach(onednn_src ${onednn_srcs})
    set(op_name "")
    # Add PHI Kernel Registry Message
-    find_phi_register(${mkldnn_src} ${pybind_file} "PD_REGISTER_KERNEL")
-    find_phi_register(${mkldnn_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
-    find_phi_register(${mkldnn_src} ${pybind_file}
+    find_phi_register(${onednn_src} ${pybind_file} "PD_REGISTER_KERNEL")
+    find_phi_register(${onednn_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
+    find_phi_register(${onednn_src} ${pybind_file}
                      "PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
-    find_register(${mkldnn_src} "REGISTER_OP_CUDA_KERNEL" op_name)
+    find_register(${onednn_src} "REGISTER_OP_CUDA_KERNEL" op_name)
    if(NOT ${op_name} EQUAL "")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
      set(pybind_flag 1)
@@ -610,14 +610,14 @@ function(op_library TARGET)
  endif()
 
  # pybind USE_OP_DEVICE_KERNEL for MKLDNN
-  if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
+  if(WITH_ONEDNN AND ${onednn_cc_srcs_len} GREATER 0)
    # Append first implemented MKLDNN activation operator
-    if(${MKLDNN_FILE} STREQUAL "activation_mkldnn_op")
+    if(${MKLDNN_FILE} STREQUAL "activation_onednn_op")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(softplus, MKLDNN);\n")
    else()
-      foreach(mkldnn_src ${mkldnn_cc_srcs})
+      foreach(onednn_src ${onednn_cc_srcs})
        set(op_name "")
-        find_register(${mkldnn_src} "REGISTER_OP_KERNEL" op_name)
+        find_register(${onednn_src} "REGISTER_OP_KERNEL" op_name)
        if(NOT ${op_name} EQUAL "")
          file(APPEND ${pybind_file}
               "USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n")
@@ -666,7 +666,7 @@ function(register_operators)
    GLOB OPS
    RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
    "*_op.cc")
-  string(REPLACE "_mkldnn" "" OPS "${OPS}")
+  string(REPLACE "_onednn" "" OPS "${OPS}")
  string(REPLACE "_xpu" "" OPS "${OPS}")
  string(REPLACE ".cc" "" OPS "${OPS}")
  list(REMOVE_DUPLICATES OPS)
diff --git a/cmake/simd.cmake b/cmake/simd.cmake
index 676a25118303c..a305ef4759500 100644
--- a/cmake/simd.cmake
+++ b/cmake/simd.cmake
@@ -11,12 +11,17 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
   set(AVX_FLAG "-mavx")
   set(AVX2_FLAG "-mavx2")
   set(AVX512F_FLAG "-mavx512f")
+  set(Wno_Maybe_Uninitialized "-Wno-maybe-uninitialized")
+  set(FMA_FLAG "-mfma")
 elseif(MSVC)
   set(MMX_FLAG "/arch:MMX")
   set(SSE2_FLAG "/arch:SSE2")
   set(SSE3_FLAG "/arch:SSE3")
   set(AVX_FLAG "/arch:AVX")
   set(AVX2_FLAG "/arch:AVX2")
+  set(AVX512F_FLAG "/arch:AVX512")
+  set(Wno_Maybe_Uninitialized "/wd4701")
+  set(FMA_FLAG "/arch:AVX2")
 endif()
 
 set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS})
diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index 9839f32f83c2b..e90a1c860eb31 100755
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -254,19 +254,19 @@ if(WIN32 OR APPLE)
 endif()
 
 set(WITH_MKLML ${WITH_MKL})
-if(NOT DEFINED WITH_MKLDNN)
+if(NOT DEFINED WITH_ONEDNN)
   if(WITH_MKL AND AVX2_FOUND)
-    set(WITH_MKLDNN ON)
+    set(WITH_ONEDNN ON)
   else()
     message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN.")
-    set(WITH_MKLDNN OFF)
+    set(WITH_ONEDNN OFF)
   endif()
 endif()
 
 if(WIN32)
   if(MSVC)
     if(MSVC_VERSION LESS 1920)
-      set(WITH_MKLDNN OFF)
+      set(WITH_ONEDNN OFF)
     endif()
   endif()
 endif()
@@ -303,7 +303,7 @@ if(WITH_CINN)
   if(WITH_MKL)
     add_definitions(-DCINN_WITH_MKL_CBLAS)
   endif()
-  if(WITH_MKLDNN)
+  if(WITH_ONEDNN)
     add_definitions(-DCINN_WITH_DNNL)
   endif()
   include(cmake/cinn/version.cmake)
@@ -362,9 +362,9 @@ elseif(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS)
   list(APPEND third_party_deps extern_openblas)
 endif()
 
-if(WITH_MKLDNN)
-  include(external/mkldnn) # download, build, install mkldnn
-  list(APPEND third_party_deps extern_mkldnn)
+if(WITH_ONEDNN)
+  include(external/onednn) # download, build, install onednn
+  list(APPEND third_party_deps extern_onednn)
 endif()
 
 include(external/protobuf) # find first, then download, build, install protobuf
@@ -372,6 +372,11 @@ if(TARGET extern_protobuf)
   list(APPEND third_party_deps extern_protobuf)
 endif()
 
+include(external/json) # find first, then build json
+if(TARGET extern_json)
+  list(APPEND third_party_deps extern_json)
+endif()
+
 if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
   include(external/python) # find python and python_module
   include(external/pybind11) # prepare submodule pybind11
diff --git a/cmake/version.cmake b/cmake/version.cmake
index 28f022e0afa0e..185418127fdf4 100644
--- a/cmake/version.cmake
+++ b/cmake/version.cmake
@@ -96,7 +96,7 @@ function(version version_file)
       "Paddle version: ${PADDLE_VERSION}\n"
       "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
       "WITH_MKL: ${WITH_MKL}\n"
-      "WITH_MKLDNN: ${WITH_MKLDNN}\n"
+      "WITH_ONEDNN: ${WITH_ONEDNN}\n"
       "WITH_GPU: ${WITH_GPU}\n"
       "WITH_ROCM: ${WITH_ROCM}\n"
       "WITH_IPU: ${WITH_IPU}\n")
diff --git a/paddle/cinn/CMakeLists.txt b/paddle/cinn/CMakeLists.txt
index 8369b4d8bf8e7..487f7167212fe 100644
--- a/paddle/cinn/CMakeLists.txt
+++ b/paddle/cinn/CMakeLists.txt
@@ -17,6 +17,7 @@ add_subdirectory(optim)
 add_subdirectory(hlir)
 add_subdirectory(pybind)
 add_subdirectory(frontend)
+add_subdirectory(operator_fusion)
 
 # Download a model
 download_and_uncompress("${DOWNLOAD_MODEL_DIR}" "${PADDLE_RESOURCE_URL}"
diff --git a/paddle/cinn/api/op_topo_pattern.h b/paddle/cinn/api/op_topo_pattern.h
deleted file mode 100644
index 34f17fbfde9e0..0000000000000
--- a/paddle/cinn/api/op_topo_pattern.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <functional>
-#include <variant>
-#include <vector>
-
-namespace cinn::api {
-
-template <typename T>
-struct ErrorPattern {};
-
-// ElementWise/Broadcast/Injective Ops without reduction ancestors.
-template <typename T>
-struct InjectiveSourcePattern {};
-
-// Reduce op
-template <typename T>
-struct SingleReductionOpPattern {};
-
-// ElementWise/Broadcast ops which have shardable dimentions and reduction
-// ancestors.
-template <typename T>
-struct PartialShardablePattern {};
-
-// Reduce base pattern
-template <typename T>
-struct ReductionPattern {
-  using Nothing = std::monostate;
-  std::variant<Nothing, InjectiveSourcePattern<T>, PartialShardablePattern<T>>
-      input;
-  SingleReductionOpPattern<T> reduce_op_pattern;
-
-  bool HasFusedInput() const {
-    return !std::holds_alternative<Nothing>(this->input);
-  }
-};
-
-// Stmt := IS | R | PS
-// ops in StmtPattern will be lowered into a inlined cuda code.
-template <typename T>
-using StmtPattern = std::variant<InjectiveSourcePattern<T>,
-                                 ReductionPattern<T>,
-                                 PartialShardablePattern<T>>;
-
-// Stmts := [Stmt]
-template <typename T>
-using StmtPatternVec = std::vector<StmtPattern<T>>;
-// fuse rules:
-// 1. IS * IS -> IS
-// 2. PS * PS -> PS
-// 3. IS * PS -> PS
-// 4. IS * R -> R
-// 5. PS * R -> R
-// lifting rules:
-// 1. R -> Stmts
-// 2. PS -> Stmts
-// 3. Stmts * Stmts -> Stmts
-// OpTopoPattern := Error | Stmts
-
-template <typename T>
-using OpTopoPattern = std::variant<ErrorPattern<T>, StmtPatternVec<T>>;
-
-}  // namespace cinn::api
diff --git a/paddle/cinn/ast_gen_ius/ast_gen.cc b/paddle/cinn/ast_gen_ius/ast_gen.cc
index 45923624945d0..89cfd3f7d462f 100644
--- a/paddle/cinn/ast_gen_ius/ast_gen.cc
+++ b/paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -68,8 +68,11 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   const std::vector<ir::Var>& axis = tensor->axis();
   const std::vector<ir::Expr>& shape = tensor->shape;
   size_t axis_len = axis.size();
-  CHECK_EQ(shape.size(), axis_len) << "Internal Error: Tensor has different "
-                                      "shape and axis length in AstGen";
+  PADDLE_ENFORCE_EQ(
+      shape.size(),
+      axis_len,
+      phi::errors::InvalidArgument("Internal Error: Tensor has different "
+                                   "shape and axis length in AstGen"));
   std::vector<ir::Expr> axis_exprs;
   for (const auto& a : axis) {
     axis_exprs.push_back(a);
diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.cc
index 8b99fd6e61e22..cc14fc369d94d 100644
--- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.cc
+++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.cc
@@ -434,9 +434,9 @@ void MultiLevelTiling::ApplyCacheWrite(ir::IRSchedule* ir_schedule,
   }
 }
 
-const std::unordered_map<common::Target::Arch, MultiLevelTiling::Config>
+const std::unordered_map<common::Arch, MultiLevelTiling::Config>
     MultiLevelTiling::kConfigs{
-        {cinn::common::Target::Arch::NVGPU,
+        {cinn::common::NVGPUArch{},
         MultiLevelTiling::Config{
             /*bind_axis*/ std::vector<std::string>{"blockIdx.x",
                                                    "threadIdx.x"},
@@ -446,7 +446,7 @@ const std::unordered_map<common::Target::Arch, MultiLevelTiling::Config>
             /*write_cache_memory_type*/ std::string("local"),
             /*write_cache_levels*/ std::vector<int>{3},
         }},
-        {cinn::common::Target::Arch::X86,
+        {cinn::common::X86Arch{},
         MultiLevelTiling::Config{
             /*bind_axis*/ std::vector<std::string>{},
             /*tile_struct*/ std::string("SSRSRS"),
diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h
index 617cc24998bbb..1bbc8da4497d6 100644
--- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h
+++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h
@@ -53,7 +53,7 @@ class MultiLevelTiling : public AutoGenRule {
     std::vector<int> write_cache_levels;
   };
 
-  static const std::unordered_map<common::Target::Arch, Config> kConfigs;
+  static const std::unordered_map<common::Arch, Config> kConfigs;
 
   MultiLevelTiling(const cinn::common::Target& target, const Config& config);
   ~MultiLevelTiling() = default;
diff --git a/paddle/cinn/auto_schedule/task/tune_task_test.cc b/paddle/cinn/auto_schedule/task/tune_task_test.cc
index 733197b0a6f97..2a4ce9e46fdd8 100644
--- a/paddle/cinn/auto_schedule/task/tune_task_test.cc
+++ b/paddle/cinn/auto_schedule/task/tune_task_test.cc
@@ -301,7 +301,7 @@ TEST(TuneTask, SerializeToString) {
   }
 
 #ifdef CINN_WITH_CUDA
-  std::string single_add_str = R"ROC(Target<linux,nvgpu,64>
+  std::string single_add_str = R"ROC(Target<linux,NVGPU,64>
 Group {
   (var_1->float32[32,24]) = elementwise_add(A->float32[32,24], B->float32[32,24])
 }
@@ -324,7 +324,7 @@ Group {
   fused_tasks[0].Initialize(shape_dict, dtype_dict, &op_lowerer);
 
 #ifdef CINN_WITH_CUDA
-  std::string fused_expected_str = R"ROC(Target<linux,nvgpu,64>
+  std::string fused_expected_str = R"ROC(Target<linux,NVGPU,64>
 Group {
   (var_1->float32[32,24]) = elementwise_add(A->float32[32,24], B->float32[32,24])
   (var_2->float32[32,24]) = elementwise_add(var_1->float32[32,24], C->float32[32,24])
 }
 )ROC";
 #else
-  std::string fused_expected_str = R"ROC(Target<linux,x86,64>
+  std::string fused_expected_str = R"ROC(Target<linux,X86,64>
 Group {
   (var_1->float32[32,24]) = elementwise_add(A->float32[32,24], B->float32[32,24])
diff --git a/paddle/cinn/backends/codegen_c.cc b/paddle/cinn/backends/codegen_c.cc
index c585aa843a432..85443b02c0a8c 100644
--- a/paddle/cinn/backends/codegen_c.cc
+++ b/paddle/cinn/backends/codegen_c.cc
@@ -434,31 +434,37 @@ void CodeGenC::Visit(const ir::_Module_ *op) { CINN_NOT_IMPLEMENTED }
 void CodeGenC::Visit(const ir::_Var_ *op) { str_ += op->name; }
 
 void CodeGenC::Visit(const ir::Load *op) {
-  ir::Expr op_index = op->index();
-  Expr dense_strided_ramp = detail::StridedRampBase(op_index, 1);
+  ir::Expr offset = [&] {
+    if (load_to_offset_.count(op) == 0) {
+      load_to_offset_[op] = op->index();
+    }
+    return load_to_offset_.at(op);
+  }();
+
+  Expr dense_strided_ramp = detail::StridedRampBase(offset, 1);
   if (dense_strided_ramp.defined()) {  // Loading a continuous Ramp address.
     CHECK(op->type().is_vector());
-    PrintStackVecType(op->type().ElementOf(), op_index.type().lanes());
+    PrintStackVecType(op->type().ElementOf(), offset.type().lanes());
     str_ += "::";
     str_ += "Load(";
     str_ += op->tensor.As<ir::_Tensor_>()->name;
     str_ += ",";
     IrPrinter::Visit(dense_strided_ramp);
     str_ += ")";
-  } else if (op_index.type().is_vector()) {
+  } else if (offset.type().is_vector()) {
     // gather
     CHECK(op->type().is_vector());
-    PrintStackVecType(op->type().ElementOf(), op_index.type().lanes());
+    PrintStackVecType(op->type().ElementOf(), offset.type().lanes());
     str_ += "::Load(";
     str_ += op->tensor.As<ir::_Tensor_>()->name;
     str_ += ",";
-    IrPrinter::Visit(op_index);
+    IrPrinter::Visit(offset);
     str_ += ")";
   } else if (op->is_addr_tensor()) {
     auto *tensor = op->tensor.As<ir::_Tensor_>();
     str_ += tensor->name;
     str_ += "[";
-    IrPrinter::Visit(op_index);
+    IrPrinter::Visit(offset);
     str_ += "]";
   } else {
     IrPrinter::Visit(op);
@@ -467,12 +473,17 @@ void CodeGenC::Visit(const ir::Load *op) {
 
 void CodeGenC::Visit(const ir::Store *op) {
   CHECK(op->is_addr_tensor());
-
+  ir::Expr offset = [&] {
+    if (store_to_offset_.count(op) == 0) {
+      store_to_offset_[op] = op->index();
+    }
+    return store_to_offset_.at(op);
+  }();
   auto *tensor = op->tensor.As<ir::_Tensor_>();
   CHECK(tensor);
   str_ += tensor->name;
   str_ += "[";
-  IrPrinter::Visit(op->index());
+  IrPrinter::Visit(offset);
   str_ += "]";
   str_ += " = ";
   IrPrinter::Visit(op->value);
diff --git a/paddle/cinn/backends/codegen_c.h b/paddle/cinn/backends/codegen_c.h
index c50c85741ce56..2904bef80beea 100644
--- a/paddle/cinn/backends/codegen_c.h
+++ b/paddle/cinn/backends/codegen_c.h
@@ -118,6 +118,8 @@ class CodeGenC : public ir::IrPrinter {
   Target target_;
   std::stringstream ss_;
   bool inline_builtin_codes_{true};
+  std::unordered_map<const ir::Store *, ir::Expr> store_to_offset_;
+  std::unordered_map<const ir::Load *, ir::Expr> load_to_offset_;
 };
 
 namespace detail {
diff --git a/paddle/cinn/backends/codegen_c_test.cc b/paddle/cinn/backends/codegen_c_test.cc
index 61adad6ade461..b0eb626210736 100644
--- a/paddle/cinn/backends/codegen_c_test.cc
+++ b/paddle/cinn/backends/codegen_c_test.cc
@@ -61,7 +61,7 @@ TEST(CodeGenC, module) {
   LOG(INFO) << "C.body: " << C->get_compute_op()->body.front();
 
   Target target;
-  target.arch = Target::Arch::X86;
+  target.arch = common::X86Arch{};
   target.bits = Target::Bit::k32;
   target.os = Target::OS::Linux;
   Module::Builder builder("module1", target);
diff --git a/paddle/cinn/backends/codegen_c_x86_test.cc b/paddle/cinn/backends/codegen_c_x86_test.cc
index 9e1821f7b0200..75d9d978dd960 100644
--- a/paddle/cinn/backends/codegen_c_x86_test.cc
+++ b/paddle/cinn/backends/codegen_c_x86_test.cc
@@ -41,7 +41,7 @@ TEST(CodeGenCX86, basic) {
   const int bn = 32;
 
   Target target;
-  target.arch = Target::Arch ::X86;
+  target.arch = common::X86Arch{};
   target.bits = Target::Bit ::k32;
   target.os = Target::OS ::Linux;
 
diff --git a/paddle/cinn/backends/codegen_cuda_dev.cc b/paddle/cinn/backends/codegen_cuda_dev.cc
index 6b6597b2e208c..9c19c6faffb73 100644
--- a/paddle/cinn/backends/codegen_cuda_dev.cc
+++ b/paddle/cinn/backends/codegen_cuda_dev.cc
@@ -26,6 +26,7 @@
 #include "paddle/cinn/ir/op/ir_operators.h"
 #include "paddle/cinn/ir/utils/ir_verify.h"
 #include "paddle/cinn/optim/ir_simplify.h"
+#include "paddle/common/errors.h"
 
 namespace cinn {
 namespace backends {
@@ -509,5 +510,36 @@ void CodeGenCUDA_Dev::Visit(const ir::Store *op) {
   }
 }
 
+ir::Expr CalculateSharedMemory(const ir::Buffer &buffer) {
+  Expr buffer_size(1);
+  for (int i = 0; i < buffer->shape.size(); i++) {
+    buffer_size = buffer_size * buffer->shape[i];
+  }
+  int type_bytes = buffer->dtype.bytes();
+  return buffer_size * Expr(type_bytes);
+}
+
+ir::Expr CalculateSharedMemory(const ir::Expr &func_expr) {
+  auto func = func_expr.as_lowered_func();
+  PADDLE_ENFORCE_NOT_NULL(
+      func, ::common::errors::InvalidType("expr is not a lowered_func"));
+  auto alloc_temp_buffers = func->PrepareAllocTempBufferExprs();
+  ir::Expr shm_size{0};
+  for (const auto &alloc : alloc_temp_buffers) {
+    PADDLE_ENFORCE_NOT_NULL(
+        alloc.As<ir::Alloc>(),
+        ::common::errors::InvalidType("expr is not a Alloc node"));
+    PADDLE_ENFORCE_NOT_NULL(
+        alloc.As<ir::Alloc>()->destination.as_buffer(),
+        ::common::errors::InvalidType("expr is not a Buffer node"));
+
+    auto buffer = alloc.As<ir::Alloc>()->destination.as_buffer_ref();
+    if (buffer->memory_type == ir::MemoryType::GPUShared) {
+      shm_size = shm_size + CalculateSharedMemory(buffer);
+    }
+  }
+  return common::AutoSimplify(shm_size);
+}
+
 }  // namespace backends
 }  // namespace cinn
diff --git a/paddle/cinn/backends/codegen_cuda_dev.h b/paddle/cinn/backends/codegen_cuda_dev.h
index d1ebfd930f92f..d0995fccc0e06 100644
--- a/paddle/cinn/backends/codegen_cuda_dev.h
+++ b/paddle/cinn/backends/codegen_cuda_dev.h
@@ -127,5 +127,7 @@ class CodeGenCUDA_Dev : public CodeGenC {
   std::vector<ir::Expr> dynamic_alloc_buffers_;
 };
 
+ir::Expr CalculateSharedMemory(const ir::Expr& func_expr);
+
 }  // namespace backends
 }  // namespace cinn
diff --git a/paddle/cinn/backends/codegen_cuda_util.cc b/paddle/cinn/backends/codegen_cuda_util.cc
index 1c8d535507cb7..729dcca7be745 100644
--- a/paddle/cinn/backends/codegen_cuda_util.cc
+++ b/paddle/cinn/backends/codegen_cuda_util.cc
@@ -91,12 +91,7 @@ void detail::CollectBucketStrategyHostFunctionVisitor::ProcessLoweredFunc(
   ir::Var kernel_ptr(GenDeviceKernelName(func_node->name, predicate),
                      type_of<std::string>());
 
-  // shared_mem_bytes Can be calculated after codegen_cuda_dev buffer creation
-  // however, this make CodeGenCUDA_Dev before spliting the host and device
-  // module Maybe we could reorder the process.
-  CodeGenCUDA_Dev codegen_dev(cinn::common::DefaultNVGPUTarget());
-  codegen_dev.Compile(ir::LoweredFunc(func.as_lowered_func_ref()));
-  Expr shared_mem_bytes = codegen_dev.GetDynSharedMemOffset();
+  Expr shared_mem_bytes = CalculateSharedMemory(func);
 
   VLOG(6) << "Add a call node for func_node->name " << func_node->name << "\n"
           << "grid_dim: (" << func_node->cuda_axis_info.grid_dim(0) << ", "
diff --git a/paddle/cinn/backends/compiler.cc b/paddle/cinn/backends/compiler.cc
index f63869730a11f..b37090a74fbe1 100644
--- a/paddle/cinn/backends/compiler.cc
+++ b/paddle/cinn/backends/compiler.cc
@@ -30,6 +30,7 @@
 #include "paddle/cinn/runtime/cuda/cuda_util.h"
 #include "paddle/cinn/runtime/flags.h"
 #endif
+#include "paddle/cinn/adt/adt.h"
 
 PD_DECLARE_string(cinn_source_code_save_path);
 PD_DECLARE_string(cinn_dump_group_lowered_func);
@@ -229,41 +230,41 @@ void SourceCodePrint::write(const std::string& source_code) {
 }
 
 void Compiler::Build(const Module& module, const std::string& code) {
-  if (target_.arch == Target::Arch::NVGPU) {
-    CompileCudaModule(module, code);
-  } else if (target_.arch == Target::Arch::X86) {
-    CompileX86Module(module);
-  } else {
-    CINN_NOT_IMPLEMENTED
-  }
+  auto PatternMatch =
+      adt::match{[&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+                 [&](common::X86Arch) { CompileX86Module(module); },
+                 [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+                 [&](common::NVGPUArch) { CompileCudaModule(module, code); }};
+  return std::visit(PatternMatch, target_.arch.variant());
 }
 
 std::string Compiler::GetSourceCode(const ir::Module& module) {
-  if (target_.arch == Target::Arch::NVGPU) {
+  return target_.arch.Visit(adt::match{
+      [&](common::UnknownArch) -> std::string { CINN_NOT_IMPLEMENTED; },
+      [&](common::X86Arch) -> std::string { CINN_NOT_IMPLEMENTED; },
+      [&](common::ARMArch) -> std::string { CINN_NOT_IMPLEMENTED; },
+      [&](common::NVGPUArch) -> std::string {
 #ifdef CINN_WITH_CUDA
-    auto _host_module_device_module_ =
-        SplitCudaAndHostModule(module);  // NOLINT
-    auto& host_module = std::get<0>(_host_module_device_module_);
-    auto& device_module = std::get<1>(_host_module_device_module_);
-    CodeGenCUDA_Dev codegen(target_);
-    auto source_code = codegen.Compile(device_module);
-    return source_code;
+        auto _host_module_device_module_ =
+            SplitCudaAndHostModule(module);  // NOLINT
+        auto& host_module = std::get<0>(_host_module_device_module_);
+        auto& device_module = std::get<1>(_host_module_device_module_);
+        CodeGenCUDA_Dev codegen(target_);
+        auto source_code = codegen.Compile(device_module);
+        return source_code;
 #else
-    CINN_NOT_IMPLEMENTED
+        CINN_NOT_IMPLEMENTED
 #endif
-  } else {
-    CINN_NOT_IMPLEMENTED
-  }
+      }});
 }
 
 void Compiler::BuildDefault(const Module& module) {
-  if (target_.arch == Target::Arch::NVGPU) {
-    CompileCudaModule(module);
-  } else if (target_.arch == Target::Arch::X86) {
-    CompileX86Module(module);
-  } else {
-    CINN_NOT_IMPLEMENTED
-  }
+  target_.arch.Visit(adt::match{
+      [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+      [&](common::X86Arch) { CompileX86Module(module); },
+      [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+      [&](common::NVGPUArch) { CompileCudaModule(module); },
+  });
 }
 
 void Compiler::CompileCudaModule(const Module& module,
diff --git a/paddle/cinn/backends/extern_func_jit_register.h b/paddle/cinn/backends/extern_func_jit_register.h
index 383f8b3565a4e..4784187c8eddd 100644
--- a/paddle/cinn/backends/extern_func_jit_register.h
+++ b/paddle/cinn/backends/extern_func_jit_register.h
@@ -93,15 +93,12 @@ namespace cinn {
 namespace backends {
 
 static const char* TargetToBackendRepr(Target target) {
-  switch (target.arch) {
-    case Target::Arch::X86:
-      return backend_llvm_host;
-    case Target::Arch::NVGPU:
-      return backend_nvgpu;
-    default:
-      CINN_NOT_IMPLEMENTED
-  }
-  return nullptr;
+  return target.arch.Visit(adt::match{
+      [&](common::UnknownArch) -> const char* { CINN_NOT_IMPLEMENTED; },
+      [&](common::X86Arch) -> const char* { return backend_llvm_host; },
+      [&](common::ARMArch) -> const char* { CINN_NOT_IMPLEMENTED; },
+      [&](common::NVGPUArch) -> const char* { return backend_nvgpu; },
+  });
 }
 
 /**
diff --git a/paddle/cinn/backends/llvm/codegen_llvm.cc b/paddle/cinn/backends/llvm/codegen_llvm.cc
index e24b5220919cb..2f8a387045bf6 100644
--- a/paddle/cinn/backends/llvm/codegen_llvm.cc
+++ b/paddle/cinn/backends/llvm/codegen_llvm.cc
@@ -1366,32 +1366,40 @@ llvm::Value *CodeGenLLVM::CreateVecSlice(llvm::Value *vec,
       vec, undef, llvm::ConstantVector::get(indices));
 }
 
+int GetNaiveVecAlignmentImpl(common::UnknownArch, const Target &target) {
+  PADDLE_THROW(phi::errors::InvalidArgument("unknown Arch found"));
+}
+
+int GetNaiveVecAlignmentImpl(common::X86Arch, const Target &target) {
+  if (target.bits == Target::Bit::k32) {
+    return 256;
+  } else if (target.bits == Target::Bit::k64) {
+    return 512;
+  }
+  PADDLE_THROW(phi::errors::InvalidArgument("get unknown bits"));
+}
+
+int GetNaiveVecAlignmentImpl(common::ARMArch, const Target &target) {
+  return 128;
+}
+
+int GetNaiveVecAlignmentImpl(common::NVGPUArch, const Target &target) {
+  return 128;
+}
+
+int GetNaiveVecAlignment(const Target &target) {
+  return std::visit(
+      [&](const auto &impl) { return GetNaiveVecAlignmentImpl(impl, target); },
+      target.arch.variant());
+}
+
 void CodeGenLLVM::InitTarget(const Target &target) {
   llvm::InitializeAllTargetInfos();
   llvm::InitializeAllTargets();
   llvm::InitializeAllTargetMCs();
   llvm::InitializeAllAsmParsers();
   llvm::InitializeAllAsmPrinters();
-  switch (target.arch) {
-    case Target::Arch::X86:
-      if (target.bits == Target::Bit::k32) {
-        naive_vec_alignment_ = 256;
-      } else if (target.bits == Target::Bit::k64) {
-        naive_vec_alignment_ = 512;
-      } else {
-        PADDLE_THROW(phi::errors::InvalidArgument("get unknown bits"));
-      }
-      break;
-    case Target::Arch::ARM:
-      naive_vec_alignment_ = 128;
-      break;
-    case Target::Arch::NVGPU:
-      naive_vec_alignment_ = 128;
-      break;
-    case Target::Arch::Unk:
-      PADDLE_THROW(phi::errors::InvalidArgument("unknown Arch found"));
-      break;
-  }
+  naive_vec_alignment_ = GetNaiveVecAlignment(target);
 }
 
 bool LLVM_WillVarLowerAsPointer(const std::string &var_name) {
diff --git a/paddle/cinn/backends/llvm/execution_engine_test.cc b/paddle/cinn/backends/llvm/execution_engine_test.cc
index a66b63248a50d..a13f329a81259 100644
--- a/paddle/cinn/backends/llvm/execution_engine_test.cc
+++ b/paddle/cinn/backends/llvm/execution_engine_test.cc
@@ -108,7 +108,7 @@ auto CreateTestCinnModule() {
   C->Bind(C_buf);
 
   cinn::common::Target target;
-  target.arch = cinn::common::Target::Arch::X86;
+  target.arch = cinn::common::X86Arch{};
   target.bits = cinn::common::Target::Bit::k32;
   target.os = cinn::common::Target::OS::Linux;
   ir::Module::Builder builder("module1", target);
diff --git a/paddle/cinn/common/CMakeLists.txt b/paddle/cinn/common/CMakeLists.txt
index 95227b6f414a4..a8b72866dc1f5 100644
--- a/paddle/cinn/common/CMakeLists.txt
+++ b/paddle/cinn/common/CMakeLists.txt
@@ -7,6 +7,7 @@ gather_srcs(
   cinn_value.cc
   type.cc
   target.cc
+  arch_util.cc
   object.cc
   debug_manager.cc
   info_registry.cc
diff --git a/paddle/cinn/common/arch.h b/paddle/cinn/common/arch.h
new file mode 100644
index 0000000000000..e43dbeadc97ab
--- /dev/null
+++ b/paddle/cinn/common/arch.h
@@ -0,0 +1,71 @@
+// Copyright (c) 2021 CINN Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <functional>
+#include <ostream>
+#include <variant>
+
+namespace cinn {
+namespace common {
+
+struct UnknownArch {};
+
+struct X86Arch {};
+
+struct ARMArch {};
+
+struct NVGPUArch {};
+
+/**
+ * The architecture used by the target. Determines the instruction set to use.
+ */
+using ArchBase = std::variant<UnknownArch, X86Arch, ARMArch, NVGPUArch>;
+struct Arch final : public ArchBase {
+  using ArchBase::ArchBase;
+
+  template <typename VisitorT>
+  decltype(auto) Visit(VisitorT&& visitor) const {
+    return std::visit(visitor, variant());
+  }
+
+  const ArchBase& variant() const {
+    return static_cast<const ArchBase&>(*this);
+  }
+
+  bool operator==(const auto& other) const {
+    return this->index() == other.index();
+  }
+
+  bool operator!=(const auto& other) const { return !(*this == other); }
+};
+
+inline bool IsDefined(Arch arch) {
+  return !std::holds_alternative<UnknownArch>(arch);
+}
+
+}  // namespace common
+}  // namespace cinn
+
+namespace std {
+
+template <>
+struct hash<::cinn::common::Arch> {
+  std::size_t operator()(const ::cinn::common::Arch& arch) const {
+    return arch.index();
+  }
+};
+
+}  // namespace std
diff --git a/paddle/cinn/common/arch_util.cc b/paddle/cinn/common/arch_util.cc
new file mode 100644
index 0000000000000..4f67fff471b6e
--- /dev/null
+++ b/paddle/cinn/common/arch_util.cc
@@ -0,0 +1,41 @@
+// Copyright (c) 2021 CINN Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/cinn/common/arch_util.h"
+
+namespace cinn {
+namespace common {
+
+std::string GetArchNameImpl(UnknownArch arch) { return "Unk"; }
+
+std::string GetArchNameImpl(X86Arch arch) { return "X86"; }
+
+std::string GetArchNameImpl(ARMArch arch) { return "ARM"; }
+
+std::string GetArchNameImpl(NVGPUArch arch) { return "NVGPU"; }
+
+std::string GetArchName(Arch arch) {
+  return std::visit([](const auto& impl) { return GetArchNameImpl(impl); },
+                    arch.variant());
+}
+
+std::ostream& operator<<(std::ostream& os, Arch arch) {
+  os << GetArchName(arch);
+  return os;
+}
+
+}  // namespace common
+}  // namespace cinn
diff --git a/paddle/cinn/common/arch_util.h b/paddle/cinn/common/arch_util.h
new file mode 100644
index 0000000000000..6f2f2adc9700b
--- /dev/null
+++ b/paddle/cinn/common/arch_util.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2021 CINN Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <functional>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "paddle/cinn/common/arch.h"
+
+namespace cinn {
+namespace common {
+
+std::string GetArchName(Arch arch);
+std::ostream& operator<<(std::ostream& os, Arch arch);
+
+}  // namespace common
+}  // namespace cinn
diff --git a/paddle/cinn/common/dev_info_manager.h b/paddle/cinn/common/dev_info_manager.h
index 0f9989f7c67e4..c9a1a9040950c 100644
--- a/paddle/cinn/common/dev_info_manager.h
+++ b/paddle/cinn/common/dev_info_manager.h
@@ -24,7 +24,7 @@
 namespace cinn {
 namespace common {
 
-template <Target::Arch arch>
+template <typename ArchT>
 struct GetDevType {
   using DevType = DevInfoBase;
 };
@@ -32,11 +32,11 @@ struct GetDevType {
 // Extra device should be added here
 class NVGPUDevInfo;
 template <>
-struct GetDevType<Target::Arch::NVGPU> {
+struct GetDevType<NVGPUArch> {
   using DevType = NVGPUDevInfo;
 };
 
-template <Target::Arch arch>
+template <typename ArchT>
 class DevInfoMgr final {
  private:
   explicit DevInfoMgr(int device_num = 0) : device_num_(device_num) {
diff --git a/paddle/cinn/common/target.cc b/paddle/cinn/common/target.cc
index c24c89c29ae1a..57657d01d45a8 100644
--- a/paddle/cinn/common/target.cc
+++ b/paddle/cinn/common/target.cc
@@ -22,6 +22,7 @@
 #include <sstream>
 
 #include "paddle/cinn/backends/cuda_util.h"
+#include "paddle/cinn/common/arch_util.h"
 #include "paddle/cinn/common/target.h"
 #include "paddle/cinn/runtime/cinn_runtime.h"
 #include "paddle/common/enforce.h"
@@ -43,29 +44,57 @@ bool Target::operator==(const Target &other) const {
          features == other.features;
 }
 
-int Target::runtime_arch() const {
-  switch (arch) {
-    case Arch::Unk:
-      return cinn_unk_device;
-    case Arch::X86:
-      return cinn_x86_device;
-    case Arch::ARM:
-      return cinn_arm_device;
-    default:
-      PADDLE_THROW(phi::errors::InvalidArgument("Not supported arch"));
-  }
-  return -1;
+int GetRuntimeArchImpl(UnknownArch) { return cinn_unk_device; }
+
+int GetRuntimeArchImpl(X86Arch) { return cinn_x86_device; }
+
+int GetRuntimeArchImpl(ARMArch) { return cinn_arm_device; }
+
+int GetRuntimeArchImpl(NVGPUArch) {
+  PADDLE_THROW(phi::errors::InvalidArgument("Not supported arch"));
 }
 
-int Target::max_num_threads() const {
-  CHECK(arch == Arch::NVGPU)
-      << "The target is not NVGPU! Cannot get max number of threads.";
-  return 1024;
+int GetRuntimeArch(Arch arch) {
+  return std::visit([](const auto &impl) { return GetRuntimeArchImpl(impl); },
+                    arch.variant());
 }
 
-int Target::get_multi_processor_count() const {
-  CHECK(arch == Arch::NVGPU)
-      << "The target is not NVGPU! Cannot get multi processor count";
+int Target::runtime_arch() const { return GetRuntimeArch(arch); }
+
+int GetMaxNumThreadsImpl(UnknownArch arch) {
+  LOG(FATAL) << "The target is not GPU! Cannot get max number of threads.";
+}
+
+int GetMaxNumThreadsImpl(X86Arch arch) {
+  LOG(FATAL) << "The target is not GPU! Cannot get max number of threads.";
+}
+
+int GetMaxNumThreadsImpl(ARMArch arch) {
+  LOG(FATAL) << "The target is not GPU! Cannot get max number of threads.";
Cannot get max number of threads."; +} + +int GetMaxNumThreadsImpl(NVGPUArch arch) { return 1024; } + +int GetMaxNumThreads(Arch arch) { + return std::visit([](const auto &impl) { return GetMaxNumThreadsImpl(impl); }, + arch.variant()); +} + +int Target::max_num_threads() const { return GetMaxNumThreads(arch); } + +int GetMultiProcessCountImpl(UnknownArch arch) { + LOG(FATAL) << "The target is not GPU! Cannot get multi processor count."; +} + +int GetMultiProcessCountImpl(X86Arch arch) { + LOG(FATAL) << "The target is not GPU! Cannot get multi processor count."; +} + +int GetMultiProcessCountImpl(ARMArch arch) { + LOG(FATAL) << "The target is not GPU! Cannot get multi processor count."; +} + +int GetMultiProcessCountImpl(NVGPUArch arch) { int num_sm = 0; #ifdef CINN_WITH_CUDA cudaDeviceGetAttribute( @@ -74,9 +103,32 @@ int Target::get_multi_processor_count() const { return num_sm; } -int Target::get_max_threads_per_sm() const { - CHECK(arch == Arch::NVGPU) - << "The target is not NVGPU! Cannot get max threads per stream processor"; +int GetMultiProcessCount(Arch arch) { + return std::visit( + [](const auto &impl) { return GetMultiProcessCountImpl(impl); }, + arch.variant()); +} + +int Target::get_multi_processor_count() const { + return GetMultiProcessCount(arch); +} + +int GetMaxThreadsPerSmImpl(UnknownArch arch) { + LOG(FATAL) + << "The target is not GPU! Cannot get max threads per stream processor"; +} + +int GetMaxThreadsPerSmImpl(X86Arch arch) { + LOG(FATAL) + << "The target is not GPU! Cannot get max threads per stream processor"; +} + +int GetMaxThreadsPerSmImpl(ARMArch arch) { + LOG(FATAL) + << "The target is not GPU! Cannot get max threads per stream processor"; +} + +int GetMaxThreadsPerSmImpl(NVGPUArch arch) { int max_thread = 0; #ifdef CINN_WITH_CUDA cudaDeviceGetAttribute( @@ -85,9 +137,30 @@ int Target::get_max_threads_per_sm() const { return max_thread; } -int Target::get_max_blocks_per_sm() const { - CHECK(arch == Arch::NVGPU) - << "The target is not NVGPU! Cannot get max blocks per stream processor"; +int GetMaxThreadsPerSm(Arch arch) { + return std::visit( + [](const auto &impl) { return GetMaxThreadsPerSmImpl(impl); }, + arch.variant()); +} + +int Target::get_max_threads_per_sm() const { return GetMaxThreadsPerSm(arch); } + +int GetMaxBlocksPerSmImpl(UnknownArch) { + LOG(FATAL) + << "The target is not GPU! Cannot get max blocks per stream processor"; +} + +int GetMaxBlocksPerSmImpl(X86Arch) { + LOG(FATAL) + << "The target is not GPU! Cannot get max blocks per stream processor"; +} + +int GetMaxBlocksPerSmImpl(ARMArch) { + LOG(FATAL) + << "The target is not GPU! 
Cannot get max blocks per stream processor";
+}
+
+int GetMaxBlocksPerSmImpl(NVGPUArch) {
   int max_blocks = 1;
 #ifdef CINN_WITH_CUDA
   cudaDeviceGetAttribute(
@@ -96,6 +169,14 @@ int Target::get_max_blocks_per_sm() const {
   return max_blocks;
 }
 
+int GetMaxBlocksPerSm(Arch arch) {
+  return std::visit(
+      [](const auto &impl) { return GetMaxBlocksPerSmImpl(impl); },
+      arch.variant());
+}
+
+int Target::get_max_blocks_per_sm() const { return GetMaxBlocksPerSm(arch); }
+
 std::vector<std::string> Target::get_target_libs() const { return libs; }
 
 int Target::get_target_bits() const {
@@ -133,21 +214,7 @@ std::ostream &operator<<(std::ostream &os, const Target &target) {
   }
   os << ",";
-
-  switch (target.arch) {
-    case Target::Arch::X86:
-      os << "x86";
-      break;
-    case Target::Arch::ARM:
-      os << "arm";
-      break;
-    case Target::Arch::NVGPU:
-      os << "nvgpu";
-      break;
-    case Target::Arch::Unk:
-      os << "unk";
-      break;
-  }
+  os << target.arch;
   os << ",";
 
   switch (target.bits) {
@@ -166,38 +233,19 @@
   return os;
 }
 
-std::ostream &operator<<(std::ostream &os, Target::Arch arch) {
-  switch (arch) {
-    case Target::Arch::Unk:
-      os << "Unk";
-      break;
-    case Target::Arch::X86:
-      os << "X86";
-      break;
-    case Target::Arch::ARM:
-      os << "ARM";
-      break;
-    case Target::Arch::NVGPU:
-      os << "NVGPU";
-      break;
-  }
-  return os;
-}
-
 const Target &UnkTarget() {
   static Target target(
-      Target::OS::Unk, Target::Arch::Unk, Target::Bit::Unk, {}, {});
+      Target::OS::Unk, UnknownArch{}, Target::Bit::Unk, {}, {});
   return target;
 }
 
 const Target &DefaultHostTarget() {
-  static Target target(
-      Target::OS::Linux, Target::Arch::X86, Target::Bit::k64, {}, {});
+  static Target target(Target::OS::Linux, X86Arch{}, Target::Bit::k64, {}, {});
   return target;
 }
 
 const Target &DefaultNVGPUTarget() {
   static Target target(
-      Target::OS::Linux, Target::Arch::NVGPU, Target::Bit::k64, {}, {});
+      Target::OS::Linux, NVGPUArch{}, Target::Bit::k64, {}, {});
   return target;
 }
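One practical effect of the Get*Impl overload sets introduced in target.cc above: std::visit requires the visitor to handle every alternative, so extending the Arch variant forces each per-arch function to be updated at compile time. A sketch of that property (HygonDCUArch is a hypothetical new backend, not part of this diff):

#include <variant>

struct X86Arch {};
struct NVGPUArch {};
struct HygonDCUArch {};  // hypothetical new backend

using Arch = std::variant<X86Arch, NVGPUArch, HygonDCUArch>;

int GetMaxNumThreadsImpl(X86Arch) { return 0; }  // not a GPU
int GetMaxNumThreadsImpl(NVGPUArch) { return 1024; }
// Deleting the overload below makes GetMaxNumThreads fail to compile the
// moment HygonDCUArch joins the variant; the old enum-based switch would
// have compiled and misbehaved at runtime instead.
int GetMaxNumThreadsImpl(HygonDCUArch) { return 1024; }

int GetMaxNumThreads(Arch arch) {
  return std::visit(
      [](const auto& impl) { return GetMaxNumThreadsImpl(impl); }, arch);
}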
diff --git a/paddle/cinn/common/target.h b/paddle/cinn/common/target.h
index 9fdc1d9939360..6df1d1ece8c5f 100644
--- a/paddle/cinn/common/target.h
+++ b/paddle/cinn/common/target.h
@@ -17,7 +17,10 @@
 #include
 #include
 #include
+#include
 #include
+#include "paddle/cinn/adt/adt.h"
+#include "paddle/cinn/common/arch.h"
 
 namespace cinn {
 namespace common {
@@ -33,16 +36,6 @@ struct Target {
     Windows,
   };
 
-  /**
-   * The architecture used by the target. Determines the instruction set to use.
-   */
-  enum class Arch : int {
-    Unk = -1,
-    X86,
-    ARM,
-    NVGPU,
-  };
-
   enum class Bit : int {
     Unk = -1,
     k32,
@@ -50,7 +43,7 @@
   };
 
   OS os{OS::Unk};
-  Arch arch{Arch::Unk};
+  Arch arch{UnknownArch{}};
   Bit bits{Bit::Unk};
 
   enum class Feature : int {
@@ -69,13 +62,13 @@
   std::vector<std::string> libs;
 
   explicit Target(OS o = OS::Linux,
-                  Arch a = Arch::Unk,
+                  Arch a = UnknownArch{},
                   Bit b = Bit::Unk,
                   const std::vector<Feature>& features = {},
                   const std::vector<std::string>& libs = {});
 
   bool defined() const {
-    return os != OS::Unk && arch != Arch::Unk && bits != Bit::Unk;
+    return os != OS::Unk && IsDefined(arch) && bits != Bit::Unk;
   }
 
   //! Get the Runtime architecture, it is casted to integer to avoid header file
@@ -113,7 +106,5 @@
 int GetMaxThreads();
 
 int GetMaxBlocks();
 
-std::ostream& operator<<(std::ostream& os, Target::Arch arch);
-
 }  // namespace common
 }  // namespace cinn
diff --git a/paddle/cinn/frontend/CMakeLists.txt b/paddle/cinn/frontend/CMakeLists.txt
index 2ba6ccd12e5bf..1d5315e1f965a 100755
--- a/paddle/cinn/frontend/CMakeLists.txt
+++ b/paddle/cinn/frontend/CMakeLists.txt
@@ -62,7 +62,7 @@ add_subdirectory(paddle)
 add_subdirectory(decomposer)
 add_subdirectory(op_mappers)
 add_subdirectory(pass)
-# add_subdirectory(group_cluster)
+#add_subdirectory(group_cluster)
 
 cinn_cc_test(test_op_mapper_registry SRCS op_mapper_registry_test.cc DEPS
              cinncore)
diff --git a/paddle/cinn/frontend/computation.cc b/paddle/cinn/frontend/computation.cc
index ee7d2ce6b3a82..387fd87f9c709 100644
--- a/paddle/cinn/frontend/computation.cc
+++ b/paddle/cinn/frontend/computation.cc
@@ -58,12 +58,16 @@ std::shared_ptr<ComputationContext> CompileProgram(
   if (ctx->compile_options.use_default_passes) {
     hlir::framework::ApplyPass(ctx->graph.get(), "InferShape");
-
+    target.arch.Visit(adt::match{
+        [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+        [&](common::X86Arch) {
 #ifndef CINN_WITH_CUDA
-    if (target.arch == Target::Arch::X86) {
-      hlir::framework::ApplyPass(ctx->graph.get(), "AlterLayout");
-    }
+          hlir::framework::ApplyPass(ctx->graph.get(), "AlterLayout");
 #endif
+        },
+        [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+        [&](common::NVGPUArch) { CINN_NOT_IMPLEMENTED; },
+    });
     hlir::framework::ApplyPass(ctx->graph.get(), "ConstPropagate");
     hlir::framework::ApplyPasses(ctx->graph.get(), DefaultOpFusionPasses());
   }
@@ -200,34 +204,37 @@ void CinnComputation::SetTensorData(hlir::framework::Tensor &t,
                                     size_t size) {
   void *tdata = t->mutable_data(context_->target, t->type());
   CHECK_EQ(size, t->shape().numel() * t->type().bytes());
-  if (context_->target.arch == Target::Arch::NVGPU) {
+  context_->target.arch.Visit(adt::match{
+      [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+      [&](common::X86Arch) { memcpy(tdata, data, size); },
+      [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+      [&](common::NVGPUArch) {
 #ifdef CINN_WITH_CUDA
-    CUDA_CALL(cudaMemcpy(tdata, data, size, cudaMemcpyHostToDevice));
+        CUDA_CALL(cudaMemcpy(tdata, data, size, cudaMemcpyHostToDevice));
 #else
-    CINN_NOT_IMPLEMENTED
+        CINN_NOT_IMPLEMENTED;
 #endif
-  } else if (context_->target.arch == Target::Arch::X86) {
-    memcpy(tdata, data, size);
-  } else {
-    CINN_NOT_IMPLEMENTED
-  }
+      },
+  });
 }
+
 void CinnComputation::GetTensorData(hlir::framework::Tensor &t,
                                     void *data,
                                     size_t size) {
   void *tdata = t->mutable_data(context_->target, t->type());
   CHECK_EQ(size, t->shape().numel() * t->type().bytes());
-  if (context_->target.arch == Target::Arch::NVGPU) {
+  context_->target.arch.Visit(adt::match{
+      [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+      [&](common::X86Arch) { memcpy(data, tdata, size); },
+      [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+      [&](common::NVGPUArch) {
 #ifdef CINN_WITH_CUDA
-    CUDA_CALL(cudaMemcpy(data, tdata, size, cudaMemcpyDeviceToHost));
+        CUDA_CALL(cudaMemcpy(data, tdata, size, cudaMemcpyDeviceToHost));
 #else
-    CINN_NOT_IMPLEMENTED
+        CINN_NOT_IMPLEMENTED;
 #endif
-  } else if (context_->target.arch == Target::Arch::X86) {
-    memcpy(data, tdata, size);
-  } else {
-    CINN_NOT_IMPLEMENTED
-  }
+      },
+  });
 }
 
 void CinnComputation::GetTensorData(const std::string &tname,
diff --git a/paddle/cinn/frontend/group_cluster/CMakeLists.txt b/paddle/cinn/frontend/group_cluster/CMakeLists.txt
deleted file mode 100644 index 14cb3c1cfa0e8..0000000000000 --- a/paddle/cinn/frontend/group_cluster/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -gather_srcs(group_cluster_src SRCS common_utils.cc pattern_node.cc - pattern_graph.cc) - -add_subdirectory(cluster_policy) - -cc_library(group_cluster SRCS ${group_cluster_src}) diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/CMakeLists.txt b/paddle/cinn/frontend/group_cluster/cluster_policy/CMakeLists.txt deleted file mode 100644 index c5328419c7f7b..0000000000000 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -gather_srcs(group_cluster_src SRCS general_topo_policy.cc policy_manager.cc) - -add_subdirectory(shardable_axes_policy) diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.cc b/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.cc deleted file mode 100644 index 87f8523eda49f..0000000000000 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h" - -namespace cinn::frontend::group_cluster::policy { - -bool GeneralTopoPolicy::CanFuse(const PatternNodePtr upstream, - const PatternNodePtr downstream) { - // TODO(wuzhanfei) topo policy (if lead to loop) - return false; -} - -} // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.cc b/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.cc deleted file mode 100644 index 3f54bacbd3ecd..0000000000000 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.cc +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h" -#include "paddle/common/enforce.h" - -namespace cinn::frontend::group_cluster::policy { - -bool PolicyManager::CanFuse(const PatternNodePtr upstream, - const PatternNodePtr downstream) { - for (const auto& policy : policies_) { - if (!policy->CanFuse(upstream, downstream)) return false; - } - return true; -} - -} // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h b/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h deleted file mode 100644 index f7a2f100add82..0000000000000 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/cinn/frontend/group_cluster/pattern_node.h" - -namespace cinn::frontend::group_cluster::policy { - -class Policy { - public: - virtual bool CanFuse(const PatternNodePtr upstream, - const PatternNodePtr downstream) = 0; -}; - -using PolicyPtr = std::shared_ptr; - -class PolicyManager { - public: - explicit PolicyManager(const std::vector& policies) - : policies_(policies) {} - bool CanFuse(const PatternNodePtr upstream, const PatternNodePtr downstream); - - private: - std::vector policies_; -}; - -} // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/CMakeLists.txt b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/CMakeLists.txt deleted file mode 100644 index 8d3f64fa5bc96..0000000000000 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -gather_srcs(group_cluster_src SRCS shardable_axes_base.cc - shardable_axes_policy.cc) diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.cc b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.cc deleted file mode 100644 index ef58985330b70..0000000000000 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.cc +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once -#include "paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h" -#include "paddle/cinn/frontend/group_cluster/common_utils.h" - -namespace cinn::frontend::group_cluster::policy { - -std::string ShardableAxesInfoManager::GetUniqueName() { - static std::atomic counter = 0; - return "D" + std::to_string(counter); -} - -std::vector CreateNewNamesWithRank(int64_t rank) { - auto result = std::vector(); - for (int64_t i = 0; i < rank; i++) { - result.emplace_back(ShardableAxesInfoManager::GetUniqueName()); - } - return result; -} - -ShardableAxesSignature CreateDefaultSignature(const pir::Operation* op) { - ShardableAxesSignature result = ShardableAxesSignature(); - for (int i = 0; i < op->num_operands(); ++i) { - result.inputs.emplace_back( - CreateNewNamesWithRank(GetRank(op->operand_source(i)))); - } - for (int i = 0; i < op->num_results(); ++i) { - result.outputs.emplace_back(CreateNewNamesWithRank(GetRank(op->result(i)))); - } - return result; -} - -std::optional CreateSignatureForSpecialOps( - const pir::Operation* op) { - if (op->isa()) { - return CreateDefaultSignature(op); - } - return std::nullopt; -} - -ShardableAxesSignature CreateSignatureForReduce( - const pir::Operation* reduce_op) { - CHECK_EQ(reduce_op->num_operands(), 1); - CHECK_EQ(reduce_op->num_results(), 1); - ShardableAxesSignature result = ShardableAxesSignature(); - const size_t input_rank = GetRank(reduce_op->operand_source(0)); - auto input_axes = CreateNewNamesWithRank(input_rank); - - const auto& reduce_axis_idx = GetReduceAxisIdx(reduce_op); - bool keep_dim = GetReduceOpKeepDims(reduce_op); - auto output_axes = std::vector(); - - for (int i = 0; i < input_rank; i++) { - if (std::find(reduce_axis_idx.begin(), reduce_axis_idx.end(), i) != - reduce_axis_idx.end()) { - if (keep_dim) { - output_axes.emplace_back("constant_1"); - } // else do nothing - } else { - output_axes.emplace_back(input_axes[i]); - } - } - - result.inputs.emplace_back(input_axes); - result.outputs.emplace_back(output_axes); - - return result; -} - -ShardableAxesSignature CreateSignatureForElementWise(const pir::Operation* op) { - ShardableAxesSignature result = ShardableAxesSignature(); - - int64_t rank = GetRank(op->result(0)); - auto same_axes = CreateNewNamesWithRank(rank); - - for (int i = 0; i < op->num_operands(); ++i) { - CHECK(rank == GetRank(op->operand_source(i))); - result.inputs.emplace_back(same_axes); - } - for (int i = 0; i < op->num_results(); ++i) { - CHECK(rank == GetRank(op->result(i))); - result.outputs.emplace_back(same_axes); - } - return result; -} - -ShardableAxesSignature CreateSignatureForBroadcast(const pir::Operation* op) { - const auto& broad_cast_value = GetBroadcastOpInputOuputValue(op); - if (!broad_cast_value.has_value()) { - return CreateDefaultSignature(op); - } - const auto& [input, output] = broad_cast_value.value(); - // TODO(wuzhanfei) support broadcast - return CreateDefaultSignature(op); -} - -ShardableAxesSignature CreateShardableSignature(const pir::Operation* op) { - auto special_result = CreateSignatureForSpecialOps(op); - if (special_result != std::nullopt) { - return special_result.value(); - } - - CHECK(op->num_results() == 1) - << "Now we do not support op with multi outputs"; - ShardableAxesSignature result; - const hlir::framework::OpPatternKind kind = GetOpPatternKind(op); - if (kind == hlir::framework::kReduction) { - result = CreateSignatureForReduce(op); - } else if (kind == hlir::framework::kElementWise) { - result = 
CreateSignatureForElementWise(op); - } else if (kind == hlir::framework::kBroadcast) { - result = CreateSignatureForBroadcast(op); - } else { - result = CreateDefaultSignature(op); - } - VLOG(4) << "[ShardableAxesInfoManager] Create Shardable Axes Signature : \n" - << op->name() << " : " << result.DebugStr(); - return result; -} - -ShardableAxesInfoManager::ShardableAxesInfoManager( - const std::vector& ops, - const pir::ShapeConstraintIRAnalysis* shape_analysis) - : ops_(ops), shape_analysis_(shape_analysis) { - for (const auto& op : ops) { - op_signature_map_[op] = CreateShardableSignature(op); - } - - // TODO(wuzhanfei) update value_axes_map_ name_union_ -} - -std::string ShardableAxes::DebugStr() { - std::stringstream ss; - for (const auto& name : axis_names) { - ss << name << ", "; - } - return ss.str(); -} - -std::string ShardableAxesSignature::DebugStr() { - std::stringstream ss; - ss << "ShardableAxes Signature:\n"; - for (int i = 0; i < inputs.size(); i++) { - ss << "input " << i << ": " << inputs[i].DebugStr() << "\n"; - } - for (int i = 0; i < outputs.size(); i++) { - ss << "output " << i << ": " << outputs[i].DebugStr() << "\n"; - } - return ss.str(); -} - -} // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h b/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h deleted file mode 100644 index 43b0634fcb2b6..0000000000000 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h" -#include "paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h" - -namespace cinn::frontend::group_cluster::policy { - -class ShardableAxesPolicy final : virtual public Policy { - public: - ShardableAxesPolicy(const std::vector& ops, - const pir::ShapeConstraintIRAnalysis* shape_analysis) - : axes_info_(ops, shape_analysis) {} - bool CanFuse(const PatternNodePtr upstream, const PatternNodePtr downstream); - - private: - ShardableAxesInfoManager axes_info_; -}; - -} // namespace cinn::frontend::group_cluster::policy diff --git a/paddle/cinn/frontend/group_cluster/common_utils.cc b/paddle/cinn/frontend/group_cluster/common_utils.cc deleted file mode 100644 index 304b05193983e..0000000000000 --- a/paddle/cinn/frontend/group_cluster/common_utils.cc +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/cinn/frontend/group_cluster/common_utils.h" - -namespace cinn::frontend::group_cluster { - -OpPatternKind GetOpPatternKind(const ::pir::Operation* op) { - return hlir::framework::pir::CompatibleInfo::OpKind(*op); -} - -size_t GetRank(pir::Value value) { - return value.type().dyn_cast().dims().size(); -} - -std::vector GetReduceAxisIdx(const pir::Operation* reduce_op) { - const size_t input_rank = GetRank(reduce_op->operand_source(0)); - const auto& attr_val = reduce_op->attributes().at("dim"); - CHECK(attr_val.isa<::pir::ArrayAttribute>()); - const auto& axis_attr = attr_val.dyn_cast<::pir::ArrayAttribute>(); - std::vector reduce_axis_idx; - for (int i = 0; i < axis_attr.size(); ++i) { - int64_t axis = axis_attr.at(i).dyn_cast<::pir::Int64Attribute>().data(); - if (axis < 0) { - axis += input_rank; - } - CHECK_GE(axis, 0); - CHECK_LT(axis, input_rank); - reduce_axis_idx.push_back(axis); - } - return reduce_axis_idx; -} - -bool GetReduceOpKeepDims(const pir::Operation* reduce_op) { - const auto& attr_val = reduce_op->attributes().at("keep_dim"); - CHECK(attr_val.isa<::pir::BoolAttribute>()); - return attr_val.dyn_cast<::pir::BoolAttribute>(); -} - -std::string OpsDebugStr(std::vector ops) { - std::stringstream ss; - pir::IrPrinter printer(ss); - for (const auto* op : ops) { - printer.PrintOperation(const_cast(op)); - ss << "\n"; - } - return ss.str(); -} - -std::optional> GetBroadcastOpInputOuputValue( - const pir::Operation* op) { - auto* mut_op = const_cast(op); - if (op->isa()) { - auto expand_op = mut_op->dyn_cast(); - return std::make_pair(expand_op.x(), expand_op.out()); - } - if (op->isa()) { - auto broadcast_op = mut_op->dyn_cast(); - return std::make_pair(broadcast_op.x(), broadcast_op.out()); - } - VLOG(4) << "[ShardableAxesSignature] Unsupported Broadcast op: " - << op->name(); - return std::nullopt; -} -} // namespace cinn::frontend::group_cluster - -namespace cinn::frontend::group_cluster { - -bool IsTrivialPattern(const StmtPattern& pattern) { - return std::holds_alternative(pattern); -} - -bool IsReducePattern(const StmtPattern& pattern) { - return std::holds_alternative(pattern); -} - -bool IsUnsupportPattern(const StmtPattern& pattern) { - return std::holds_alternative(pattern); -} - -std::vector GetOpsInPattern(const StmtPattern& pattern) { - return std::visit([](const auto& impl) { return impl.ops_; }, pattern); -} - -std::string StmtPatternDebugStr(const StmtPattern& stmt) { - std::stringstream ss; - auto all_ops = GetOpsInPattern(stmt); - ss << "StmtPattern, size " << all_ops.size() << " :\n"; - ss << OpsDebugStr(all_ops); - return ss.str(); -} - -StmtPattern MergePattern(const StmtPattern& first, const StmtPattern& second) { - std::vector ops = - MergeVector(GetOpsInPattern(first), GetOpsInPattern(second)); - if (IsUnsupportPattern(first) || IsUnsupportPattern(second)) { - return UnsupportPattern(ops); - } else if (IsReducePattern(first) || IsReducePattern(second)) { - return ReducePattern(ops); - } else { - return TrivialPattern(ops); - } -} - -StmtPattern ConvertToStmtPattern(const pir::Operation* op) { - const auto& kind = 
GetOpPatternKind(op); - if (kind == hlir::framework::kReduction) { - return ReducePattern({op}); - } else if (kind == hlir::framework::kElementWise || - kind == hlir::framework::kBroadcast || - kind == hlir::framework::kInjective) { - return TrivialPattern({op}); - } else { - return UnsupportPattern({op}); - } -} - -} // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/common_utils.h b/paddle/cinn/frontend/group_cluster/common_utils.h deleted file mode 100644 index af2b6c5cde97d..0000000000000 --- a/paddle/cinn/frontend/group_cluster/common_utils.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "glog/logging.h" - -#include "paddle/cinn/frontend/group_cluster/pattern.h" - -#include "paddle/cinn/common/bfs_walker.h" -#include "paddle/cinn/common/topo_walker.h" - -#include "paddle/cinn/hlir/dialect/operator/ir/cinn_op.h" -#include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" -#include "paddle/cinn/hlir/framework/op.h" -#include "paddle/cinn/utils/string.h" -#include "paddle/pir/include/dialect/control_flow/ir/cf_op.h" - -namespace cinn::frontend::group_cluster { - -using OpPatternKind = cinn::hlir::framework::OpPatternKind; - -OpPatternKind GetOpPatternKind(const ::pir::Operation* op); -size_t GetRank(pir::Value value); -std::vector GetReduceAxisIdx(const pir::Operation* reduce_op); -bool GetReduceOpKeepDims(const pir::Operation* reduce_op); -std::string OpsDebugStr(std::vector ops); -std::optional> GetBroadcastOpInputOuputValue( - const pir::Operation* op); -} // namespace cinn::frontend::group_cluster - -namespace cinn::frontend::group_cluster { - -bool IsTrivialPattern(const StmtPattern& pattern); -bool IsReducePattern(const StmtPattern& pattern); -bool IsUnsupportPattern(const StmtPattern& pattern); - -template -void ExtendVector(std::vector* first, const std::vector& second) { - std::unordered_set visited = - std::unordered_set(first->begin(), first->end()); - for (auto iter = second.begin(); iter != second.end(); iter++) { - if (visited.find(*iter) == visited.end()) { - visited.emplace(*iter); - first->emplace_back(*iter); - } - } -} - -template -std::vector MergeVector(const std::vector& first, - const std::vector& second) { - std::vector result = std::vector(first); - ExtendVector(&result, second); - return result; -} - -std::vector GetOpsInPattern(const StmtPattern& pattern); -std::string StmtPatternDebugStr(const StmtPattern& pattern); -StmtPattern MergePattern(const StmtPattern& first, const StmtPattern& second); - -StmtPattern ConvertToStmtPattern(const pir::Operation* op); -} // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/group_cluster.h b/paddle/cinn/frontend/group_cluster/group_cluster.h deleted file mode 100644 index 950c3b77942a6..0000000000000 --- 
a/paddle/cinn/frontend/group_cluster/group_cluster.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h" -#include "paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h" -#include "paddle/cinn/frontend/group_cluster/pattern_graph.h" - -namespace cinn::frontend { - -inline std::vector> ClusterOps( - const cinn::dialect::GroupOp& group_op) { - const auto& ops = [&] { - std::vector ops; - for (const auto& op : group_op.GetOperators()) { - ops.emplace_back(op); - } - return ops; - }(); - - VLOG(4) << "Start Cluster Ops!"; - VLOG(4) << "Input Group with size " << ops.size() << " :\n" - << group_cluster::OpsDebugStr(ops); - - const auto* shape_analysis = - &pir::ShapeAnalysisManager::Instance().Get(group_op->GetParentProgram()); - - auto shardable_axes_policy = - std::make_shared( - ops, shape_analysis); - auto general_topo_policy = - std::make_shared(); - - auto policy_manager = group_cluster::policy::PolicyManager( - {shardable_axes_policy, general_topo_policy}); - - group_cluster::PatternGraph graph(ops, policy_manager); - return graph.ClusterOps(); -} - -} // namespace cinn::frontend diff --git a/paddle/cinn/frontend/group_cluster/pattern.h b/paddle/cinn/frontend/group_cluster/pattern.h deleted file mode 100644 index c4d7928c28ba2..0000000000000 --- a/paddle/cinn/frontend/group_cluster/pattern.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include -#include "paddle/pir/include/core/operation.h" - -namespace cinn::frontend::group_cluster { - -struct TrivialPattern { - explicit TrivialPattern(const std::vector& ops) - : ops_(ops) {} - std::vector ops_; -}; - -struct ReducePattern { - explicit ReducePattern(const std::vector& ops) - : ops_(ops) {} - std::vector ops_; -}; - -struct UnsupportPattern { - explicit UnsupportPattern(const std::vector& ops) - : ops_(ops) {} - std::vector ops_; -}; - -// UnsupportedPattern can't fuse with any pattern -// Step 1: T x T|R => T|R TrivialPattern can always fuse with -// downstream Step 2: R x T|R => R Use Shardable Axes Policy -// to judge - -// If we want add MatmulPattern => -// StmtPattern = std::variant; Fusion with different Pattern will have specialized logic -// to Judge, Update policy logic for MatmulPattern -using StmtPattern = - std::variant; - -} // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/pattern_graph.cc b/paddle/cinn/frontend/group_cluster/pattern_graph.cc deleted file mode 100644 index 57d2fd1388f77..0000000000000 --- a/paddle/cinn/frontend/group_cluster/pattern_graph.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/cinn/frontend/group_cluster/pattern_graph.h" - -namespace cinn::frontend::group_cluster { - -std::vector> PatternGraph::ClusterOps() { - SinkTrivialPattern(); - FuseReducePattern(); - // TODO(wuzhanfei) need sort here, or do not return from all_pattern_nodes_ - std::vector> result; - std::transform(all_pattern_nodes_.begin(), - all_pattern_nodes_.end(), - std::back_inserter(result), - [](const PatternNodePtr node) { return node->GetOps(); }); - return result; -} - -void PatternGraph::SinkTrivialPattern() { - // TODO(wuzhanfei): need consider Unsupport op here - const auto FindTrivialNode = - [](std::unordered_set all_nodes) -> PatternNodePtr { - for (PatternNodePtr node : all_nodes) { - if (node->IsTrivial() && !node->downstream_.empty()) return node; - } - return nullptr; - }; - - PatternNodePtr upstream; - while ((upstream = FindTrivialNode(all_pattern_nodes_)) != nullptr) { - std::vector fusion_candidate = upstream->downstream_; - upstream->downstream_.clear(); - for (const auto& downstream : fusion_candidate) { - PatternNodePtr new_node = - std::make_shared(upstream, downstream); - AppendNode(new_node); - RemoveNode(downstream); - } - RemoveNode(upstream); - } -} - -void PatternGraph::FuseReducePattern() { - // TODO(wuzhanfei) reduce fusion, similar with implementation in backend -} - -PatternGraph::PatternGraph(const std::vector& ops, - const policy::PolicyManager policy_manager) - : policy_manager_(policy_manager) { - std::unordered_map op_to_node_map; - - for (int i = 0; i < ops.size(); ++i) { - PatternNodePtr node = std::make_shared(ops[i]); - op_to_node_map[ops[i]] = node; - all_pattern_nodes_.emplace(node); - node->sink_op_ = ops[i]; - } - - for (const pir::Operation* op : ops) { - PatternNodePtr cur_node = op_to_node_map[op]; - - // add upstream nodes - for (int i = 0; i < op->num_operands(); ++i) { - ::pir::Operation* input_op = op->operand_source(i).defining_op(); - if (op_to_node_map.find(input_op) != op_to_node_map.end()) { - PatternNodePtr upstream_node = op_to_node_map[input_op]; - cur_node->upstream_.push_back(upstream_node); - upstream_node->downstream_.push_back(cur_node); - } - } - - // add downstream nodes - for (int i = 0; i < op->num_results(); ++i) { - pir::Value related_value = op->result(i); - for (auto consumer_it = related_value.use_begin(); - consumer_it != related_value.use_end(); - ++consumer_it) { - ::pir::Operation* output_op = consumer_it->owner(); - if (op_to_node_map.find(output_op) != op_to_node_map.end()) { - PatternNodePtr downstream_node = op_to_node_map[output_op]; - cur_node->downstream_.push_back(downstream_node); - downstream_node->upstream_.push_back(cur_node); - } - } - } - - if (cur_node->upstream_.empty()) { - entrance_nodes_.emplace(cur_node); - } - - if (cur_node->downstream_.empty()) { - exit_nodes_.emplace(cur_node); - } - } - - VLOG(4) << "PatternGraph Created, pattern node size: " - << all_pattern_nodes_.size(); -} - -void PatternGraph::RemoveNode(PatternNodePtr node) { - if (all_pattern_nodes_.find(node) != all_pattern_nodes_.end()) { - all_pattern_nodes_.erase(node); - } - if (entrance_nodes_.find(node) != entrance_nodes_.end()) { - entrance_nodes_.erase(node); - } - if (exit_nodes_.find(node) != exit_nodes_.end()) { - exit_nodes_.erase(node); - } -} - -void PatternGraph::AppendNode(PatternNodePtr node) { - all_pattern_nodes_.emplace(node); - if (node->upstream_.empty()) { - entrance_nodes_.emplace(node); - } - if (node->downstream_.empty()) { - exit_nodes_.emplace(node); - } -} - -} // namespace 
cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/pattern_graph.h b/paddle/cinn/frontend/group_cluster/pattern_graph.h deleted file mode 100644 index cc3c811eba519..0000000000000 --- a/paddle/cinn/frontend/group_cluster/pattern_graph.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include "paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h" -#include "paddle/cinn/frontend/group_cluster/common_utils.h" -#include "paddle/cinn/frontend/group_cluster/pattern_node.h" - -namespace cinn::frontend::group_cluster { - -class PatternGraph { - public: - PatternGraph(const std::vector& ops, - const policy::PolicyManager policy_manager); - - std::vector> ClusterOps(); - - private: - void SinkTrivialPattern(); - void FuseReducePattern(); - - void RemoveNode(PatternNodePtr node); - void AppendNode(PatternNodePtr node); - - private: - std::unordered_set all_pattern_nodes_; - std::unordered_set entrance_nodes_; - std::unordered_set exit_nodes_; - - const policy::PolicyManager policy_manager_; -}; - -} // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/pattern_node.cc b/paddle/cinn/frontend/group_cluster/pattern_node.cc deleted file mode 100644 index 50c287e679bb4..0000000000000 --- a/paddle/cinn/frontend/group_cluster/pattern_node.cc +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/cinn/frontend/group_cluster/pattern_node.h" - -namespace cinn::frontend::group_cluster { - -PatternNode::PatternNode(const pir::Operation* op) - : sink_op_(op), stmt_pattern_(ConvertToStmtPattern(op)) {} - -PatternNode::PatternNode(PatternNodePtr fused_up_node, - PatternNodePtr fused_down_node) - : sink_op_(fused_down_node->sink_op_), - stmt_pattern_(MergePattern(fused_up_node->stmt_pattern_, - fused_down_node->stmt_pattern_)) { - const auto FindFromVector = - [](std::vector vec, - PatternNodePtr item) -> std::vector::iterator { - return std::find(vec.begin(), vec.end(), item); - }; - - ExtendVector(&upstream_, fused_up_node->upstream_); - ExtendVector(&upstream_, fused_down_node->upstream_); - - upstream_.erase(FindFromVector(upstream_, fused_up_node)); - - ExtendVector(&downstream_, fused_up_node->downstream_); - ExtendVector(&downstream_, fused_down_node->downstream_); - downstream_.erase(FindFromVector(downstream_, fused_down_node)); - - std::vector::iterator iter; - for (const auto& upstream_node : upstream_) { - iter = FindFromVector(upstream_node->downstream_, fused_up_node); - if (iter != upstream_node->downstream_.end()) { - upstream_node->downstream_.erase(iter); - } - iter = FindFromVector(upstream_node->downstream_, fused_down_node); - if (iter != upstream_node->downstream_.end()) { - upstream_node->downstream_.erase(iter); - } - } - - for (const auto& downstream_node : downstream_) { - iter = FindFromVector(downstream_node->upstream_, fused_up_node); - if (iter != downstream_node->upstream_.end()) { - downstream_node->upstream_.erase(iter); - } - iter = FindFromVector(downstream_node->upstream_, fused_down_node); - if (iter != downstream_node->upstream_.end()) { - downstream_node->upstream_.erase(iter); - } - } -} - -std::vector PatternNode::GetOps() const { - return GetOpsInPattern(stmt_pattern_); -} - -bool PatternNode::IsTrivial() const { return IsTrivialPattern(stmt_pattern_); } - -} // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/group_cluster/pattern_node.h b/paddle/cinn/frontend/group_cluster/pattern_node.h deleted file mode 100644 index 2eb957329904a..0000000000000 --- a/paddle/cinn/frontend/group_cluster/pattern_node.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "paddle/cinn/frontend/group_cluster/common_utils.h" - -namespace cinn::frontend::group_cluster { - -struct PatternNode { - using PatternNodePtr = std::shared_ptr; - - explicit PatternNode(const pir::Operation* op); - explicit PatternNode(PatternNodePtr fused_up_node, - PatternNodePtr fused_down_node); - - bool IsTrivial() const; - std::vector GetOps() const; - - StmtPattern stmt_pattern_; - const pir::Operation* sink_op_; - - std::vector upstream_; - std::vector downstream_; -}; - -using PatternNodePtr = PatternNode::PatternNodePtr; -} // namespace cinn::frontend::group_cluster diff --git a/paddle/cinn/frontend/op_mappers/paddle/conv2d.cc b/paddle/cinn/frontend/op_mappers/paddle/conv2d.cc index 21f1645752ffb..c44c77e6f0a1f 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/conv2d.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/conv2d.cc @@ -73,8 +73,15 @@ void Conv2dOpMapper(const paddle::cpp::OpDesc& op_desc, ctx.AddVarModelToProgram(out_name, out->id); } -void DepthwiseConv2dOpMapper(const paddle::cpp::OpDesc& op_desc, - const OpMapperContext& ctx) { +void DepthwiseConv2dOpMapperImpl(common::UnknownArch, + const paddle::cpp::OpDesc& op_desc, + const OpMapperContext& ctx) { + LOG(FATAL) << "NotImplemented."; +} + +void DepthwiseConv2dOpMapperImpl(common::X86Arch, + const paddle::cpp::OpDesc& op_desc, + const OpMapperContext& ctx) { CHECK_EQ(op_desc.Input("Input").size(), 1UL); auto x_name = op_desc.Input("Input").front(); CHECK_EQ(op_desc.Input("Filter").size(), 1UL); @@ -103,30 +110,83 @@ void DepthwiseConv2dOpMapper(const paddle::cpp::OpDesc& op_desc, auto y = ctx.GetVar(y_name); Variable out; - if (ctx.Target().arch == Target::Arch::X86) { - out = ctx.Builder()->Conv2d(x, - y, - strides, - paddings, - dilations, - groups, - data_format, - padding_algorithm); - } else { - out = ctx.Builder()->DepthwiseConv2d(x, - y, - strides, - paddings, - dilations, - groups, - data_format, - padding_algorithm); + out = ctx.Builder()->Conv2d(x, + y, + strides, + paddings, + dilations, + groups, + data_format, + padding_algorithm); + ctx.AddVar(out_name, out); + ctx.AddVarModelToProgram(out_name, out->id); +} + +void DepthwiseConv2dOpMapperImpl(common::ARMArch, + const paddle::cpp::OpDesc& op_desc, + const OpMapperContext& ctx) { + LOG(FATAL) << "NotImplemented."; +} + +void DepthwiseConv2dOpMapperImpl(common::NVGPUArch, + const paddle::cpp::OpDesc& op_desc, + const OpMapperContext& ctx) { + CHECK_EQ(op_desc.Input("Input").size(), 1UL); + auto x_name = op_desc.Input("Input").front(); + CHECK_EQ(op_desc.Input("Filter").size(), 1UL); + auto y_name = op_desc.Input("Filter").front(); + + CHECK_EQ(op_desc.Output("Output").size(), 1UL); + auto out_name = op_desc.Output("Output").front(); + + auto strides = + utils::GetAttrOrDefault>(op_desc, "strides", {1, 1}); + auto paddings = + utils::GetAttrOrDefault>(op_desc, "paddings", {0, 0}); + auto dilations = + utils::GetAttrOrDefault>(op_desc, "dilations", {1, 1}); + auto groups = utils::GetAttrOrDefault(op_desc, "groups", 1); + + auto data_format = + utils::GetAttrOrDefault(op_desc, "data_format", "NCHW"); + if (data_format == "AnyLayout") { + data_format = "NCHW"; } + auto padding_algorithm = utils::GetAttrOrDefault( + op_desc, "padding_algorithm", "EXPLICIT"); + auto x = ctx.GetVar(x_name); + auto y = ctx.GetVar(y_name); + + Variable out; + out = ctx.Builder()->DepthwiseConv2d(x, + y, + strides, + paddings, + dilations, + groups, + data_format, + padding_algorithm); + ctx.AddVar(out_name, out); ctx.AddVarModelToProgram(out_name, 
out->id); } +void DepthwiseConv2dOpMapperByArch(common::Arch arch, + const paddle::cpp::OpDesc& op_desc, + const OpMapperContext& ctx) { + return std::visit( + [&](const auto& impl) { + return DepthwiseConv2dOpMapperImpl(impl, op_desc, ctx); + }, + arch.variant()); +} + +void DepthwiseConv2dOpMapper(const paddle::cpp::OpDesc& op_desc, + const OpMapperContext& ctx) { + return DepthwiseConv2dOpMapperByArch(ctx.Target().arch, op_desc, ctx); +} + void Conv2dGradOpMapper(const paddle::cpp::OpDesc& op_desc, const OpMapperContext& ctx) { // get dy diff --git a/paddle/cinn/frontend/paddle/model_parser.cc b/paddle/cinn/frontend/paddle/model_parser.cc index 086cf11fe34b5..cc59f7a8bdb38 100644 --- a/paddle/cinn/frontend/paddle/model_parser.cc +++ b/paddle/cinn/frontend/paddle/model_parser.cc @@ -77,48 +77,50 @@ void TensorFromStream(std::istream &is, void *buf; size_t size = tensor->shape().numel() * SizeOfType(desc.data_type()); // allocate memory - if (target.arch == Target::Arch::X86) { - switch (static_cast(desc.data_type())) { + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + switch (static_cast(desc.data_type())) { #define SET_TENSOR(desc, type, precision) \ case Type::VarType_Type_##desc: \ buf = tensor->mutable_data(target); \ tensor->set_type(precision); \ break - - SET_TENSOR(FP32, float, Float(32)); - SET_TENSOR(INT8, int8_t, Int(8)); - SET_TENSOR(INT16, int16_t, Int(16)); - SET_TENSOR(INT32, int32_t, Int(32)); - SET_TENSOR(INT64, int64_t, Int(64)); + SET_TENSOR(FP32, float, Float(32)); + SET_TENSOR(INT8, int8_t, Int(8)); + SET_TENSOR(INT16, int16_t, Int(16)); + SET_TENSOR(INT32, int32_t, Int(32)); + SET_TENSOR(INT64, int64_t, Int(64)); #undef SET_TENSOR - default: - std::stringstream ss; - ss << "unknown type " << desc.data_type(); - PADDLE_THROW(phi::errors::InvalidArgument(ss.str())); - } - // tensor->set_persistable(true); - is.read(static_cast(buf), size); - } else if (target.arch == Target::Arch::NVGPU) { + default: + std::stringstream ss; + ss << "unknown type " << desc.data_type(); + PADDLE_THROW(phi::errors::InvalidArgument(ss.str())); + } + // tensor->set_persistable(true); + is.read(static_cast(buf), size); + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - if (desc.data_type() != Type::VarType_Type_FP32) - PADDLE_THROW( - phi::errors::InvalidArgument("[CUDA] The type is not fp32!!")); - auto *data = tensor->mutable_data(target); - tensor->set_type(Float(32)); - std::vector temp(tensor->shape().numel()); - // LOG(INFO) <<"[CUDA] The tensor's size is "<< tensor->shape().numel(); - is.read(reinterpret_cast(temp.data()), size); - CUDA_CALL(cudaMemcpy(reinterpret_cast(data), - temp.data(), - tensor->shape().numel() * sizeof(float), - cudaMemcpyHostToDevice)); + if (desc.data_type() != Type::VarType_Type_FP32) + PADDLE_THROW( + phi::errors::InvalidArgument("[CUDA] The type is not fp32!!")); + auto *data = tensor->mutable_data(target); + tensor->set_type(Float(32)); + std::vector temp(tensor->shape().numel()); + // LOG(INFO) <<"[CUDA] The tensor's size is "<< tensor->shape().numel(); + is.read(reinterpret_cast(temp.data()), size); + CUDA_CALL(cudaMemcpy(reinterpret_cast(data), + temp.data(), + tensor->shape().numel() * sizeof(float), + cudaMemcpyHostToDevice)); #else - PADDLE_THROW(phi::errors::Fatal( - "To use CUDA backends, you need to set WITH_CUDA ON!")); + PADDLE_THROW(phi::errors::Fatal( + "To use CUDA backends, you need to set WITH_CUDA ON!")); #endif - } else { 
- CINN_NOT_IMPLEMENTED - } + }, + }); } void LoadLoDTensor(std::istream &is, diff --git a/paddle/cinn/frontend/paddle_model_to_program.cc b/paddle/cinn/frontend/paddle_model_to_program.cc index 7249c35f19d26..b7e512fe18260 100644 --- a/paddle/cinn/frontend/paddle_model_to_program.cc +++ b/paddle/cinn/frontend/paddle_model_to_program.cc @@ -410,39 +410,98 @@ void PaddleModelToProgram::AddOpMapper_relu6() { var_model_to_program_map_[out_name] = out->id; }; } + +template +Variable AddOpMapperDepthwiseConv2dImpl(common::UnknownArch, + T* net_builder, + const paddle::cpp::OpDesc& op_desc, + const Variable& x, + const Variable& y) { + LOG(FATAL) << "NotImplemented."; +} + +template +Variable AddOpMapperDepthwiseConv2dImpl(common::X86Arch, + T* net_builder, + const paddle::cpp::OpDesc& op_desc, + const Variable& x, + const Variable& y) { + CHECK(op_desc.HasAttr("paddings")); + auto paddings = op_desc.GetAttr>("paddings"); + CHECK(op_desc.HasAttr("strides")); + auto strides = op_desc.GetAttr>("strides"); + CHECK(op_desc.HasAttr("dilations")); + auto dilations = op_desc.GetAttr>("dilations"); + CHECK(op_desc.HasAttr("groups")); + auto groups = op_desc.GetAttr("groups"); + CHECK(op_desc.HasAttr("data_format")); + std::string data_format = op_desc.GetAttr("data_format"); + if (data_format == "AnyLayout") { + data_format = "NCHW"; + } + return net_builder->Conv2d( + x, y, strides, paddings, dilations, groups, data_format); +} + +template +Variable AddOpMapperDepthwiseConv2dImpl(common::ARMArch, + T* net_builder, + const paddle::cpp::OpDesc& op_desc, + const Variable& x, + const Variable& y) { + LOG(FATAL) << "NotImplemented."; +} + +template +Variable AddOpMapperDepthwiseConv2dImpl(common::NVGPUArch, + T* net_builder, + const paddle::cpp::OpDesc& op_desc, + const Variable& x, + const Variable& y) { + CHECK(op_desc.HasAttr("paddings")); + auto paddings = op_desc.GetAttr>("paddings"); + CHECK(op_desc.HasAttr("strides")); + auto strides = op_desc.GetAttr>("strides"); + CHECK(op_desc.HasAttr("dilations")); + auto dilations = op_desc.GetAttr>("dilations"); + CHECK(op_desc.HasAttr("groups")); + auto groups = op_desc.GetAttr("groups"); + CHECK(op_desc.HasAttr("data_format")); + std::string data_format = op_desc.GetAttr("data_format"); + if (data_format == "AnyLayout") { + data_format = "NCHW"; + } + Variable out; + return net_builder->DepthwiseConv2d( + x, y, strides, paddings, dilations, groups, data_format); +} + +template +Variable AddOpMapperDepthwiseConv2d(common::Arch arch, + T* net_builder, + const paddle::cpp::OpDesc& op_desc, + const Variable& x, + const Variable& y) { + return std::visit( + [&](const auto& impl) { + return AddOpMapperDepthwiseConv2dImpl(impl, net_builder, op_desc, x, y); + }, + arch.variant()); +} + void PaddleModelToProgram::AddOpMapper_depthwise_conv2d() { op_mappers_["depthwise_conv2d"] = [&](const paddle::cpp::OpDesc& op_desc) { CHECK_EQ(op_desc.Input("Input").size(), 1UL); auto x_name = op_desc.Input("Input").front(); CHECK_EQ(op_desc.Input("Filter").size(), 1UL); auto y_name = op_desc.Input("Filter").front(); - CHECK_EQ(op_desc.Output("Output").size(), 1UL); - auto out_name = op_desc.Output("Output").front(); - - CHECK(op_desc.HasAttr("paddings")); - auto paddings = op_desc.GetAttr>("paddings"); - CHECK(op_desc.HasAttr("strides")); - auto strides = op_desc.GetAttr>("strides"); - CHECK(op_desc.HasAttr("dilations")); - auto dilations = op_desc.GetAttr>("dilations"); - CHECK(op_desc.HasAttr("groups")); - auto groups = op_desc.GetAttr("groups"); - 
CHECK(op_desc.HasAttr("data_format"));
-    std::string data_format = op_desc.GetAttr<std::string>("data_format");
-    if (data_format == "AnyLayout") {
-      data_format = "NCHW";
-    }
     auto x = GetVar(TransValidVarName(x_name));
     auto y = GetVar(TransValidVarName(y_name));
-    Variable out;
-    if (target_.arch == Target::Arch::X86) {
-      out = net_builder_->Conv2d(
-          x, y, strides, paddings, dilations, groups, data_format);
-    } else {
-      out = net_builder_->DepthwiseConv2d(
-          x, y, strides, paddings, dilations, groups, data_format);
-    }
-
+    auto* net_builder = net_builder_.get();
+    Variable out =
+        AddOpMapperDepthwiseConv2d(target_.arch, net_builder, op_desc, x, y);
+    CHECK_EQ(op_desc.Output("Output").size(), 1UL);
+    auto out_name = op_desc.Output("Output").front();
     AddVar(TransValidVarName(out_name), out);
     var_model_to_program_map_[out_name] = out->id;
   };
@@ -635,13 +694,13 @@ void PaddleModelToProgram::TransposeVar(const std::string& name) {
   auto* var = scope_->FindVar(name);
   if (var) {
     auto& tensor = absl::get<hlir::framework::Tensor>(*var);
-    if (target_.arch == Target::Arch::X86) {
+    if (std::holds_alternative<common::X86Arch>(target_.arch)) {
       float* data = tensor->mutable_data<float>(target_);
       CHECK(tensor->shape().size() == 2)
           << "The y data's shape size of op [mul] is not equal to 2! Please "
             "check.";
       TransposeData(data, tensor->shape().data()[0], tensor->shape().data()[1]);
-    } else if (target_.arch == Target::Arch::NVGPU) {
+    } else if (std::holds_alternative<common::NVGPUArch>(target_.arch)) {
 #ifdef CINN_WITH_CUDA
       // To use cublas mul api, there is no need to transpose data.
 #ifndef CINN_WITH_CUDNN
@@ -691,13 +750,13 @@ void PaddleModelToProgram::ReverseHWVar(const std::string& name) {
   auto* var = scope_->FindVar(name);
   if (var) {
     auto& tensor = absl::get<hlir::framework::Tensor>(*var);
-    if (target_.arch == Target::Arch::X86) {
+    if (std::holds_alternative<common::X86Arch>(target_.arch)) {
       float* data = tensor->mutable_data<float>(target_);
       CHECK(tensor->shape().size() == 4)
           << "The y data's shape size of op [conv2d] is not equal to 4! Please "
             "check.";
       ReverseHWData(data, tensor->shape().data());
-    } else if (target_.arch == Target::Arch::NVGPU) {
+    } else if (std::holds_alternative<common::NVGPUArch>(target_.arch)) {
 #ifdef CINN_WITH_CUDA
       std::vector<float> data(tensor->shape().numel());
       CUDA_CALL(cudaMemcpy(
diff --git a/paddle/cinn/frontend/pass/gemm_rewriter.cc b/paddle/cinn/frontend/pass/gemm_rewriter.cc
index fe178c0b88137..fae47d5e2a9c5 100644
--- a/paddle/cinn/frontend/pass/gemm_rewriter.cc
+++ b/paddle/cinn/frontend/pass/gemm_rewriter.cc
@@ -40,7 +40,8 @@ class GemmRewriterPass : public ProgramPass {
   void ApplyImpl(Program* prog,
                  const std::unordered_set<std::string>& fetch_ids,
                  const cinn::common::Target& target) override {
-    if (target.arch != Target::Arch::NVGPU || !prog->size()) {
+    if (!std::holds_alternative<common::NVGPUArch>(target.arch) ||
+        !prog->size()) {
       return;
     }
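The adt::match visitation used in computation.cc and model_parser.cc above is, presumably, the classic C++17 "overloaded lambdas" helper; a self-contained sketch of that idiom (the match name and its behavior here are assumptions about CINN's header, not copied from it):

#include <iostream>
#include <variant>

struct X86Arch {};
struct NVGPUArch {};
using Arch = std::variant<X86Arch, NVGPUArch>;

// Inherit operator() from every lambda so std::visit can pick the overload
// matching the variant's active alternative.
template <class... Ts>
struct match : Ts... {
  using Ts::operator()...;
};
template <class... Ts>
match(Ts...) -> match<Ts...>;  // deduction guide (C++17)

int main() {
  Arch arch = NVGPUArch{};
  std::visit(match{
                 [](X86Arch) { std::cout << "host memcpy path\n"; },
                 [](NVGPUArch) { std::cout << "cudaMemcpy path\n"; },
             },
             arch);
}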
+ if (minimal_inputs->empty()) { + for (const auto& dim_expr : out_dim_exprs) { + if (!dim_expr.isa<std::int64_t>()) return false; + } + } // generate output_dim_expr_attrs ConvertDimExprToAttributes( ir_context, out_dim_exprs, /*out*/ output_dim_expr_attrs); @@ -627,6 +634,26 @@ bool MakeGenerateShapeOpAttribute( *minimal_inputs, symbol_names_in_out_dim_exprs, /*out*/ symbol_bindings); + + // check that every symbol used by the output dim exprs has a binding + for (const auto& symbol_name : symbol_names_in_out_dim_exprs) { + bool has_symbol_binding = false; + for (const auto& symbol_binding : *symbol_bindings) { + const std::string& symbol_binding_name = std::visit( + [&](const auto& symbol_binding) -> const std::string& { + return symbol_binding.symbol_name; + }, + symbol_binding); + if (symbol_name == symbol_binding_name) { + has_symbol_binding = true; + break; + } + } + if (!has_symbol_binding) { + LOG(WARNING) << "no symbol binding found for dim expr: " << symbol_name; + return false; + } + } return true; } diff --git a/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc b/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc index 71f0b9f33f4ec..40008b51a54f2 100644 --- a/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc +++ b/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc @@ -65,7 +65,6 @@ void GroupOp::Build(pir::Builder& builder, // NOLINT std::unique_ptr<pir::Block>&& block) { VLOG(4) << "Start build GroupOp"; if (block && !block->empty()) { - // IR_ENFORCE(block->back().isa<::pir::YieldOp>()); PADDLE_ENFORCE_EQ(block->back().isa<::pir::YieldOp>(), true); auto& op = block->back(); for (size_t i = 0; i < op.num_operands(); ++i) { @@ -83,7 +82,10 @@ pir::Block* GroupOp::block() { pir::Block* GroupOp::block() const { pir::Region& region = (*this)->region(0); - CHECK(!region.empty()); + PADDLE_ENFORCE_EQ(region.empty(), + false, + ::common::errors::Unavailable( + "Required GroupOp's region must not be empty.")); return &region.front(); } @@ -156,7 +158,16 @@ pir::Block* FusionOp::block() { return &region.front(); } -std::vector<pir::Operation*> FusionOp::GetOperators() { +pir::Block* FusionOp::block() const { + pir::Region& region = (*this)->region(0); + PADDLE_ENFORCE_EQ(region.empty(), + false, + ::common::errors::Unavailable( + "Required FusionOp's region must not be empty.")); + return &region.front(); +} + +std::vector<pir::Operation*> FusionOp::GetOperators() const { std::vector<pir::Operation*> rt_ops; for (auto& op : *block()) { rt_ops.push_back(&op); } @@ -192,6 +203,13 @@ void YieldStoreOp::Build(pir::Builder& builder, void YieldStoreOp::VerifySig() {} +bool YieldStoreOp::InferSymbolicShape( + pir::ShapeConstraintIRAnalysis* shape_analysis) { + shape_analysis->SetShapeOrDataForValue( + result(0), shape_analysis->GetShapeOrDataForValue(operand_source(0))); + return true; +} + bool ConcatOp::InferSymbolicShape( pir::ShapeConstraintIRAnalysis* shape_analysis) { VLOG(4) << "Infer symbolic shape for cinn_op.concat"; @@ -298,7 +316,9 @@ void GenerateShapeOp::Build( if (inputs.empty()) { VLOG(3) << "GenerateShapeOp inputs is empty"; for (const auto& attr : output_dim_exprs) { - CHECK(attr.isa<pir::Int64Attribute>()); + PADDLE_ENFORCE(attr.isa<pir::Int64Attribute>(), + ::common::errors::PreconditionNotMet( + "Required attr must be Int64Attribute.")); } } argument.AddInputs(inputs); @@ -460,11 +480,15 @@ bool GenerateShapeOp::InferSymbolicShape( const auto attr_dim_exprs = [&] { std::vector<symbol::DimExpr> dim_exprs{}; pir::Attribute dim_expr_attr = this->attributes().at("output_dim_exprs"); - CHECK(dim_expr_attr.isa<pir::ArrayAttribute>()); + PADDLE_ENFORCE(dim_expr_attr.isa<pir::ArrayAttribute>(), + ::common::errors::PreconditionNotMet( + "Required dim_expr_attr must be an ArrayAttribute.")); auto array = dim_expr_attr.dyn_cast<pir::ArrayAttribute>();
for (int i = 0; i < array.size(); ++i) { const auto& dim_expr = ConvertAttributeToDimExpr(array.at(i)); - CHECK(dim_expr.has_value()); + PADDLE_ENFORCE(dim_expr.has_value(), + ::common::errors::PreconditionNotMet( + "Required dim_expr.has_value()==true.")); dim_exprs.push_back(dim_expr.value()); } return dim_exprs; }(); @@ -474,7 +498,9 @@ bool GenerateShapeOp::InferSymbolicShape( this->attributes().at("symbol_bindings"); auto symbol_bindings = ConvertAttributeToSymbolBindings(symbol_bindings_attr); - CHECK(symbol_bindings.has_value()); + PADDLE_ENFORCE(symbol_bindings.has_value(), + ::common::errors::PreconditionNotMet( + "Required symbol_bindings.has_value()==true.")); return symbol_bindings.value(); }(); auto DimExprs4InputDim = diff --git a/paddle/cinn/hlir/dialect/operator/ir/manual_op.h b/paddle/cinn/hlir/dialect/operator/ir/manual_op.h index d350cbb3d5208..34c53ed2ebe6b 100644 --- a/paddle/cinn/hlir/dialect/operator/ir/manual_op.h +++ b/paddle/cinn/hlir/dialect/operator/ir/manual_op.h @@ -77,7 +77,8 @@ class IR_API FusionOp : public pir::Op { const cinn::dialect::GroupInfo &group_info); pir::Block *block(); - std::vector<pir::Operation *> GetOperators(); + pir::Block *block() const; + std::vector<pir::Operation *> GetOperators() const; void VerifySig(); @@ -86,7 +87,9 @@ // YieldStoreOp represents a store operation for // separate local variable and output -class IR_API YieldStoreOp : public pir::Op<YieldStoreOp> { +class IR_API YieldStoreOp + : public pir::Op<YieldStoreOp, + paddle::dialect::InferSymbolicShapeInterface> { public: using Op::Op; static const char *name() { return "cinn_op.yield_store"; } @@ -98,6 +101,8 @@ pir::Type output_type); void VerifySig(); + + bool InferSymbolicShape(pir::ShapeConstraintIRAnalysis *shape_analysis); }; class IR_API ConcatOp diff --git a/paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt b/paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt index e329b8886f18b..de3f79a112b6f 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt +++ b/paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt @@ -7,7 +7,7 @@ set(cinn_transforms_deps cinn_op_dialect op_dialect_vjp cinn_runtime_dialect - # group_cluster + op_fusion pir_compiler) cinn_cc_library(cinn_transforms SRCS ${cinn_transforms_srcs} DEPS @@ -16,4 +16,4 @@ cinn_cc_library(cinn_transforms SRCS ${cinn_transforms_srcs} DEPS cc_library( add_cinn_pass SRCS add_cinn_pass.cc - DEPS op_dialect pir cinn_op_dialect cinnapi pir_transforms cinn_transforms) + DEPS pir_transforms cinn_transforms) diff --git a/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc index 3b6b1adcdbda1..e69b0e7d96bd1 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.cc @@ -29,8 +29,8 @@ #include "paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/dynamic_reshape_pass.h" +#include "paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/fuse_shape_ops_into_generate_shape_op_pass.h" -#include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_0d_to_1d_pass.h" #include
"paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.h" @@ -38,11 +38,9 @@ #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/move_generate_shape_ops_to_prologue_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/simplify_dim_expr_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/single_op_fallback_to_phi.h" -#include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/insert_broadcast_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.h" -#include "paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/replace_dynamic_expand_pass.h" #include "paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.h" #include "paddle/fluid/pir/transforms/build_cinn_pass.h" @@ -50,13 +48,16 @@ #include "paddle/fluid/pir/transforms/shape_optimization_pass.h" COMMON_DECLARE_bool(print_ir); -COMMON_DECLARE_bool(check_infer_symbolic); +COMMON_DECLARE_bool(disable_dyshape_in_train); PD_DECLARE_bool(group_schedule_tiling_first); namespace cinn::dialect::ir { namespace { bool HasDynamicShape(const pir::Program& program) { + if (FLAGS_disable_dyshape_in_train) { + return false; + } for (const auto& op : *program.block()) { if (op.isa()) { continue; @@ -92,11 +93,6 @@ void ApplyCinnPreprocessPass( std::shared_ptr pass_manager = CreatePassManager(); bool has_dynamic_shape = HasDynamicShape(*program); - if (!has_dynamic_shape && FLAGS_check_infer_symbolic) { - pass_manager->AddPass(pir::CreateShapeOptimizationPass()); - pass_manager->AddPass(cinn::dialect::ir::CreateCheckInferSymbolicPass()); - } - if (has_dynamic_shape) { pass_manager->AddPass(cinn::dialect::ir::CreateConvert0DTo1DPass()); pass_manager->AddPass(pir::CreateShapeOptimizationPass()); @@ -118,7 +114,7 @@ void ApplyBuildGroupOpPass( if (has_dynamic_shape) { pass_manager->AddPass(pir::CreateShapeOptimizationPass()); } - pass_manager->AddPass(cinn::dialect::ir::CreateRemoveUnchangedReshapePass()); + pass_manager->AddPass(cinn::dialect::ir::CreateFoldManipulationOpsPass()); pass_manager->AddPass(pir::CreateBuildCinnPass()); @@ -134,8 +130,6 @@ void ApplyGroupOpPass(::pir::Program* program, if (HasDynamicShape(*program)) { pass_manager->AddPass(::pir::CreateShapeOptimizationPass()); pass_manager->AddPass(cinn::dialect::ir::CreateInsertBroadcastPass()); - pass_manager->AddPass( - cinn::dialect::ir::CreateSubstituteDimExprBasedOnConstraintsPass()); pass_manager->AddPass(cinn::dialect::ir::CreateSimplifyDimExprPass()); pass_manager->AddPass( cinn::dialect::ir::CreateFuseShapeOpsIntoGenerateShapeOpPass()); @@ -145,7 +139,7 @@ void ApplyGroupOpPass(::pir::Program* program, pass_manager->AddPass(cinn::dialect::ir::CreateDynamicReshapeOpPass()); pass_manager->AddPass(pir::CreateDeadCodeEliminationPass()); - pass_manager->AddPass(cinn::dialect::ir::CreateRemoveUnchangedReshapePass()); + pass_manager->AddPass(cinn::dialect::ir::CreateFoldManipulationOpsPass()); pass_manager->Run(program); } diff --git a/paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.cc 
b/paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.cc index 143f72985a3bf..d66943dfc8bf9 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/add_store_in_fusion_op_pass.cc @@ -35,42 +35,11 @@ class AddYieldStoreInFusionOpPattern auto& shape_analysis = pir::ShapeAnalysisManager::Instance().Get(op->GetParentProgram()); for (auto i = 0; i < op->num_operands(); ++i) { - if (auto reshape_op = op->operand_source(i) - .defining_op() - ->dyn_cast<cinn::dialect::ReshapeOp>()) { - if (reshape_op.operand_source(0).defining_op() == nullptr) { - continue; - } - auto pre_name = reshape_op.operand_source(0).defining_op()->name(); - - if (op->operand_source(i).use_count() > 1) { - continue; - } - - if ((pre_name != "cinn_op.reduce_sum") && - (pre_name != "cinn_op.reduce_max")) { - auto store_op = rewriter.Build<cinn::dialect::YieldStoreOp>( - op->operand_source(i).defining_op()->operand_source(0), - op->operand_source(i).type()); - - if (shape_analysis.HasShapeOrDataForValue(reshape_op->result(0))) { - shape_analysis.SetShapeOrDataForValue( - store_op.result(0), - shape_analysis.GetShapeOrDataForValue(reshape_op->result(0))); - } - - op->operand(i).set_source(store_op.result(0)); - if (reshape_op->result(0).use_count() == 0) { - rewriter.EraseOp(reshape_op); - } - continue; - } - } - if (op->operand_source(i).use_count() == 1) { continue; } + rewriter.SetInsertionPointAfter(op->operand_source(i).defining_op()); auto store_op = rewriter.Build<cinn::dialect::YieldStoreOp>( op->operand_source(i), op->operand_source(i).type()); auto orignal_base = op->operand_source(i); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.cc new file mode 100644 index 0000000000000..d5ec3042186e3 --- /dev/null +++ b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.cc @@ -0,0 +1,339 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.h" + +#include "paddle/cinn/hlir/dialect/operator/ir/generate_shape_util.h" +#include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" +#include "paddle/cinn/hlir/dialect/operator/ir/op_dialect.h" +#include "paddle/cinn/hlir/dialect/runtime/ir/runtime_dialect.h" +#include "paddle/common/ddim.h" +#include "paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h" +#include "paddle/fluid/pir/dialect/operator/ir/control_flow_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/pir/include/core/builtin_type.h" +#include "paddle/pir/include/pattern_rewrite/pattern_rewrite_driver.h" + +namespace cinn { +namespace dialect { +namespace ir { + +namespace { + +class BlockDimExprsAsserter { + public: + BlockDimExprsAsserter(const DimExprs4ValueT& func, + pir::IrContext* ir_ctx, + pir::Block* block) + : GraphDimExprs4Value(func), + ir_ctx_(ir_ctx), + block_(block), + builder_(ir_ctx, block) {} + + void AssertDimExprs() { + const auto ops = [&] { + std::vector<pir::Operation*> ops; + ops.reserve(block_->size()); + for (auto& op : *block_) { + ops.push_back(&op); + } + return ops; + }(); + for (auto* op : ops) { + if (op->num_regions() == 0) { + AssertDimExprForOutput(op); + } else { + AssertOpRegions(op); + } + } + } + + private: + void AssertOpRegions(const pir::Operation* op) { + for (std::size_t i = 0; i < op->num_regions(); ++i) { + for (auto& block : op->region(i)) { + BlockDimExprsAsserter asserter(GraphDimExprs4Value, ir_ctx_, &block); + asserter.AssertDimExprs(); + } + } + } + + void InitLocalShapeAnalysis(const pir::Operation& op, + pir::ShapeConstraintIRAnalysis* shape_analysis) { + auto VisitEachInputAndDimExprs = [&](const auto& Visit) { + for (int i = 0; i < op.num_operands(); ++i) { + pir::Value input = op.operand_source(i); + const auto& value_dim_exprs = GraphDimExprs4Value(input); + Visit(input, value_dim_exprs); + } + }; + auto NewSymbolReplacedDimExprs = [&](const auto& dim_exprs) { + auto NewSymbolReplaced = [shape_analysis](const auto& dim_expr) { + if (dim_expr.template isa<std::int64_t>()) return dim_expr; + return symbol::DimExpr(shape_analysis->GetNextSymName()); + }; + std::vector<symbol::DimExpr> ret; + ret.reserve(dim_exprs.size()); + for (const auto& dim_expr : dim_exprs) { + ret.push_back(NewSymbolReplaced(dim_expr)); + } + return ret; + }; + auto NewSymbolReplacedTensor = + [&](const symbol::TensorShapeOrDataDimExprs& tensor_shape_or_data) { + auto shape = NewSymbolReplacedDimExprs(tensor_shape_or_data.shape()); + const auto& data = tensor_shape_or_data.data(); + if (!data.has_value()) { + return symbol::ShapeOrDataDimExprs( + symbol::TensorShapeOrDataDimExprs(shape)); + } else { + auto replaced_data = NewSymbolReplacedDimExprs(data.value()); + return symbol::ShapeOrDataDimExprs( + symbol::TensorShapeOrDataDimExprs(shape, replaced_data)); + } + }; + auto NewSymbolReplacedTensorList = + [&](const symbol::TensorListShapeOrDataDimExprs& shape_or_data_list) { + symbol::TensorListShapeOrDataDimExprs ret; + ret.reserve(shape_or_data_list.size()); + for (auto& shape_or_data : shape_or_data_list) { + const auto& replaced_shape_or_data = + NewSymbolReplacedTensor(shape_or_data); + ret.push_back(replaced_shape_or_data + .dyn_cast<symbol::TensorShapeOrDataDimExprs>()); + } + return symbol::ShapeOrDataDimExprs(ret); + }; + auto GetNewSymbolReplaced = [&](const auto& value_dim_exprs) { + auto patterns =
symbol::Overloaded{NewSymbolReplacedTensor, + NewSymbolReplacedTensorList}; + return std::visit(patterns, value_dim_exprs.variant()); + }; + VisitEachInputAndDimExprs([&](auto value, const auto& value_dim_exprs) { + const auto& new_symbol_replaced = GetNewSymbolReplaced(value_dim_exprs); + shape_analysis->SetShapeOrDataForValue(value, new_symbol_replaced); + }); + } + + DimExprs4ValueT MakeOpDimExprs4Value(const pir::Operation* op) { + auto shape_analysis = std::make_shared<pir::ShapeConstraintIRAnalysis>(); + InitLocalShapeAnalysis(*op, shape_analysis.get()); + + pir::Operation* mut_op = const_cast<pir::Operation*>(op); + auto interface = + mut_op->dyn_cast<paddle::dialect::InferSymbolicShapeInterface>(); + if (!interface) { + PADDLE_THROW(phi::errors::Unimplemented( + op->name() + " DOES NOT have InferSymbolicShapeInterface!")); + } else { + bool infer_result = interface.InferSymbolicShape(shape_analysis.get()); + PADDLE_ENFORCE_EQ(infer_result, + true, + ::common::errors::PreconditionNotMet( + "InferSymbolicShape for %s failed.", op->name())); + } + return [shape_analysis]( + pir::Value value) -> const symbol::ShapeOrDataDimExprs& { + return shape_analysis->GetShapeOrDataForValue(value); + }; + } + + void AssertDimExprForOutput(pir::Operation* op) { // NOLINT + VLOG(5) << "Add assert for result of [ " << op->name() << " ]"; + if (!op->HasInterface<paddle::dialect::InferSymbolicShapeInterface>()) { + LOG(INFO) << "skip the checking for [ " << op->name() << " ]"; + return; + } + auto OpDimExprs4Value = MakeOpDimExprs4Value(op); + const auto& inputs = [&] { + std::vector<pir::Value> inputs; + inputs.reserve(op->num_operands()); + for (int i = 0; i < op->num_operands(); ++i) { + const auto& input = op->operand_source(i); + if (input.type().isa()) { + return std::vector<pir::Value>{}; + } + inputs.push_back(input); + } + return inputs; + }(); + if (inputs.empty()) return; + builder_.SetInsertionPointAfter(op); + for (std::size_t i = 0; i < op->num_results(); ++i) { + pir::Value output = op->result(i); + const auto& shape_or_data_dim_expr = GraphDimExprs4Value(output); + if (!shape_or_data_dim_expr.isa<symbol::TensorShapeOrDataDimExprs>()) + continue; + if (shape_or_data_dim_expr.data().has_value()) { + TryAssertDimExprsForOutputData(inputs, output, OpDimExprs4Value); + } else { + TryAssertDimExprsForOutputShape(inputs, output, OpDimExprs4Value); + } + } + } + + void TryAssertDimExprsForOutputShape( + const std::vector<pir::Value>& inputs, + pir::Value output, + const DimExprs4ValueT& OpDimExprs4Value) { + if (!::common::contain_unknown_dim( + output.type() + .dyn_cast<pir::DenseTensorType>() + .dims())) { + return; + } + auto opt_shape_tensor_from_dim_exprs = + BuildShapeTensorFromShapeDimExprs(inputs, output, OpDimExprs4Value); + if (!opt_shape_tensor_from_dim_exprs.has_value()) return; + const auto& shape_tensor_from_dim_exprs = + opt_shape_tensor_from_dim_exprs.value(); + auto shape_tensor_from_infer_meta = BuildShapeTensorFromInferMeta(output); + AddAssertEqual(shape_tensor_from_dim_exprs, shape_tensor_from_infer_meta); + } + + std::optional<pir::Value> BuildShapeTensorFromShapeDimExprs( + const std::vector<pir::Value>& inputs, + pir::Value output, + const DimExprs4ValueT& OpDimExprs4Value) { + const auto& shape_or_data = GraphDimExprs4Value(output); + const auto& dim_exprs = shape_or_data.shape(); + return BuildShapeTensorFromDimExprs(inputs, dim_exprs, OpDimExprs4Value); + } + + std::optional<pir::Value> BuildShapeTensorFromDataDimExprs( + const std::vector<pir::Value>& inputs, + pir::Value output, + const DimExprs4ValueT& OpDimExprs4Value) { + const auto& shape_or_data = GraphDimExprs4Value(output); + const auto& dim_exprs = shape_or_data.data(); + if (!dim_exprs.has_value()) return std::nullopt; + return BuildShapeTensorFromDimExprs( + inputs,
dim_exprs.value(), OpDimExprs4Value); + } + + std::optional<pir::Value> BuildShapeTensorFromDimExprs( + const std::vector<pir::Value>& inputs, + const std::vector<symbol::DimExpr>& dim_exprs, + const DimExprs4ValueT& OpDimExprs4Value) { + const auto& LocalDimExprs4Value = + [&](pir::Value value) -> const symbol::ShapeOrDataDimExprs& { + return OpDimExprs4Value(value); + }; + std::vector<pir::Value> input_tensors{}; + std::vector<pir::Attribute> output_dim_expr_attrs{}; + GenerateShapeOp::SymbolBindings symbol_bindings{}; + bool success = + MakeGenerateShapeOpAttribute(ir_ctx_, + LocalDimExprs4Value, + dim_exprs, + /*origin inputs*/ inputs, + /*minimal inputs*/ &input_tensors, + &output_dim_expr_attrs, + &symbol_bindings); + if (!success) return std::nullopt; + auto out_shape_value = + builder_ + .Build<cinn::dialect::GenerateShapeOp>( + input_tensors, output_dim_expr_attrs, symbol_bindings) + .out(); + return builder_ + .Build<paddle::dialect::CastOp>(out_shape_value, phi::DataType::INT32) + .out(); + } + + pir::Value BuildShapeTensorFromInferMeta(pir::Value output) { + return builder_.Build<paddle::dialect::ShapeOp>(output).out(); + } + + void TryAssertDimExprsForOutputData(const std::vector<pir::Value>& inputs, + pir::Value output, + const DimExprs4ValueT& OpDimExprs4Value) { + auto opt_shape_tensor_from_dim_exprs = + BuildShapeTensorFromDataDimExprs(inputs, output, OpDimExprs4Value); + if (!opt_shape_tensor_from_dim_exprs.has_value()) return; + AddAssertEqual(opt_shape_tensor_from_dim_exprs.value(), output); + } + + size_t GetNumel(pir::Value value) { + const auto& dims = value.type().dyn_cast<pir::DenseTensorType>().dims(); + int64_t numel = ::common::product(dims); + PADDLE_ENFORCE_GE( + numel, + 0, + ::common::errors::InvalidArgument( + "The numel of value must be >= 0, but received numel is %d.", + numel)); + return numel; + } + + void AddAssertEqual(pir::Value lhs, pir::Value rhs) { + size_t lhs_numel = GetNumel(lhs); + size_t rhs_numel = GetNumel(rhs); + PADDLE_ENFORCE_EQ(lhs_numel, + rhs_numel, + ::common::errors::InvalidArgument( + "The numel of lhs and rhs must be equal, but " + "received lhs's numel is [%d], rhs's numel is [%d]", + lhs_numel, + rhs_numel)); + pir::Value lhs_eq_rhs = + builder_.Build<paddle::dialect::EqualOp>(lhs, rhs).out(); + pir::Value all_eq = + builder_.Build<paddle::dialect::AllOp>(lhs_eq_rhs).out(); + builder_.Build<paddle::dialect::AssertOp>(all_eq, lhs_eq_rhs, lhs_numel); + } + + DimExprs4ValueT GraphDimExprs4Value; + pir::IrContext* ir_ctx_; + pir::Block* block_; + pir::Builder builder_; +}; + +class CheckInferSymbolicPass : public pir::Pass { + public: + explicit CheckInferSymbolicPass(const DimExprs4ValueT& func) + : pir::Pass("check_infer_symbolic", 1), GraphDimExprs4Value(func) {} + + void Run(pir::Operation* op) override { + for (uint32_t i = 0; i < op->num_regions(); ++i) { + for (auto& block : op->region(i)) { + auto* ir_ctx = pir::IrContext::Instance(); + BlockDimExprsAsserter asserter(GraphDimExprs4Value, ir_ctx, &block); + asserter.AssertDimExprs(); + } + } + } + + bool CanApplyOn(pir::Operation* op) const override { + return op->isa<pir::ModuleOp>() && op->num_regions() > 0; + } + + private: + DimExprs4ValueT GraphDimExprs4Value; +}; + +} // namespace + +std::unique_ptr<::pir::Pass> CreateCheckInferSymbolicPass( + const DimExprs4ValueT& GraphDimExprs4Value) { + return std::make_unique<CheckInferSymbolicPass>(GraphDimExprs4Value); +} + +} // namespace ir +} // namespace dialect +} // namespace cinn diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.h b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.h similarity index 70% rename from paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.h rename to
paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.h index 30c0dd7b6a7b6..527632d1c1008 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.h @@ -14,15 +14,21 @@ #pragma once +#include <functional> +#include <memory> +#include "paddle/pir/include/dialect/shape/utils/shape_or_data_expr.h" #include "paddle/pir/include/pass/pass.h" namespace cinn { namespace dialect { namespace ir { -// This is a helper pass for substituting DimExpr based on the -// constraints symbol::Equal. -std::unique_ptr<::pir::Pass> CreateSubstituteDimExprBasedOnConstraintsPass(); +using DimExprs4ValueT = + std::function<const symbol::ShapeOrDataDimExprs&(pir::Value)>; +std::unique_ptr<::pir::Pass> CreateCheckInferSymbolicPass( + const DimExprs4ValueT& OptDimExprs4Value); + } // namespace ir } // namespace dialect } // namespace cinn diff --git a/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.cc b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.cc new file mode 100644 index 0000000000000..ff9b9dcd07d9c --- /dev/null +++ b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.h" +#include <functional> +#include <memory> +#include "paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_pass.h" +#include "paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.h" +#include "paddle/common/flags.h" +#include "paddle/fluid/pir/transforms/shape_optimization_pass.h" + +COMMON_DECLARE_bool(check_infer_symbolic); +PD_DECLARE_bool(prim_all); + +namespace cinn { +namespace dialect { +namespace ir { + +namespace { + +DimExprs4ValueT MakeDimExprs4Value( + pir::Program* program, const PassManagerCreater& CreatePassManager) { + std::shared_ptr<pir::PassManager> pass_manager = CreatePassManager(); + pass_manager->AddPass(pir::CreateShapeOptimizationPass()); + pass_manager->Run(program); + const auto* shape_analysis = + &pir::ShapeAnalysisManager::Instance().Get(program); + return + [shape_analysis](pir::Value value) -> const symbol::ShapeOrDataDimExprs& { + return shape_analysis->GetShapeOrDataForValue(value); + }; +} + +} // namespace + +void CheckInferSymbolicIfNeed(pir::Program* program, + const PassManagerCreater& CreatePassManager) { + if (!FLAGS_prim_all || !FLAGS_check_infer_symbolic) return; + const auto& GraphDimExprs4Value = + MakeDimExprs4Value(program, CreatePassManager); + std::shared_ptr<pir::PassManager> pass_manager = CreatePassManager(); + pass_manager->AddPass(CreateCheckInferSymbolicPass(GraphDimExprs4Value)); + pass_manager->AddPass(CreateSplitGenerateShapeIntoShapeOpsPass()); + pass_manager->Run(program); +} + +} // namespace ir +} // namespace dialect +} // namespace cinn diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.h b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.h similarity index 74% rename from paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.h rename to paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.h index 825efa23eedf6..d61dd2c6d27f3 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.h @@ -14,16 +14,21 @@ #pragma once +#include <functional> #include <memory> #include <string> #include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_manager.h" namespace cinn { namespace dialect { namespace ir { -// This is a helper pass for checking the symbolic inference accuracy.
-std::unique_ptr<::pir::Pass> CreateCheckInferSymbolicPass(); +using PassManagerCreater = std::function<std::shared_ptr<pir::PassManager>()>; + +void CheckInferSymbolicIfNeed(pir::Program* program, + const PassManagerCreater& CreatePassManager); + } // namespace ir } // namespace dialect } // namespace cinn diff --git a/paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.cc index 2b8926bca6e60..606d07fd59826 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/cinn_group_cluster_pass.cc @@ -33,6 +33,7 @@ #include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_util.h" #include "paddle/cinn/hlir/framework/pir/utils.h" +#include "paddle/cinn/operator_fusion/group_cluster.h" #include "paddle/common/ddim.h" #include "paddle/common/flags.h" #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" @@ -48,8 +49,7 @@ #include "paddle/pir/include/pattern_rewrite/pattern_match.h" #include "paddle/pir/include/pattern_rewrite/pattern_rewrite_driver.h" -// #include "paddle/cinn/frontend/group_cluster/group_cluster.h" -// PD_DECLARE_bool(cinn_new_cluster_op_method); +PD_DECLARE_bool(cinn_new_cluster_op_method); namespace cinn { namespace dialect { @@ -249,7 +249,6 @@ std::vector<::pir::Value> GenerateOutputValue( if (outside_need_value.count(op->result(i))) { if (!inserted_val.count(op->result(i))) { temp_out.push_back(op->result(i)); - inserted_val.insert(op->result(i)); } } @@ -835,30 +834,39 @@ std::vector<GroupClusterNode> NodeMergeWithNode( return second_stage_output; } -// std::vector<GroupClusterNode> NewOpMergeWithOp( -// cinn::dialect::GroupOp group_op) { -// const auto cluster_result = frontend::ClusterOps(group_op); - -// // Each stmts corresponds to each fusion op(cluster node). -// // Concat all the ops of patterns in the stmts, and make them the op list -// of -// // cluster node. -// VLOG(4) << "Start Creating Cluster Nodes!"; -// std::vector<GroupClusterNode> output_cluster_nodes; -// for (const auto& op_set : cluster_result) { -// GroupClusterNode cluster_node; -// for (const auto* op : op_set) { -// cluster_node.ops.push_back(const_cast<pir::Operation*>(op)); -// auto op_kind = cinn::hlir::framework::pir::CompatibleInfo::OpKind(*op); -// cluster_node.group_kind = -// cluster_node.group_kind > op_kind ? cluster_node.group_kind : -// op_kind; -// } -// output_cluster_nodes.push_back(cluster_node); -// } -// VLOG(4) << "Finished Creating Cluster Nodes!"; -// return output_cluster_nodes; -// } +std::vector<GroupClusterNode> NewOpMergeWithOp( + cinn::dialect::GroupOp group_op) { + std::function<cinn::fusion::FrontendContent(pir::Operation*)> func = + [](pir::Operation* op) { return cinn::fusion::FrontendContent(op); }; + const auto& contents = cinn::fusion::MapVector(group_op.GetOperators(), func); + auto cluster_result = cinn::fusion::ClusterOps(contents); + std::vector<std::vector<pir::Operation*>> result; + std::transform( + cluster_result.begin(), + cluster_result.end(), + std::back_inserter(result), + [](const cinn::fusion::PatternNodePtr node) { + return cinn::fusion::GetOpsInPattern(node->stmt_pattern_); + }); + + // Each stmt corresponds to one fusion op (cluster node). + // Concat all the ops of the patterns in the stmts and make them the op list + // of the cluster node.
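
NewOpMergeWithOp above adapts the new cinn::fusion clustering API to this pass's existing GroupClusterNode representation: MapVector wraps each operation into a FrontendContent, ClusterOps groups them into pattern nodes, and a std::transform with std::back_inserter flattens each node back into a plain op list. A toy, self-contained version of that flattening idiom (PatternNode below is a stand-in, not the real cinn::fusion type):

#include <algorithm>
#include <iostream>
#include <iterator>
#include <memory>
#include <vector>

// Stand-in for a fusion pattern node: it owns a list of "ops" (ints here,
// pir::Operation* in the real pass).
struct PatternNode {
  std::vector<int> ops;
};
using PatternNodePtr = std::shared_ptr<PatternNode>;

int main() {
  const std::vector<PatternNodePtr> cluster_result = {
      std::make_shared<PatternNode>(PatternNode{{1, 2, 3}}),
      std::make_shared<PatternNode>(PatternNode{{4}})};

  // std::transform + std::back_inserter maps each cluster node to its op
  // list, yielding one vector of ops per prospective fusion group.
  std::vector<std::vector<int>> result;
  std::transform(cluster_result.begin(),
                 cluster_result.end(),
                 std::back_inserter(result),
                 [](const PatternNodePtr& node) { return node->ops; });

  for (const auto& ops : result) std::cout << ops.size() << " ops\n";
}

The loop that follows performs the second half of the conversion, folding each op list into a GroupClusterNode while keeping the strongest op kind seen.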
+ VLOG(4) << "Start Creating Cluster Nodes!"; + std::vector output_cluster_nodes; + for (const auto& op_set : result) { + GroupClusterNode cluster_node; + for (const auto* op : op_set) { + cluster_node.ops.push_back(const_cast(op)); + auto op_kind = cinn::hlir::framework::pir::CompatibleInfo::OpKind(*op); + cluster_node.group_kind = + cluster_node.group_kind > op_kind ? cluster_node.group_kind : op_kind; + } + output_cluster_nodes.push_back(cluster_node); + } + VLOG(4) << "Finished Creating Cluster Nodes!"; + return output_cluster_nodes; +} std::vector OpMergeWithOp(cinn::dialect::GroupOp group_op) { // op merge with op @@ -926,9 +934,9 @@ std::vector OpMergeWithOp(cinn::dialect::GroupOp group_op) { std::vector GroupSplit(cinn::dialect::GroupOp group_op) { // stage 1 - // if (FLAGS_cinn_new_cluster_op_method) { - // return NewOpMergeWithOp(group_op); - // } + if (FLAGS_cinn_new_cluster_op_method) { + return NewOpMergeWithOp(group_op); + } auto first_stage_output = OpMergeWithOp(group_op); @@ -1044,14 +1052,12 @@ class CinnGroupClusterPattern // update ir mapping for (size_t i = 0; i < output_values.size(); ++i) { ir_mapping.Add(output_values[i], new_group_op->result(i)); - if (shape_analysis.HasShapeOrDataForValue(output_values[i])) { shape_analysis.SetShapeOrDataForValue( new_group_op->result(i), shape_analysis.GetShapeOrDataForValue(output_values[i])); } } - for (size_t i = 0; i < output_values.size(); ++i) { auto find_it = all_output_values.find(output_values[i]); if ((find_it != all_output_values.end()) && @@ -1062,6 +1068,7 @@ class CinnGroupClusterPattern } } } + rewriter.EraseOp(group_op); return true; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.cc similarity index 69% rename from paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.cc rename to paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.cc index a2c09cc14a8dc..bbd79947314d2 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h" +#include "paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h" #include "paddle/cinn/hlir/dialect/operator/ir/cinn_op.h" #include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_util.h" @@ -75,7 +75,7 @@ bool RemoveOp(pir::Operation* op, pir::PatternRewriter* rewriter) { } template -class RemoveUnchangedReshapePattern : public pir::OpRewritePattern { +class RemoveUnchangedOpPattern : public pir::OpRewritePattern { public: using pir::OpRewritePattern::OpRewritePattern; @@ -85,18 +85,19 @@ class RemoveUnchangedReshapePattern : public pir::OpRewritePattern { } }; -class MergeReshapePattern - : public pir::OpRewritePattern { +template +class MergeRedundantOpPattern : public pir::OpRewritePattern { public: - using pir::OpRewritePattern::OpRewritePattern; + using pir::OpRewritePattern::OpRewritePattern; - bool MatchAndRewrite(cinn::dialect::ReshapeOp op, + bool MatchAndRewrite(OPTYPE op, pir::PatternRewriter& rewriter) const override { - if (auto pre_shape = op->operand_source(0) - .defining_op() - ->dyn_cast()) { - op->operand(0).set_source(pre_shape->operand_source(0)); - + if (auto pre_op = (op->operand_source(0).defining_op()) + ->template dyn_cast()) { + op->operand(0).set_source(pre_op->operand_source(0)); + if (pre_op->use_empty()) { + rewriter.EraseOp(pre_op); + } return true; } @@ -104,18 +105,24 @@ class MergeReshapePattern } }; -class RemoveUnchangedReshapePass : public pir::PatternRewritePass { +class FoldManipulationOpsPass : public pir::PatternRewritePass { public: - RemoveUnchangedReshapePass() - : pir::PatternRewritePass("remove_unchanged_reshape_pass", 1) {} + FoldManipulationOpsPass() + : pir::PatternRewritePass("fold_manipulation_ops_pass", 1) {} pir::RewritePatternSet InitializePatterns(pir::IrContext* context) override { pir::RewritePatternSet ps(context); - // remove out_shape equal in_shape reshape op - ps.Add>(context); - ps.Add>(context); - ps.Add(context); + // remove out_shape equal in_shape ops + ps.Add>(context); + ps.Add>(context); + ps.Add>(context); + ps.Add>(context); + // merge redundant ops + ps.Add>(context); + ps.Add>(context); + ps.Add>(context); + ps.Add>(context); ps.Add(context); return ps; @@ -126,13 +133,12 @@ class RemoveUnchangedReshapePass : public pir::PatternRewritePass { } }; -std::unique_ptr CreateRemoveUnchangedReshapePass() { - return std::make_unique(); +std::unique_ptr CreateFoldManipulationOpsPass() { + return std::make_unique(); } - } // namespace ir } // namespace dialect } // namespace cinn -REGISTER_IR_PASS(remove_unchanged_reshape_pass, - ::cinn::dialect::ir::RemoveUnchangedReshapePass); +REGISTER_IR_PASS(fold_manipulation_ops_pass, + ::cinn::dialect::ir::FoldManipulationOpsPass); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h b/paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h similarity index 93% rename from paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h rename to paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h index ef75306748af2..239ba863389f7 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/remove_unchanged_reshape_pass.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/fold_manipulation_ops_pass.h @@ -21,7 +21,7 @@ namespace cinn { namespace dialect { namespace ir { -std::unique_ptr CreateRemoveUnchangedReshapePass(); +std::unique_ptr 
CreateFoldManipulationOpsPass(); } // namespace ir } // namespace dialect } // namespace cinn diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.cc deleted file mode 100644 index 953e268b27a80..0000000000000 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.cc +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/check_infer_symbolic_pass.h" - -#include "paddle/cinn/hlir/dialect/operator/ir/generate_shape_util.h" -#include "paddle/cinn/hlir/dialect/operator/ir/op_dialect.h" -#include "paddle/cinn/hlir/dialect/runtime/ir/runtime_dialect.h" -#include "paddle/cinn/runtime/flags.h" -#include "paddle/common/flags.h" -#include "paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h" -#include "paddle/pir/include/core/builtin_type.h" - -namespace cinn { -namespace dialect { -namespace ir { - -namespace { - -std::string SprintShape(const std::vector<std::vector<int64_t>>& shapes) { - std::string str; - for (int i = 0; i < shapes.size(); i++) { - str += "["; - for (int j = 0; j < shapes[i].size(); j++) { - str += std::to_string(shapes[i][j]); - if (j != shapes[i].size() - 1) { - str += ", "; - } - } - str += "]"; - if (i != shapes.size() - 1) { - str += ", "; - } - } - return str; -} - -void PrintProgram(pir::ModuleOp m, const std::string& mgs) { - std::ostringstream print_stream; - print_stream << "\n\n"; - m.program()->Print(print_stream); - print_stream << "\n\n"; - VLOG(4) << "===================== " << mgs << " =====================\n" << print_stream.str(); -} - -std::vector<std::vector<int64_t>> GetStaticValueShape(pir::Value value) { - std::vector<std::vector<int64_t>> static_shape; - if (const pir::DenseTensorType& dense_tensor = value.type().dyn_cast<::pir::DenseTensorType>()) { - static_shape.push_back(::common::vectorize(dense_tensor.dims())); - } else if (const pir::VectorType vector_tensor = value.type().dyn_cast<::pir::VectorType>()) { - for (size_t i = 0; i < vector_tensor.size(); i++) { - if (vector_tensor[i].isa<pir::DenseTensorType>()) { - const pir::DenseTensorType& dense_tensor = vector_tensor[i].dyn_cast<::pir::DenseTensorType>(); - static_shape.push_back(::common::vectorize(dense_tensor.dims())); - } - } - } else { - IR_THROW("error:the value doesn't have DenseTensorType"); - } - return static_shape; -} - -std::vector<int64_t> GetShapeFromTensor( const symbol::TensorShapeOrDataDimExprs& tensor_shape_or_data) { - std::vector<int64_t> dynamic_shape; - for (const auto& dim_expr_shape : tensor_shape_or_data.shape()) { - CHECK(dim_expr_shape.Has<std::int64_t>()); - dynamic_shape.push_back(dim_expr_shape.Get<std::int64_t>()); - } - return dynamic_shape; -} - -std::vector<std::vector<int64_t>> GetDynamicValueShape( pir::Value value, const pir::ShapeConstraintIRAnalysis& shape_analysis) { - std::vector<std::vector<int64_t>> dynamic_shapes; - if (!shape_analysis.HasShapeOrDataForValue(value)) {
- return dynamic_shapes; - } - symbol::ShapeOrDataDimExprs shape_or_data = shape_analysis.GetShapeOrDataForValue(value); - auto lambdas = symbol::Overloaded{ - [&](const symbol::TensorShapeOrDataDimExprs& tensor_shape_or_data) { - dynamic_shapes.push_back(GetShapeFromTensor(tensor_shape_or_data)); - }, - [&](const symbol::TensorListShapeOrDataDimExprs& tensor_list) { - for (const auto& tensor_shape_or_data : tensor_list) { - dynamic_shapes.push_back(GetShapeFromTensor(tensor_shape_or_data)); - } - }}; - std::visit(lambdas, shape_or_data.variant()); - return dynamic_shapes; -} - -void CompareStaticAndDynamicValueShape( - pir::Value value, - const pir::ShapeConstraintIRAnalysis& shape_analysis, - int op_index, - pir::ModuleOp module_op) { - std::vector<std::vector<int64_t>> static_value_shape = GetStaticValueShape(value); - std::vector<std::vector<int64_t>> dynamic_value_shape = GetDynamicValueShape(value, shape_analysis); - if (static_value_shape != dynamic_value_shape) { - VLOG(4) << "CheckInferSymbolic failed, in the following program, the " << op_index << "th op : the shape is not equal\nthe static shape is: " << SprintShape(static_value_shape) << ", and the dynamic shape is: " << SprintShape(dynamic_value_shape); - PrintProgram(module_op, "CheckInferSymbolic"); - } -} - -void CheckInferSymbolic(pir::ModuleOp module_op) { - VLOG(4) << "CheckInferSymbolic start"; - int op_index = 0; - const auto& shape_analysis = pir::ShapeAnalysisManager::Instance().Get(module_op.program()); - for (uint32_t i = 0; i < module_op->num_regions(); i++) { - for (const auto& block : module_op->region(i)) { - for (const auto& op : block) { - for (std::size_t j = 0; j < op.num_operands(); ++j) { - CompareStaticAndDynamicValueShape( - op.operand_source(j), shape_analysis, op_index, module_op); - } - for (std::size_t j = 0; j < op.num_results(); ++j) { - CompareStaticAndDynamicValueShape( - op.result(j), shape_analysis, op_index, module_op); - } - op_index++; - } - } - } - VLOG(4) << "CheckInferSymbolic end"; -} - -class CheckInferSymbolicPass : public pir::Pass { - public: - CheckInferSymbolicPass() : pir::Pass("check_infer_symbolic_pass", 1) {} - - void Run(pir::Operation* op) override { - pir::ModuleOp module_op = op->dyn_cast<pir::ModuleOp>(); - CheckInferSymbolic(module_op); - } - - bool CanApplyOn(pir::Operation* op) const override { - return op->isa<pir::ModuleOp>() && op->num_regions() > 0; - } -}; - -} // namespace - -std::unique_ptr<::pir::Pass> CreateCheckInferSymbolicPass() { - return std::make_unique<CheckInferSymbolicPass>(); -} - -} // namespace ir -} // namespace dialect -} // namespace cinn diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.cc index d1550a2bdf257..72219287fe3e3 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_dynamic_to_static_dim_pass.cc @@ -181,10 +181,23 @@ class DynamicToStaticConverter { CHECK(shape_analysis_->HasShapeOrDataForValue(value)); const auto& origin_shape = GetOriginValueShape(value); const auto& target_shape = GetTargetValueShape(value); - CHECK_EQ(origin_shape.size(), target_shape.size()); + PADDLE_ENFORCE_EQ( + origin_shape.size(), + target_shape.size(), + phi::errors::InvalidArgument( + "The size of origin shape and target shape is not equal, " + "where the size of origin shape is %d but the size of target " + "shape is %d.", + origin_shape.size(),
target_shape.size())); for (std::size_t i = 0; i < origin_shape.size(); ++i) { if (origin_shape.at(i) == -1) { - CHECK_GT(target_shape.at(i), 0); + PADDLE_ENFORCE_GT(target_shape.at(i), + 0, + phi::errors::InvalidArgument( + "The size of target shape is incorrect. " + "Expected size is larger than 0, but received %d.", + target_shape.at(i))); update = true; } else { CHECK(origin_shape.at(i) == target_shape.at(i)); diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.cc index e67cb5aacabfa..e20cab270cdd3 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/convert_static_dim_to_dynamic_pass.cc @@ -154,7 +154,15 @@ struct StaticDimToDynamicConverter { const auto& origin_shape = GetOriginValueShape(value); const auto& target_shape = GetTargetValueShape( shape_analysis->GetShapeOrDataForValue(value).shape()); - CHECK_EQ(origin_shape.size(), target_shape.size()); + PADDLE_ENFORCE_EQ( + origin_shape.size(), + target_shape.size(), + phi::errors::InvalidArgument( + "The size of origin shape and target shape is not equal, " + "where the size of origin shape is %d but the size of target " + "shape is %d.", + origin_shape.size(), + target_shape.size())); const auto& origin_type = value.type().dyn_cast<::pir::DenseTensorType>(); pir::DenseTensorType target_type = pir::DenseTensorType::get(pir::IrContext::Instance(), diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc index 79b8a70d28acc..1b0519938c933 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc @@ -1941,7 +1941,14 @@ class GeneralFusionMergePassHelper { } } - CHECK_GE(producer->consumer_groups().size(), candidates.size()); + PADDLE_ENFORCE_GE( + producer->consumer_groups().size(), + candidates.size(), + phi::errors::InvalidArgument( + "The size of producer consumer groups is incorrect. "
+ "Expected size is greater than or equal to %d, but receive %d.", + candidates.size(), + producer->consumer_groups().size())); if (producer->consumer_groups().size() == 0 && candidates.size() == 0 && output_ops_set_.count(producer->CollectOps()[0]) == 0) { producer->belong_groups.insert(*fusionable_consumers->begin()); @@ -2204,8 +2211,24 @@ class GeneralFusionMergePassHelper { CHECK(consumer->belong_groups.size()); consumers.insert(*consumer->belong_groups.begin()); } - CHECK_EQ(group->producer_groups().size(), producers.size()); - CHECK_EQ(group->consumer_groups().size(), consumers.size()); + PADDLE_ENFORCE_EQ( + group->producer_groups().size(), + producers.size(), + phi::errors::InvalidArgument( + "The size of group's producer groups and producers is not equal," + "where the size of group's producer groups:%d but the size of " + "producers:%d.", + group->producer_groups().size(), + producers.size())); + PADDLE_ENFORCE_EQ( + group->consumer_groups().size(), + consumers.size(), + phi::errors::InvalidArgument( + "The size of group's consumer groups and consumers is not equal," + "where the size of group's consumer groups:%d but the size of " + "consumers:%d.", + group->consumer_groups().size(), + consumers.size())); (*group->mut_producer_groups()) = producers; (*group->mut_consumer_groups()) = consumers; } diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.cc deleted file mode 100644 index 97570459eebc1..0000000000000 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.cc +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "paddle/cinn/hlir/dialect/operator/transforms/group_merge/substitute_dim_expr_based_on_constraints_pass.h" - -#include "paddle/cinn/common/union_find.h" -#include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" -#include "paddle/pir/include/dialect/shape/ir/shape_attribute.h" -#include "paddle/pir/include/dialect/shape/utils/dim_expr_util.h" - -namespace cinn { -namespace dialect { -namespace ir { - -namespace { - -template <typename DoEachT> -void VisitEachOp(pir::Operation* op, const DoEachT& DoEach) { - DoEach(op); - for (auto& region : *op) { - for (auto& block : region) { - for (auto& op_in_block : block) { - DoEach(&op_in_block); - } - } - } -} - -template <typename DoEachT> -void VisitEachValue(const pir::Operation* op, const DoEachT& DoEach) { - for (std::size_t i = 0; i < op->num_operands(); ++i) { - DoEach(op->operand_source(i)); - } - for (std::size_t i = 0; i < op->num_results(); ++i) { - DoEach(op->result(i)); - } -} - -symbol::TensorShapeOrDataDimExprs SubstituteTensorShapeOrData( - const symbol::TensorShapeOrDataDimExprs& shape_or_data, - const std::unordered_map<symbol::DimExpr, symbol::DimExpr>& - substitution_pattern) { - auto SubstituteOneDimExpr = - [](const std::vector<symbol::DimExpr>& original_dim_expr, - const std::unordered_map<symbol::DimExpr, symbol::DimExpr>& - substitution_pattern) -> std::vector<symbol::DimExpr> { - std::vector<symbol::DimExpr> substituted_dim_expr{}; - for (const symbol::DimExpr& dim_expr : original_dim_expr) { - const auto& tmp_dim_expr = - symbol::SubstituteDimExpr(dim_expr, substitution_pattern); - substituted_dim_expr.push_back(symbol::SimplifyDimExpr(tmp_dim_expr)); - } - return substituted_dim_expr; - }; - - std::vector<symbol::DimExpr> substituted_shape = - SubstituteOneDimExpr(shape_or_data.shape(), substitution_pattern); - if (!shape_or_data.data().has_value()) { - return symbol::ShapeOrData<symbol::DimExpr>(substituted_shape); - } else { - std::vector<symbol::DimExpr> substituted_data = SubstituteOneDimExpr( - shape_or_data.data().value(), substitution_pattern); - return symbol::ShapeOrData<symbol::DimExpr>(substituted_shape, - substituted_data); - } -} - -symbol::ShapeOrDataDimExprs SubstituteShapeOrData( - const symbol::ShapeOrDataDimExprs& shape_or_data, - const std::unordered_map<symbol::DimExpr, symbol::DimExpr>& - substitution_pattern) { - auto lambdas = symbol::Overloaded{ - [&](const symbol::TensorShapeOrDataDimExprs& tensor_shape_or_data) { - return symbol::ShapeOrDataDimExprs(SubstituteTensorShapeOrData( - tensor_shape_or_data, substitution_pattern)); - }, - [&](const symbol::TensorListShapeOrDataDimExprs& tensor_list) { - symbol::TensorListShapeOrDataDimExprs substituted_tensor_list; - for (symbol::TensorShapeOrDataDimExprs tensor_shape_or_data : - tensor_list) { - substituted_tensor_list.push_back(SubstituteTensorShapeOrData( - tensor_shape_or_data, substitution_pattern)); - } - return symbol::ShapeOrDataDimExprs(substituted_tensor_list); - }}; - return std::visit(lambdas, shape_or_data.variant()); -} - -int GetDimExprPriority(const symbol::DimExpr& dim_expr) { - return std::visit( - symbol::Overloaded{ - [&](std::int64_t) { return 0; }, - [&](const std::string&) { return 1; }, - [&](const symbol::Negative<symbol::DimExpr>&) { return 2; }, - [&](const symbol::Reciprocal<symbol::DimExpr>&) { return 2; }, - [&](const symbol::Add<symbol::DimExpr>&) { return 2; }, - [&](const symbol::Mul<symbol::DimExpr>&) { return 2; }, - [&](const symbol::Max<symbol::DimExpr>&) { return 2; }, - [&](const symbol::Min<symbol::DimExpr>&) { return 2; }, - [&](const symbol::Broadcast<symbol::DimExpr>&) { return 2; }, - }, - dim_expr.variant()); -} - -std::unordered_map<symbol::DimExpr, symbol::DimExpr> GetDimExprSubstitution( - pir::ShapeConstraintIRAnalysis* shape_analysis) { - const std::vector<symbol::DimExprConstraint>& dim_expr_constraints = - shape_analysis->DimExprBuilder().constraints(); - const cinn::common::UnionFindSet<symbol::DimExpr>&
union_find_set = [&]() { - cinn::common::UnionFindSet<symbol::DimExpr> union_find_set; - for (const auto& constraint : dim_expr_constraints) { - CHECK(std::holds_alternative<symbol::Equal<symbol::DimExpr>>(constraint)) - << "The DimExprConstraint type is no Equal, this part is to " - "be completed."; - const auto& data = - std::get<symbol::Equal<symbol::DimExpr>>(constraint).data; - union_find_set.Union(data->lhs, data->rhs); - } - return union_find_set; - }(); - - const std::vector<std::vector<symbol::DimExpr>>& dim_expr_clusters = - union_find_set.Clusters(); - std::unordered_map<symbol::DimExpr, symbol::DimExpr> substitution_pattern; - for (const auto& dim_expr_cluster : dim_expr_clusters) { - CHECK(!dim_expr_cluster.empty()); - auto dim_expr_root = dim_expr_cluster[0]; - for (const auto& dim_expr : dim_expr_cluster) { - if (GetDimExprPriority(dim_expr) < GetDimExprPriority(dim_expr_root)) { - dim_expr_root = dim_expr; - } - } - for (const auto& dim_expr : dim_expr_cluster) { - if (dim_expr != dim_expr_root) { - substitution_pattern[dim_expr] = dim_expr_root; - } - } - } - return substitution_pattern; -} - -void SubstituteDimExprBasedOnConstraints(pir::Operation* region_op) { - VLOG(4) << "SubstituteDimExprBasedOnConstraints start"; - pir::ShapeConstraintIRAnalysis* shape_analysis = - &pir::ShapeAnalysisManager::Instance().Get(region_op->GetParentProgram()); - const std::unordered_map<symbol::DimExpr, symbol::DimExpr>& - substitution_pattern = GetDimExprSubstitution(shape_analysis); - - VisitEachOp(region_op, [&](pir::Operation* op) { - VisitEachValue(op, [&](pir::Value value) { - if (!shape_analysis->HasShapeOrDataForValue(value)) { - VLOG(4) << "Can not find ShapeOrData for value of op(" << op->name() - << ") in shape_analysis"; - } else { - const symbol::ShapeOrDataDimExprs& origin_shape_or_data = - shape_analysis->GetShapeOrDataForValue(value); - VLOG(8) << op->name() - << " origin_shape_or_data: " << origin_shape_or_data; - const symbol::ShapeOrDataDimExprs& substituted_shape_or_data = - SubstituteShapeOrData(origin_shape_or_data, substitution_pattern); - VLOG(8) << op->name() - << " substituted_shape_or_data: " << substituted_shape_or_data; - shape_analysis->SetShapeOrDataForValue(value, - substituted_shape_or_data); - } - }); - if (op->num_regions() > 0) { - return; - } - if (op->num_results() > 0) { - pir::shape::SetShapeAttrForOp( - op, shape_analysis->GetShapeOrDataForValue(op->result(0))); - } else { - pir::shape::SetShapeAttrForOp( - op, shape_analysis->GetShapeOrDataForValue(op->operand_source(0))); - } - }); - VLOG(4) << "SubstituteDimExprBasedOnConstraints end"; -} - -class SubstituteDimExprBasedOnConstraintsPass : public pir::Pass { - public: - SubstituteDimExprBasedOnConstraintsPass() - : pir::Pass("substitute_dim_expr_based_on_constraints_pass", 1) {} - - void Run(pir::Operation* op) override { - SubstituteDimExprBasedOnConstraints(op); - } - - bool CanApplyOn(pir::Operation* op) const override { - return op->num_regions() > 0; - } -}; - -} // namespace - -std::unique_ptr<::pir::Pass> CreateSubstituteDimExprBasedOnConstraintsPass() { - return std::make_unique<SubstituteDimExprBasedOnConstraintsPass>(); -} - -} // namespace ir -} // namespace dialect -} // namespace cinn diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc index 7a8615ad2ef97..22917b41d5b1c 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/broadcast_with_cf.cc @@ -53,7 +53,6 @@ static bool SameInputOutputShape( } void CompileGroupToJitKernelOp( - const std::vector<pir::Value>& group_inputs,
pir::PatternRewriter& rewriter, // NOLINT std::unordered_map<pir::Block*, OpLoweringGroupPtr>* group_map) { // prepare attribute for jit_kernel_op @@ -73,6 +72,7 @@ void CompileGroupToJitKernelOp( auto& yield_op = block->back(); CHECK(yield_op.isa<::pir::YieldOp>()) << "Last op of block should be yield"; rewriter.set_insertion_point(&yield_op); + const auto& group_inputs = GetBlockOutsideInput(group->ops()); auto jit_kernel_op = rewriter.Build<cinn::dialect::JitKernelOp>( group_inputs, op_attr_map.at(group), output_types); CHECK(jit_kernel_op.num_results() == group_output_values.size()); @@ -108,11 +108,12 @@ void UpdateGroupShapeExprs( const auto& origin_shape_or_data = origin_group->GetShapeOrDataExprs(origin_val); if (origin_shape_or_data.data()) { + std::vector<symbol::DimExpr> shape_dim_expr_shape = { + symbol::DimExpr(static_cast<int64_t>(shape_dim_expr.size()))}; new_group->SetShapeOrDataExprs( new_val, symbol::ShapeOrDataDimExprs{symbol::TensorShapeOrDataDimExprs( - std::vector<symbol::DimExpr>{shape_dim_expr.size()}, - shape_dim_expr)}); + shape_dim_expr_shape, shape_dim_expr)}); } else { new_group->SetShapeOrDataExprs( new_val, @@ -134,7 +135,9 @@ bool EraseOneExpand( if (!SameInputOutputShape(expand, ShapeOrDataDimExprs4Value)) continue; auto generate_shape_op = expand.shape().defining_op(); - CHECK_NOTNULL(generate_shape_op); + PADDLE_ENFORCE_NOT_NULL(generate_shape_op, + phi::errors::PreconditionNotMet( + "The generate shape op must not be null.")); rewriter.ReplaceAllUsesWith(expand.out(), expand.x()); rewriter.EraseOp(expand); if (generate_shape_op->use_empty()) { @@ -280,7 +283,15 @@ void SetLeafBlockByGroupView( } auto new_group = CloneGroup(origin_group, block, &ir_mapping); + PADDLE_ENFORCE_EQ( + origin_group->ops().size(), + new_group->ops().size(), + phi::errors::InvalidArgument( + "The size of origin group ops and new group ops is not equal, " + "where the size of origin group ops is %d but the size of new group " + "ops is %d.", + origin_group->ops().size(), + new_group->ops().size())); UpdateGroupShapeExprs(new_group, origin_group, ir_mapping, @@ -500,7 +511,7 @@ pir::Operation* CompileBroadcastTreeToConditionBlock( VLOG(6) << "After simply condition block: " << *program; // 3.
compile condition block to jit_kernel_op - CompileGroupToJitKernelOp(group_inputs, rewriter, &group_map); + CompileGroupToJitKernelOp(rewriter, &group_map); VLOG(6) << "compile condition block to jit_kernel_op: " << *program; return cond_op; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.cc index fd5a71e47c105..7526ad1ab6309 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.cc @@ -14,6 +14,7 @@ #include "paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/collect_sym_expr.h" #include "paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h" +#include "paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_symbolic_shape.h" #include "paddle/pir/include/dialect/shape/utils/dim_expr_util.h" namespace { @@ -135,81 +136,39 @@ bool IsShapeOrDataNeedSubstitute( return ret; } -symbol::TensorShapeOrDataDimExprs SubstituteTensorShapeOrData( - const symbol::TensorShapeOrDataDimExprs& shape_or_data, - const std::unordered_map& dim_expr_map) { - const auto& SimplifyDimExpr = - [&](const std::vector& original_dim_expr) - -> std::vector { - std::vector simplified_dim_expr{}; - for (const symbol::DimExpr& dim_expr : original_dim_expr) { - simplified_dim_expr.push_back(symbol::SimplifyDimExpr( - symbol::SubstituteDimExpr(dim_expr, dim_expr_map))); - } - return simplified_dim_expr; - }; - - std::vector simplified_shape = - SimplifyDimExpr(shape_or_data.shape()); - if (!shape_or_data.data().has_value()) { - return symbol::ShapeOrData(simplified_shape); - } - std::vector simplified_data = - SimplifyDimExpr(shape_or_data.data().value()); - return symbol::ShapeOrData(simplified_shape, - simplified_data); -} - -symbol::ShapeOrDataDimExprs SubstituteShapeOrData( - const symbol::ShapeOrDataDimExprs& shape_or_data, - const std::unordered_map& dim_expr_map) { - auto lambdas = symbol::Overloaded{ - [&](const symbol::TensorShapeOrDataDimExprs& tensor_shape_or_data) { - return symbol::ShapeOrDataDimExprs( - SubstituteTensorShapeOrData(tensor_shape_or_data, dim_expr_map)); - }, - [&](const symbol::TensorListShapeOrDataDimExprs& tensor_list) { - symbol::TensorListShapeOrDataDimExprs simplified_tensor_list; - for (symbol::TensorShapeOrDataDimExprs tensor_shape_or_data : - tensor_list) { - simplified_tensor_list.push_back( - SubstituteTensorShapeOrData(tensor_shape_or_data, dim_expr_map)); - } - return symbol::ShapeOrDataDimExprs(simplified_tensor_list); - }}; - return std::visit(lambdas, shape_or_data.variant()); -} - symbol::ShapeOrDataDimExprs TrySubstitute( const symbol::ShapeOrDataDimExprs& shape_or_data, const std::unordered_map& dim_expr_map) { if (!IsShapeOrDataNeedSubstitute(shape_or_data, dim_expr_map)) { return shape_or_data; } - return SubstituteShapeOrData(shape_or_data, dim_expr_map); + return symbol::SubstituteShapeOrData(shape_or_data, dim_expr_map); } -} // namespace - -namespace cinn::dialect::ir::details { +void InferSymbolicShapeForOperation( + pir::Operation* op, pir::ShapeConstraintIRAnalysis* shape_analysis) { + auto infer_symbolic_shape_interface = + op->dyn_cast(); + if (infer_symbolic_shape_interface) { + infer_symbolic_shape_interface.InferSymbolicShape(shape_analysis); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + op->name() + " DOES NOT have InferSymbolicShapeInterface!")); + } +} 
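The InferSymbolicShapeForOperation helper added above is a thin optional-interface dispatch: it asks the op for its InferSymbolicShapeInterface and fails loudly when none is registered. Below is a self-contained sketch of the same dispatch pattern, using illustrative stand-in types rather than the real PIR API:

#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for pir::Operation: an op either registers an
// "infer symbolic shape" callback (the interface) or it does not.
struct Op {
  std::string name;
  std::function<void()> infer_symbolic_shape;  // empty when unimplemented
};

// Mirrors InferSymbolicShapeForOperation: dispatch through the optional
// interface when present, otherwise fail with an Unimplemented-style error.
void InferSymbolicShapeFor(const Op& op) {
  if (op.infer_symbolic_shape) {
    op.infer_symbolic_shape();
  } else {
    throw std::runtime_error(op.name +
                             " DOES NOT have InferSymbolicShapeInterface!");
  }
}

int main() {
  Op add{"pd_op.add", [] { std::cout << "infer shape of add\n"; }};
  Op mystery{"pd_op.mystery", nullptr};
  InferSymbolicShapeFor(add);  // dispatches through the interface
  try {
    InferSymbolicShapeFor(mystery);  // no interface registered: throws
  } catch (const std::exception& e) {
    std::cout << e.what() << "\n";
  }
  return 0;
}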
std::unordered_map<::pir::Value, symbol::ShapeOrDataDimExprs> -CreateGroupShapeOrDataExprs( - const OpLoweringGroupPtr& group, - pir::ShapeConstraintIRAnalysis& shape_analysis) { // NOLINT - std::unordered_map dim_expr_map = - CollectSubstituteDimExprMap(group, shape_analysis); +GetGroupValue2Shape(const OpLoweringGroupPtr& group, + pir::ShapeConstraintIRAnalysis& shape_analysis) { // NOLINT std::unordered_map<::pir::Value, symbol::ShapeOrDataDimExprs> value2shape; - for (auto* op : group->ops()) { + for (auto op : group->ops()) { for (size_t i = 0; i < op->num_operands(); ++i) { auto operand = op->operand_source(i); if (operand && value2shape.find(operand) == value2shape.end() && shape_analysis.HasShapeOrDataForValue(operand)) { VLOG(6) << "Add value_to_shape_or_data_exprs for " << operand.impl(); value2shape.insert( - {operand, - TrySubstitute(shape_analysis.GetShapeOrDataForValue(operand), - dim_expr_map)}); + {operand, shape_analysis.GetShapeOrDataForValue(operand)}); } } for (size_t i = 0; i < op->num_results(); ++i) { @@ -218,9 +177,49 @@ CreateGroupShapeOrDataExprs( shape_analysis.HasShapeOrDataForValue(result)) { VLOG(6) << "Add value_to_shape_or_data_exprs for " << result.impl(); value2shape.insert( - {result, - TrySubstitute(shape_analysis.GetShapeOrDataForValue(result), - dim_expr_map)}); + {result, shape_analysis.GetShapeOrDataForValue(result)}); + } + } + } + return value2shape; +} + +} // namespace + +namespace cinn::dialect::ir::details { + +std::unordered_map<::pir::Value, symbol::ShapeOrDataDimExprs> +CreateGroupShapeOrDataExprs( + const OpLoweringGroupPtr& group, + pir::ShapeConstraintIRAnalysis& global_shape_analysis) { // NOLINT + std::unordered_map dim_expr_map = + CollectSubstituteDimExprMap(group, global_shape_analysis); + std::unordered_map<::pir::Value, symbol::ShapeOrDataDimExprs> value2shape; + if (dim_expr_map.size() == 0) { + return GetGroupValue2Shape(group, global_shape_analysis); + } + + pir::ShapeConstraintIRAnalysis local_shape_analysis({}); + + // process input values. + VisitEachInputValue(group, [&](::pir::Value value) { + auto new_shape_expr = TrySubstitute( + global_shape_analysis.GetShapeOrDataForValue(value), dim_expr_map); + local_shape_analysis.SetShapeOrDataForValue(value, new_shape_expr); + value2shape.insert({value, new_shape_expr}); + VLOG(6) << "Add value_to_shape_or_data_exprs for " << value.impl(); + }); + + // process the result values of each op. 
+ for (auto* op : group->ops()) { + InferSymbolicShapeForOperation(op, &local_shape_analysis); + for (size_t i = 0; i < op->num_results(); ++i) { + auto result = op->result(i); + if (result && !value2shape.count(result) && + local_shape_analysis.HasShapeOrDataForValue(result)) { + VLOG(6) << "Add value_to_shape_or_data_exprs for " << result.impl(); + value2shape.insert( + {result, local_shape_analysis.GetShapeOrDataForValue(result)}); } } } diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.cc index 0e7ebb8e9499d..3fa26f51b5592 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/lower_cinn_fusion_op_pass.cc @@ -34,6 +34,9 @@ pir::Operation* ProcessDyShapeGroup( const OpLoweringGroupPtr& group, pir::ShapeConstraintIRAnalysis& shape_analysis, // NOLINT pir::PatternRewriter& rewriter) { // NOLINT + // NOTE(dev): Need UpdateShapeOrDataExprs first; the logic + // will be migrated into BucketLower later. + UpdateGroupShapeOrDataExprs(const_cast(group)); auto group_inputs = GetBlockOutsideInput(group->ops()); GroupDimExprInfo group_dim_expr_info = GetGroupDimExprInfo(group); const auto& leaves = group_dim_expr_info.all_value_dim_exprs; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc index e4724c617dfaf..29c127b42d10d 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.cc @@ -78,7 +78,8 @@ CompileGroupAsOpAttribute(const std::vector& group_list) { std::unordered_map GetJitKernelAttr( const OpLoweringGroupPtr& group) { - auto kernel_info = CompilationCache::Instance().GetKernelInfo(group); + hlir::framework::pir::FusionInfo fusion_info(*group); + auto kernel_info = CompilationCache::Instance().GetKernelInfo(fusion_info); std::unordered_map attrs{ {cinn::dialect::JitKernelOp::kAttrName, cinn::dialect::CINNKernelInfoAttribute::get(pir::IrContext::Instance(), @@ -88,33 +89,36 @@ std::unordered_map GetJitKernelAttr( OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr) { auto fusion_op = fusion_op_ptr->dyn_cast(); - auto group = std::make_shared(); - group->set_op_pattern_kind( - cinn::hlir::framework::OpPatternKind::kElementWise); + std::vector<::pir::Operation*> ops; + auto group_op_kind = cinn::hlir::framework::OpPatternKind::kElementWise; + // Rebuild ops of the group + for (auto op : fusion_op.GetOperators()) { + if (!op->isa<::pir::YieldOp>()) { + ops.push_back(op); + group_op_kind = static_cast(CompatibleInfo::OpKind(*op)) > + static_cast(group_op_kind) + ? CompatibleInfo::OpKind(*op) + : group_op_kind; + } + } + + auto group = std::make_shared(ops); + if (fusion_op.attributes().count("group_info")) { auto attr = fusion_op.attribute("group_info") .dyn_cast() .data(); - group->set_op_pattern_kind(attr.op_pattern_kind); + group_op_kind = + static_cast(attr.op_pattern_kind) > static_cast(group_op_kind) + ?
attr.op_pattern_kind + : group_op_kind; group->set_loop_ranges(attr.loop_ranges); group->set_loop_ranges_expr(attr.loop_ranges_expr); - group->set_reduce_axis(attr.reduce_axis); group->set_alignment_schedule_info(attr.alignment_schedule_info); } - - // Rebuild ops of the group - for (auto op : fusion_op.GetOperators()) { - if (!op->isa<::pir::YieldOp>()) { - group->mut_ops().push_back(op); - auto op_pattern_kind = static_cast(CompatibleInfo::OpKind(*op)) > - static_cast(group->op_pattern_kind()) - ? CompatibleInfo::OpKind(*op) - : group->op_pattern_kind(); - group->set_op_pattern_kind(op_pattern_kind); - } - } + group->set_op_pattern_kind(group_op_kind); // Rebuild output_ops and input_ops of the group auto yield_op = fusion_op.GetOperators().back(); @@ -127,10 +131,7 @@ OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr) { // Because the group is rebuilt, the order of group.output_values generated // by BuildCUDAJITInfo may not be same with the order bound in the yield op, // so a mapping is required. - auto& shape_analysis = - pir::ShapeAnalysisManager::Instance().Get(fusion_op->GetParentProgram()); - group->set_value_to_shape_or_data_exprs( - CreateGroupShapeOrDataExprs(group, shape_analysis)); + UpdateGroupShapeOrDataExprs(group); if (FLAGS_cinn_enable_map_expr) { cinn::adt::TryGenerateMapExprFromGroup(group); } @@ -139,4 +140,11 @@ OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr) { return group; } +void UpdateGroupShapeOrDataExprs(OpLoweringGroupPtr group) { + auto& shape_analysis = + pir::ShapeAnalysisManager::Instance().Get(group->GetParentProgram()); + group->set_value_to_shape_or_data_exprs( + CreateGroupShapeOrDataExprs(group, shape_analysis)); +} + } // namespace cinn::dialect::ir::details diff --git a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h index 3b3ba4379d57c..5c5d0c104390a 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/lowering_pass/utils.h @@ -31,4 +31,6 @@ std::unordered_map GetJitKernelAttr( OpLoweringGroupPtr BuildOpLoweringGroup(pir::Operation* fusion_op_ptr); +void UpdateGroupShapeOrDataExprs(OpLoweringGroupPtr group); + } // namespace cinn::dialect::ir::details diff --git a/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc index 3bf32aa91837d..be57629fe8747 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/pd_to_cinn_pass.cc @@ -761,8 +761,8 @@ class FullWithTensorOpPattern bool MatchAndRewrite(paddle::dialect::FullWithTensorOp op, pir::PatternRewriter &rewriter) const override { - auto shape = op->operand_source(0); - auto value = op->operand_source(1); + auto value = op->operand_source(0); + auto shape = op->operand_source(1); if (paddle::dialect::TransToPhiDataType( value.type() diff --git a/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc index 19e7f5060eb96..0f15edcd0b8d6 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/split_generate_shape_into_shape_ops_pass.cc @@ -116,15 +116,32 @@ struct CachedDimExprToValueConverter { } pir::Value 
ConvertTensorDimToValue(const TensorDimInData& tensor_dim) { - return rewriter - ->Build( - tensor_dim.value, - std::vector{0LL}, - std::vector{tensor_dim.axis}, - std::vector{tensor_dim.axis + 1}, - std::vector{}, - std::vector{}) - .out(); + auto CastToInt64IfNeed = [&](pir::Value value) { + if (value.type() + .dyn_cast() + .dtype() + .isa()) { + return value; + } + return rewriter + ->Build(value, phi::DataType::INT64) + .out(); + }; + if (tensor_dim.value.type() + .dyn_cast() + .dims() + .size() == 0) { + return CastToInt64IfNeed(tensor_dim.value); + } + return CastToInt64IfNeed(rewriter + ->Build( + tensor_dim.value, + std::vector{0LL}, + std::vector{tensor_dim.axis}, + std::vector{tensor_dim.axis + 1}, + std::vector{}, + std::vector{}) + .out()); } pir::Value ConvertToValueImpl( @@ -143,7 +160,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl(const symbol::Add& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect. " + "Expected size is larger than 0, but received %d.", + operands->size())); pir::Value acc = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { if (operands->at(i).isa>()) { @@ -162,7 +184,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl(const symbol::Mul& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect. " + "Expected size is larger than 0, but received %d.", + operands->size())); pir::Value prod = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { if (operands->at(i).isa>()) { @@ -182,7 +209,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl(const symbol::Max& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect. " + "Expected size is larger than 0, but received %d.", + operands->size())); pir::Value max = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { pir::Value operand_value = ConvertToValue(operands->at(i)); @@ -193,7 +225,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl(const symbol::Min& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect. " + "Expected size is larger than 0, but received %d.", + operands->size())); pir::Value min = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { pir::Value operand_value = ConvertToValue(operands->at(i)); @@ -205,7 +242,12 @@ struct CachedDimExprToValueConverter { pir::Value ConvertToValueImpl( const symbol::Broadcast& dim_expr) { const auto& [operands] = dim_expr; - CHECK_GT(operands->size(), 0); + PADDLE_ENFORCE_GT(operands->size(), + 0, + phi::errors::InvalidArgument( + "The size of operands is incorrect. "
+ "Expected size is larger than 0, but received %d.", + operands->size())); pir::Value broadcasted = ConvertToValue(operands->at(0)); for (int i = 1; i < operands->size(); ++i) { pir::Value operand_value = ConvertToValue(operands->at(i)); diff --git a/paddle/cinn/hlir/framework/instruction.cc b/paddle/cinn/hlir/framework/instruction.cc index c7185223843d5..65ac90793472b 100644 --- a/paddle/cinn/hlir/framework/instruction.cc +++ b/paddle/cinn/hlir/framework/instruction.cc @@ -147,27 +147,29 @@ void Instruction::Run( utils::RecordEvent record_args("Instruction::Run", cinn::utils::EventType::kInstruction); + const auto DefaultRun = [&] { #if defined(CINN_WITH_CUDA) && !defined(CINN_WITH_CUDNN) - if (function_name_ == "cublas_gemm" && target_.arch == Target::Arch::NVGPU) { - auto& pod_args = args_cached_[0]; - VLOG(3) << "The pod_args size of cublas_gemm: " << pod_args.size(); - runtime::cuda::cinn_gpu_cublas_gemm(attrs, - pod_args[0], - pod_args[1], - pod_args[2], - pod_args[3], - static_cast(stream)); - } else if (function_name_ == "cublas_matmul" && - target_.arch == Target::Arch::NVGPU) { + VLOG(3) << "Running extern function " << function_name_; + for (int idx = 0; idx < fn_ptrs_.size(); ++idx) { + VLOG(3) << "Running func name: " << fn_names_[idx]; + auto& pod_args = args_cached_[idx]; + CHECK(fn_ptrs_[idx]) << "The LoweredFunc address should be set first by " + "calling SetLoweredFunc method"; + if (!dryrun) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { + ((lower_func_ptr_g)fn_ptrs_[idx])( + static_cast(pod_args.data()), pod_args.size(), stream); + } else { + ((lower_func_ptr_t)fn_ptrs_[idx])(static_cast(pod_args.data()), + pod_args.size()); + } + } + } + VLOG(3) << "Done Running extern function " << function_name_; +#elif defined(CINN_WITH_CUDNN) auto& pod_args = args_cached_[0]; - VLOG(3) << "The pod_args size of cublas_matmul: " << pod_args.size(); - runtime::cuda::cinn_gpu_cublas_gemm(attrs, - pod_args[0], - pod_args[1], - nullptr, - pod_args[2], - static_cast(stream)); - } else { + // Here conv2d and depthwise_conv2d are implemented by one cudnn api + // cudnnConvolutionForward VLOG(3) << "Running extern function " << function_name_; for (int idx = 0; idx < fn_ptrs_.size(); ++idx) { VLOG(3) << "Running func name: " << fn_names_[idx]; @@ -185,136 +187,202 @@ void Instruction::Run( } } VLOG(3) << "Done Running extern function " << function_name_; - } +#else + VLOG(3) << "Running extern function " << function_name_; + for (int idx = 0; idx < fn_ptrs_.size(); ++idx) { + VLOG(3) << "Running func name: " << fn_names_[idx]; + auto& pod_args = args_cached_[idx]; + CHECK(fn_ptrs_[idx]) << "The LoweredFunc address should be set first by " + "calling SetLoweredFunc method"; + if (!dryrun) { + ((lower_func_ptr_t)fn_ptrs_[idx])(static_cast(pod_args.data()), + pod_args.size()); + } + } + VLOG(3) << "Done Running extern function " << function_name_; +#endif + }; + const auto NVGPURun = [&] { +#if defined(CINN_WITH_CUDA) && !defined(CINN_WITH_CUDNN) + if (function_name_ == "cublas_gemm") { + auto& pod_args = args_cached_[0]; + VLOG(3) << "The pod_args size of cublas_gemm: " << pod_args.size(); + runtime::cuda::cinn_gpu_cublas_gemm(attrs, + pod_args[0], + pod_args[1], + pod_args[2], + pod_args[3], + static_cast(stream)); + } else if (function_name_ == "cublas_matmul") { + auto& pod_args = args_cached_[0]; + VLOG(3) << "The pod_args size of cublas_matmul: " << pod_args.size(); + runtime::cuda::cinn_gpu_cublas_gemm(attrs, + pod_args[0], + pod_args[1], + nullptr, + pod_args[2],
static_cast(stream)); + } else { + VLOG(3) << "Running extern function " << function_name_; + for (int idx = 0; idx < fn_ptrs_.size(); ++idx) { + VLOG(3) << "Running func name: " << fn_names_[idx]; + auto& pod_args = args_cached_[idx]; + CHECK(fn_ptrs_[idx]) + << "The LoweredFunc address should be set first by " + "calling SetLoweredFunc method"; + if (!dryrun) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { + ((lower_func_ptr_g)fn_ptrs_[idx])( + static_cast(pod_args.data()), pod_args.size(), stream); + } else { + ((lower_func_ptr_t)fn_ptrs_[idx])( + static_cast(pod_args.data()), pod_args.size()); + } + } + } + VLOG(3) << "Done Running extern function " << function_name_; + } #elif defined(CINN_WITH_CUDNN) - auto& pod_args = args_cached_[0]; - // Here conv2d and depthwise_conv2d are implemented by one cudnn api - // cudnnConvolutionForward - if ((function_name_ == "conv2d" || function_name_ == "depthwise_conv2d") && - target_.arch == Target::Arch::NVGPU) { - if (str_attrs[0] == "forward") { - if (str_attrs.size() > 1 && str_attrs[1] == "NHWC") { + auto& pod_args = args_cached_[0]; + // Here conv2d and depthwise_conv2d are implemented by one cudnn api + // cudnnConvolutionForward + if ((function_name_ == "conv2d" || function_name_ == "depthwise_conv2d")) { + if (str_attrs[0] == "forward") { + if (str_attrs.size() > 1 && str_attrs[1] == "NHWC") { + absl::flat_hash_map attrs_map = { + {"input_n", attrs[0]}, {"input_h", attrs[1]}, + {"input_w", attrs[2]}, {"input_c", attrs[3]}, + {"weights_n", attrs[4]}, {"weights_c", attrs[5]}, + {"weights_h", attrs[6]}, {"weights_w", attrs[7]}, + {"pad_h", attrs[8]}, {"pad_w", attrs[9]}, + {"stride_h", attrs[10]}, {"stride_w", attrs[11]}, + {"dilation_h", attrs[12]}, {"dilation_w", attrs[13]}, + {"groups", attrs[14]}, {"output_n", attrs[15]}, + {"output_h", attrs[16]}, {"output_w", attrs[17]}, + {"output_c", attrs[18]}, + }; + runtime::cuda::cinn_gpu_cudnn_conv2d( + attrs_map, + pod_args[0], + pod_args[1], + pod_args[2], + static_cast(stream), + cinn::common::Layout::kNHWC); + + } else { + absl::flat_hash_map attrs_map = { + {"input_n", attrs[0]}, {"input_c", attrs[1]}, + {"input_h", attrs[2]}, {"input_w", attrs[3]}, + {"weights_n", attrs[4]}, {"weights_c", attrs[5]}, + {"weights_h", attrs[6]}, {"weights_w", attrs[7]}, + {"pad_h", attrs[8]}, {"pad_w", attrs[9]}, + {"stride_h", attrs[10]}, {"stride_w", attrs[11]}, + {"dilation_h", attrs[12]}, {"dilation_w", attrs[13]}, + {"groups", attrs[14]}, {"output_n", attrs[15]}, + {"output_c", attrs[16]}, {"output_h", attrs[17]}, + {"output_w", attrs[18]}, + }; + runtime::cuda::cinn_gpu_cudnn_conv2d( + attrs_map, + pod_args[0], + pod_args[1], + pod_args[2], + static_cast(stream), + cinn::common::Layout::kNCHW); + } + } else if (str_attrs[0] == "backward_data") { + // w, dy, dx absl::flat_hash_map attrs_map = { - {"input_n", attrs[0]}, {"input_h", attrs[1]}, - {"input_w", attrs[2]}, {"input_c", attrs[3]}, - {"weights_n", attrs[4]}, {"weights_c", attrs[5]}, - {"weights_h", attrs[6]}, {"weights_w", attrs[7]}, + {"input_n", attrs[15]}, {"input_c", attrs[16]}, + {"input_h", attrs[17]}, {"input_w", attrs[18]}, + {"weights_n", attrs[0]}, {"weights_c", attrs[1]}, + {"weights_h", attrs[2]}, {"weights_w", attrs[3]}, {"pad_h", attrs[8]}, {"pad_w", attrs[9]}, {"stride_h", attrs[10]}, {"stride_w", attrs[11]}, {"dilation_h", attrs[12]}, {"dilation_w", attrs[13]}, - {"groups", attrs[14]}, {"output_n", attrs[15]}, - {"output_h", attrs[16]}, {"output_w", attrs[17]}, - {"output_c", attrs[18]}, + {"groups", attrs[14]}, 
{"output_n", attrs[4]}, + {"output_c", attrs[5]}, {"output_h", attrs[6]}, + {"output_w", attrs[7]}, }; - runtime::cuda::cinn_gpu_cudnn_conv2d(attrs_map, - pod_args[0], - pod_args[1], - pod_args[2], - static_cast(stream), - cinn::common::Layout::kNHWC); - + // w, dy, dx + runtime::cuda::cinn_gpu_cudnn_conv2d_backward_data( + attrs_map, + pod_args[0], + pod_args[1], + pod_args[2], + static_cast(stream)); } else { + // x, dy, w absl::flat_hash_map attrs_map = { {"input_n", attrs[0]}, {"input_c", attrs[1]}, {"input_h", attrs[2]}, {"input_w", attrs[3]}, - {"weights_n", attrs[4]}, {"weights_c", attrs[5]}, - {"weights_h", attrs[6]}, {"weights_w", attrs[7]}, + {"weights_n", attrs[15]}, {"weights_c", attrs[16]}, + {"weights_h", attrs[17]}, {"weights_w", attrs[18]}, {"pad_h", attrs[8]}, {"pad_w", attrs[9]}, {"stride_h", attrs[10]}, {"stride_w", attrs[11]}, {"dilation_h", attrs[12]}, {"dilation_w", attrs[13]}, - {"groups", attrs[14]}, {"output_n", attrs[15]}, - {"output_c", attrs[16]}, {"output_h", attrs[17]}, - {"output_w", attrs[18]}, + {"groups", attrs[14]}, {"output_n", attrs[4]}, + {"output_c", attrs[5]}, {"output_h", attrs[6]}, + {"output_w", attrs[7]}, }; - runtime::cuda::cinn_gpu_cudnn_conv2d(attrs_map, - pod_args[0], - pod_args[1], - pod_args[2], - static_cast(stream), - cinn::common::Layout::kNCHW); + // x, dy, w + runtime::cuda::cinn_gpu_cudnn_conv2d_backward_filter( + attrs_map, + pod_args[0], + pod_args[1], + pod_args[2], + static_cast(stream)); } - } else if (str_attrs[0] == "backward_data") { - // w, dy, dx - absl::flat_hash_map attrs_map = { - {"input_n", attrs[15]}, {"input_c", attrs[16]}, - {"input_h", attrs[17]}, {"input_w", attrs[18]}, - {"weights_n", attrs[0]}, {"weights_c", attrs[1]}, - {"weights_h", attrs[2]}, {"weights_w", attrs[3]}, - {"pad_h", attrs[8]}, {"pad_w", attrs[9]}, - {"stride_h", attrs[10]}, {"stride_w", attrs[11]}, - {"dilation_h", attrs[12]}, {"dilation_w", attrs[13]}, - {"groups", attrs[14]}, {"output_n", attrs[4]}, - {"output_c", attrs[5]}, {"output_h", attrs[6]}, - {"output_w", attrs[7]}, - }; - // w, dy, dx - runtime::cuda::cinn_gpu_cudnn_conv2d_backward_data( - attrs_map, - pod_args[0], - pod_args[1], - pod_args[2], - static_cast(stream)); - } else { - // x, dy, w - absl::flat_hash_map attrs_map = { - {"input_n", attrs[0]}, {"input_c", attrs[1]}, - {"input_h", attrs[2]}, {"input_w", attrs[3]}, - {"weights_n", attrs[15]}, {"weights_c", attrs[16]}, - {"weights_h", attrs[17]}, {"weights_w", attrs[18]}, - {"pad_h", attrs[8]}, {"pad_w", attrs[9]}, - {"stride_h", attrs[10]}, {"stride_w", attrs[11]}, - {"dilation_h", attrs[12]}, {"dilation_w", attrs[13]}, - {"groups", attrs[14]}, {"output_n", attrs[4]}, - {"output_c", attrs[5]}, {"output_h", attrs[6]}, - {"output_w", attrs[7]}, - }; - // x, dy, w - runtime::cuda::cinn_gpu_cudnn_conv2d_backward_filter( - attrs_map, - pod_args[0], - pod_args[1], - pod_args[2], - static_cast(stream)); - } - } else if (function_name_ == "pool2d" && - target_.arch == Target::Arch::NVGPU) { - runtime::cuda::cinn_gpu_cudnn_pool2d(attrs, - str_attrs, + } else if (function_name_ == "pool2d") { + runtime::cuda::cinn_gpu_cudnn_pool2d(attrs, + str_attrs, + pod_args[0], + pod_args[1], + static_cast(stream)); + } else if (function_name_ == "softmax") { + CHECK_EQ(pod_args.size(), 3); + runtime::cuda::cinn_gpu_cudnn_softmax( + attrs, pod_args[0], pod_args[1], static_cast(stream)); + } else if (function_name_ == "mul") { + CHECK_EQ(pod_args.size(), 4); + runtime::cuda::cinn_gpu_cublas_mul(attrs, pod_args[0], pod_args[1], + pod_args[2], 
static_cast(stream)); - } else if (function_name_ == "softmax" && - target_.arch == Target::Arch::NVGPU) { - CHECK_EQ(pod_args.size(), 3); - runtime::cuda::cinn_gpu_cudnn_softmax( - attrs, pod_args[0], pod_args[1], static_cast(stream)); - } else if (function_name_ == "mul" && target_.arch == Target::Arch::NVGPU) { - CHECK_EQ(pod_args.size(), 4); - runtime::cuda::cinn_gpu_cublas_mul(attrs, - pod_args[0], - pod_args[1], - pod_args[2], - static_cast(stream)); - } else if (function_name_ == "cublas_gemm" && - target_.arch == Target::Arch::NVGPU) { - VLOG(3) << "The pod_args size of cublas_gemm: " << pod_args.size(); - runtime::cuda::cinn_gpu_cublas_gemm(attrs, - pod_args[0], - pod_args[1], - pod_args[2], - pod_args[3], - static_cast(stream)); - } else if (function_name_ == "cublas_matmul" && - target_.arch == Target::Arch::NVGPU) { - auto& pod_args = args_cached_[0]; - VLOG(3) << "The pod_args size of cublas_matmul: " << pod_args.size(); - runtime::cuda::cinn_gpu_cublas_gemm(attrs, - pod_args[0], - pod_args[1], - nullptr, - pod_args[2], - static_cast(stream)); - } else { + } else if (function_name_ == "cublas_gemm") { + VLOG(3) << "The pod_args size of cublas_gemm: " << pod_args.size(); + runtime::cuda::cinn_gpu_cublas_gemm(attrs, + pod_args[0], + pod_args[1], + pod_args[2], + pod_args[3], + static_cast(stream)); + } else if (function_name_ == "cublas_matmul") { + auto& pod_args = args_cached_[0]; + VLOG(3) << "The pod_args size of cublas_matmul: " << pod_args.size(); + runtime::cuda::cinn_gpu_cublas_gemm(attrs, + pod_args[0], + pod_args[1], + nullptr, + pod_args[2], + static_cast(stream)); + } else { + VLOG(3) << "Running extern function " << function_name_; + for (int idx = 0; idx < fn_ptrs_.size(); ++idx) { + VLOG(3) << "Running func name: " << fn_names_[idx]; + auto& pod_args = args_cached_[idx]; + CHECK(fn_ptrs_[idx]) + << "The LoweredFunc address should be set first by " + "calling SetLoweredFunc method"; + if (!dryrun) { + ((lower_func_ptr_g)fn_ptrs_[idx])( + static_cast(pod_args.data()), pod_args.size(), stream); + } + } + VLOG(3) << "Done Running extern function " << function_name_; + } +#else VLOG(3) << "Running extern function " << function_name_; for (int idx = 0; idx < fn_ptrs_.size(); ++idx) { VLOG(3) << "Running func name: " << fn_names_[idx]; @@ -332,37 +400,17 @@ void Instruction::Run( } } VLOG(3) << "Done Running extern function " << function_name_; - } -#else - VLOG(3) << "Running extern function " << function_name_; - for (int idx = 0; idx < fn_ptrs_.size(); ++idx) { - VLOG(3) << "Running func name: " << fn_names_[idx]; - auto& pod_args = args_cached_[idx]; - CHECK(fn_ptrs_[idx]) << "The LoweredFunc address should be set first by " - "calling SetLoweredFunc method"; - if (!dryrun) { - if (target_ == cinn::common::DefaultNVGPUTarget()) { - ((lower_func_ptr_g)fn_ptrs_[idx])( - static_cast(pod_args.data()), pod_args.size(), stream); - } else { - ((lower_func_ptr_t)fn_ptrs_[idx])(static_cast(pod_args.data()), - pod_args.size()); - } - } - } - VLOG(3) << "Done Running extern function " << function_name_; #endif - + }; + target_.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { DefaultRun(); }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { NVGPURun(); }, + }); if (!cinn::runtime::CheckStringFlagFalse(FLAGS_cinn_self_check_accuracy)) { CheckResults(name2podargs, stream); } - // TODO(thisjiang): revert while flags correct - // if (FLAGS_cinn_sync_run) { - // #ifdef CINN_WITH_CUDA - // 
utils::RecordEvent record_sync("FLAGS_cinn_sync_run"); - // CUDA_CALL(cudaStreamSynchronize(static_cast(stream))); - // #endif - // } } std::string Instruction::DumpInstruction() const { diff --git a/paddle/cinn/hlir/framework/memory.cc b/paddle/cinn/hlir/framework/memory.cc index bfc33b31beda9..d85393db72fb3 100755 --- a/paddle/cinn/hlir/framework/memory.cc +++ b/paddle/cinn/hlir/framework/memory.cc @@ -58,10 +58,10 @@ class CudaMemoryMng : public MemoryInterface { } // namespace MemoryManager::MemoryManager() { - Register(Target::Arch::Unk, new X86MemoryMng); - Register(Target::Arch::X86, new X86MemoryMng); + Register(common::UnknownArch{}, new X86MemoryMng); + Register(common::X86Arch{}, new X86MemoryMng); #ifdef CINN_WITH_CUDA - Register(Target::Arch::NVGPU, new CudaMemoryMng); + Register(common::NVGPUArch{}, new CudaMemoryMng); #endif } diff --git a/paddle/cinn/hlir/framework/memory.h b/paddle/cinn/hlir/framework/memory.h index 889e32e7fca0b..b719ece874f51 100644 --- a/paddle/cinn/hlir/framework/memory.h +++ b/paddle/cinn/hlir/framework/memory.h @@ -19,6 +19,7 @@ #include +#include "paddle/cinn/common/arch_util.h" #include "paddle/cinn/common/macros.h" #include "paddle/cinn/common/target.h" @@ -41,7 +42,7 @@ class MemoryInterface { */ class MemoryManager final { public: - using key_t = cinn::common::Target::Arch; + using key_t = cinn::common::Arch; static MemoryManager& Global() { static auto* x = new MemoryManager; @@ -56,12 +57,14 @@ class MemoryManager final { MemoryInterface* RetrieveSafely(key_t key) { auto* res = Retrieve(key); - CHECK(res) << "no MemoryInterface for architecture [" << key << "]"; + CHECK(res) << "no MemoryInterface for architecture [" << GetArchName(key) + << "]"; return res; } MemoryInterface* Register(key_t key, MemoryInterface* item) { - CHECK(!memory_mngs_.count(key)) << "Duplicate register [" << key << "]"; + CHECK(!memory_mngs_.count(key)) + << "Duplicate register [" << GetArchName(key) << "]"; memory_mngs_[key].reset(item); return item; } @@ -69,8 +72,7 @@ class MemoryManager final { private: MemoryManager(); - absl::flat_hash_map> + absl::flat_hash_map> memory_mngs_; CINN_DISALLOW_COPY_AND_ASSIGN(MemoryManager); diff --git a/paddle/cinn/hlir/framework/pir/CMakeLists.txt b/paddle/cinn/hlir/framework/pir/CMakeLists.txt index 3b09925b94830..bf8cd25f48e4b 100755 --- a/paddle/cinn/hlir/framework/pir/CMakeLists.txt +++ b/paddle/cinn/hlir/framework/pir/CMakeLists.txt @@ -8,7 +8,8 @@ gather_srcs( op_lowering_impl.cc op_mapper.cc op_lowering_util.cc + trivial_op_impl.cc + trivial_op_util.cc compilation_task.cc compilation_cache.cc - trivial_op_impl.cc - trivial_op_util.cc) + fusion_info.cc) diff --git a/paddle/cinn/hlir/framework/pir/compilation_cache.cc b/paddle/cinn/hlir/framework/pir/compilation_cache.cc index 47a38442b58a5..9b98597a50265 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_cache.cc +++ b/paddle/cinn/hlir/framework/pir/compilation_cache.cc @@ -39,38 +39,20 @@ void* BackendResource::GetInferFuncPtr() const { return ptr; } -std::shared_ptr& BackendResource::GetBackendCompiler() { - return backend_compiler_; -} - -const std::shared_ptr& BackendResource::GetBackendCompiler() - const { - return backend_compiler_; -} - -void BackendResource::SetHostFnName(const std::string& name) { - host_fn_name_ = name; -} - -void BackendResource::SetInferFnName(const std::string& name) { - infer_fn_name_ = name; -} - -pir::CINNKernelInfo BackendResource::GernerateKernelInfo( - const std::shared_ptr& group) const { +pir::CINNKernelInfo 
BackendResource::GenerateKernelInfo() const { pir::CINNKernelInfo kernel_info; kernel_info.fn_name = host_fn_name_; kernel_info.fn_ptr = GetHostFuncPtr(); kernel_info.infer_shape_fn_ptr = GetInferFuncPtr(); - kernel_info.int_args_map = group->int_args_map(); + kernel_info.int_args_map = GetIntArgsMap(); return kernel_info; } } // namespace pir bool CompilationCache::Has(const CacheKey& key) const { - const bool has_existed = cache_.find(KeyHash(key)) != cache_.end(); - VLOG(6) << "Check IsExisted in CompilationCache: " << key->FuncName() << " " - << has_existed; + const bool has_existed = cache_.find(key) != cache_.end(); + VLOG(6) << "Check IsExisted in CompilationCache: " << has_existed << " - " + << key; return has_existed; } @@ -79,24 +61,19 @@ const CompilationCache::CacheValue& CompilationCache::Get( PADDLE_ENFORCE_EQ( Has(key), true, - phi::errors::NotFound("%s is not in CompliatonCache.", key->FuncName())); - return cache_.at(KeyHash(key)); + phi::errors::NotFound("%s is not in CompilationCache.", key)); + return cache_.at(key); } pir::CINNKernelInfo CompilationCache::GetKernelInfo(const CacheKey& key) const { - return Get(key)->GetKernelInfo(key); + return Get(key)->GetKernelInfo(); } void CompilationCache::Insert(const CacheKey& key, const CacheValue& value) { - VLOG(6) << "Insert CompilationCache for: " << key->FuncName(); - cache_.insert({KeyHash(key), value}); + VLOG(6) << "Insert CompilationCache for: " << key; + cache_.insert({key, value}); } void CompilationCache::Clear() { cache_.clear(); } -size_t CompilationCache::KeyHash(const CacheKey& key) const { - // TODO(Aurelius84): use a better hash function in next pr. - return std::hash{}(key->FuncName()); -} - } // namespace cinn::hlir::framework diff --git a/paddle/cinn/hlir/framework/pir/compilation_cache.h b/paddle/cinn/hlir/framework/pir/compilation_cache.h index 018bd6fd85572..547a1889f01a6 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_cache.h +++ b/paddle/cinn/hlir/framework/pir/compilation_cache.h @@ -19,6 +19,7 @@ #include "paddle/cinn/backends/compiler.h" #include "paddle/cinn/common/macros.h" #include "paddle/cinn/common/target.h" +#include "paddle/cinn/hlir/framework/pir/fusion_info.h" #include "paddle/cinn/hlir/framework/pir/utils.h" namespace cinn::hlir::framework { @@ -27,76 +28,79 @@ namespace pir { class OpLoweringGroup; class BackendResource final { public: - BackendResource(const Target& target) { - backend_compiler_ = backends::Compiler::Create(target); - } - BackendResource(const Target& target, const std::string& host_fn_name, - const std::string& infer_fn_name) - : host_fn_name_(host_fn_name), infer_fn_name_(infer_fn_name) { + const std::string& infer_fn_name, + const std::map& int_args_map) + : host_fn_name_(host_fn_name), + infer_fn_name_(infer_fn_name), + int_args_map_(int_args_map) { backend_compiler_ = backends::Compiler::Create(target); } void* GetHostFuncPtr() const; void* GetInferFuncPtr() const; - pir::CINNKernelInfo GernerateKernelInfo( - const std::shared_ptr& group) const; - std::shared_ptr& GetBackendCompiler(); - const std::shared_ptr& GetBackendCompiler() const; - void SetHostFnName(const std::string& name); - void SetInferFnName(const std::string& name); + const std::map& GetIntArgsMap() const { + return int_args_map_; + } + const std::shared_ptr& GetBackendCompiler() const { + return backend_compiler_; + } + pir::CINNKernelInfo GenerateKernelInfo() const; private: std::string host_fn_name_; std::string infer_fn_name_; - // std::string host_code_; - // std::vector device_code_; -
std::shared_ptr backend_compiler_; + std::map int_args_map_; + + std::shared_ptr backend_compiler_{nullptr}; }; class CompilationResult final { public: - explicit CompilationResult(const Target& target) - : target_(target), backend_resource_(target) {} - - BackendResource& MutableBackendResource() { return backend_resource_; } - const BackendResource& GetBackendResource() const { + explicit CompilationResult(const Target& target) : target_(target) {} + const std::shared_ptr& GetBackendResource() const { return backend_resource_; } - pir::CINNKernelInfo GetKernelInfo( - const std::shared_ptr& group) { - return backend_resource_.GernerateKernelInfo(group); + + void SetBackendResource(const std::shared_ptr& other) { + backend_resource_ = other; + } + + pir::CINNKernelInfo GetKernelInfo() { + // TODO(Aurelius84): add ENFORCE_NOT_NULL + return backend_resource_->GenerateKernelInfo(); } private: Target target_; - BackendResource backend_resource_; + std::shared_ptr backend_resource_{nullptr}; }; + } // namespace pir class CompilationCache { public: - using CacheKey = std::shared_ptr; + using CacheKey = pir::FusionInfo; using CacheValue = std::shared_ptr; static CompilationCache& Instance() { - static CompilationCache instance; + thread_local static CompilationCache instance; return instance; } bool Has(const CacheKey& key) const; const CacheValue& Get(const CacheKey& key) const; - pir::CINNKernelInfo GetKernelInfo(const CacheKey& key) const; void Insert(const CacheKey& key, const CacheValue& value); void Clear(); - size_t KeyHash(const CacheKey& key) const; + + pir::CINNKernelInfo GetKernelInfo(const CacheKey& key) const; private: CompilationCache() = default; CINN_DISALLOW_COPY_AND_ASSIGN(CompilationCache); - std::unordered_map cache_; + std::unordered_map cache_; }; } // namespace cinn::hlir::framework diff --git a/paddle/cinn/hlir/framework/pir/compilation_task.cc b/paddle/cinn/hlir/framework/pir/compilation_task.cc index a93ac960d496a..85f4d2849ea80 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_task.cc +++ b/paddle/cinn/hlir/framework/pir/compilation_task.cc @@ -42,15 +42,9 @@ std::string GroupCompilationContext::PrintPredicate2Funcs() const { return ss.str(); } -void CompilationTask::operator()() { - VLOG(4) << "Run Compilation Task for : " << context_->group_.get(); - if (CompilationCache::Instance().Has(context_->group_)) { - VLOG(4) << "Found cached kernel info for group: " - << context_->group_->FuncName(); - return; - } +std::shared_ptr CompilationTask::operator()() { Lowering(); - CodegenAndJit(); + return CodegenAndJit(); } void CompilationTask::Lowering() { @@ -62,7 +56,7 @@ void CompilationTask::Lowering() { /* apply pass = */ true)); } -void CompilationTask::CodegenAndJit() { +std::shared_ptr CompilationTask::CodegenAndJit() { ir::Module::Builder builder(cinn::common::UniqName("module"), context_->target_); CHECK_EQ(context_->predicates_.size(), context_->lowered_funcs_.size()); @@ -74,27 +68,22 @@ void CompilationTask::CodegenAndJit() { } builder.SetInferShapeFunc(context_->infer_shape_lowered_func_); ir::Module ir_module = builder.Build(); - BuildPirCINNKernelInfo(ir_module); -} - -pir::CINNKernelInfo CompilationTask::GetCINNKernelInfo() { - if (!CompilationCache::Instance().Has(context_->group_)) { - PADDLE_THROW(phi::errors::NotFound( - "Kernel info has been cached for current group.")); - } - return CompilationCache::Instance().GetKernelInfo(context_->group_); + return BuildPirCINNKernelInfo(ir_module); } -void CompilationTask::BuildPirCINNKernelInfo(const 
ir::Module& module) { +std::shared_ptr CompilationTask::BuildPirCINNKernelInfo( + const ir::Module& module) { auto compilation_result = std::make_shared(context_->target_); - pir::BackendResource& backend_resource = - compilation_result->MutableBackendResource(); - backend_resource.GetBackendCompiler()->Build(module, ""); - backend_resource.SetHostFnName(context_->group_->FuncName()); - backend_resource.SetInferFnName(context_->group_->FuncName() + - "_infer_shape"); - CompilationCache::Instance().Insert(context_->group_, compilation_result); + auto backend_resource = std::make_shared( + context_->target_, + context_->group_->FuncName(), + context_->group_->FuncName() + "_infer_shape", + context_->group_->int_args_map()); + VLOG(5) << "Start to compile module into cuda kernel..."; + backend_resource->GetBackendCompiler()->Build(module, ""); + compilation_result->SetBackendResource(backend_resource); + return compilation_result; } } // namespace framework diff --git a/paddle/cinn/hlir/framework/pir/compilation_task.h b/paddle/cinn/hlir/framework/pir/compilation_task.h index 69e985afd7869..d104d264b6852 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_task.h +++ b/paddle/cinn/hlir/framework/pir/compilation_task.h @@ -50,14 +50,13 @@ class CompilationTask { explicit CompilationTask(GroupCompilationContext* context) : context_(context) {} - void operator()(); - pir::CINNKernelInfo GetCINNKernelInfo(); + std::shared_ptr operator()(); private: void Lowering(); - void CodegenAndJit(); - std::unique_ptr BuildInstruction(); - void BuildPirCINNKernelInfo(const ir::Module& module); + std::shared_ptr CodegenAndJit(); + std::shared_ptr BuildPirCINNKernelInfo( + const ir::Module& module); GroupCompilationContext* context_; }; diff --git a/paddle/cinn/hlir/framework/pir/fusion_info.cc b/paddle/cinn/hlir/framework/pir/fusion_info.cc new file mode 100644 index 0000000000000..f3b1979e6627e --- /dev/null +++ b/paddle/cinn/hlir/framework/pir/fusion_info.cc @@ -0,0 +1,146 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
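The new fusion_info.cc below derives a structural cache key for a fused group by folding per-operation hashes with a boost-style hash_combine. Here is a minimal standalone sketch of that folding scheme, with simplified types (the real code also hashes value-type and attribute info, not just op names):

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Same mixing step as boost::hash_combine (and as the hash_combine template
// declared in fusion_info.h below).
template <typename T>
void hash_combine(std::size_t& seed, const T& v) {
  seed ^= std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Simplified stand-in for FusionInfo::hash(): fold the per-op summaries of a
// fused group (here just op names) into one deterministic key.
std::size_t FusionKey(const std::vector<std::string>& op_names) {
  std::size_t seed = 2153;  // same starting seed as FusionInfo::hash()
  for (const auto& name : op_names) hash_combine(seed, name);
  return seed;
}

int main() {
  // Structurally identical groups produce identical keys, which is what
  // lets the compilation cache reuse one compiled kernel across groups.
  std::cout << (FusionKey({"pd_op.exp", "pd_op.add"}) ==
                FusionKey({"pd_op.exp", "pd_op.add"}))
            << "\n";  // prints 1
  std::cout << (FusionKey({"pd_op.exp", "pd_op.add"}) ==
                FusionKey({"pd_op.add", "pd_op.exp"}))
            << "\n";  // prints 0: op order changes the key
  return 0;
}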
+ +#include "paddle/cinn/hlir/framework/pir/fusion_info.h" +#include "paddle/common/enforce.h" +#include "paddle/pir/include/core/ir_printer.h" + +namespace cinn::hlir::framework::pir { + +constexpr static char* kOpCallStack = "op_callstack"; + +std::size_t AttributeInfo::hash() const { return attr_.hash(); } + +std::ostream& operator<<(std::ostream& os, const AttributeInfo& attr_info) { + os << "AttributeInfo - " << attr_info.name_ << ", " << attr_info.hash(); + if (VLOG_IS_ON(7)) { + os << " ("; + ::pir::IrPrinter(os).PrintAttribute(attr_info.attr_); + os << ")"; + } + return os; +} + +std::size_t ValueInfo::hash() const { return type_.hash(); } + +std::ostream& operator<<(std::ostream& os, const ValueInfo& value_info) { + os << "ValueInfo - " << value_info.hash(); + if (VLOG_IS_ON(7)) { + os << "("; + ::pir::IrPrinter(os).PrintType(value_info.type_); + os << ")"; + } + return os; +} + +OperationInfo::OperationInfo(const ::pir::Operation& op) { + name_ = op.name(); + for (const auto value : op.operands_source()) { + if (!value || !value.type()) continue; + input_infos_.emplace_back(value); + } + for (const auto value : op.results()) { + if (!value || !value.type()) continue; + output_infos_.emplace_back(value); + } + // Keep attribute always in order. + const auto& attributes = op.attributes(); + std::map> order_attributes( + attributes.begin(), attributes.end()); + for (const auto& [attr_name, attr_value] : order_attributes) { + if (!attr_value || attr_name == kOpCallStack) continue; + attr_infos_.emplace_back(attr_name, attr_value); + } +} + +std::size_t OperationInfo::hash() const { + std::size_t seed = 1789; + hash_combine(seed, name_); + for (const auto& info : input_infos_) hash_combine(seed, info); + for (const auto& info : output_infos_) hash_combine(seed, info); + for (const auto& info : attr_infos_) hash_combine(seed, info); + return seed; +} + +std::ostream& operator<<(std::ostream& os, const OperationInfo& op_info) { + os << op_info.name_ << " - " << op_info.hash(); + if (VLOG_IS_ON(7)) { + os << "{\n"; + for (const auto& info : op_info.input_infos_) os << info << "\n"; + for (const auto& info : op_info.output_infos_) os << info << "\n"; + for (const auto& info : op_info.attr_infos_) os << info << "\n"; + os << "}"; + } + return os; +} + +FusionInfo::FusionInfo(const OpLoweringGroup& group) { + for (const auto* op : TopologySort(group)) { + op_infos_.emplace_back(*op); + } +} + +std::size_t FusionInfo::hash() const { + if (cached_hash_value_ != 0U) { + return cached_hash_value_; + } + std::size_t seed = 2153; + for (const auto& info : op_infos_) hash_combine(seed, info); + return seed; +} + +std::ostream& operator<<(std::ostream& os, const FusionInfo& fusion_info) { + os << "FusionInfo - " << fusion_info.hash(); + if (VLOG_IS_ON(5)) { + os << "{\n"; + for (const auto& op_info : fusion_info.op_infos_) os << op_info << "\n"; + os << "}\n"; + } + return os; +} + +std::size_t HashIntArgsMap( + const std::map& int_args_map) { + std::size_t seed = 2153; + for (const auto& [input_idx, dim_idx] : int_args_map) { + hash_combine(seed, input_idx); + hash_combine(seed, dim_idx.arg_idx); + hash_combine(seed, dim_idx.dim_idx); + } + return seed; +} +std::ostream& operator<<( + std::ostream& os, + const std::map& int_args_map) { + os << "int_args_map: {\n"; + for (const auto& [input_idx, dim_idx] : int_args_map) { + os << "input_idx: " << input_idx << ":[ " << dim_idx.arg_idx << ", " + << dim_idx.dim_idx << " ]\n"; + } + os << "}\n"; +} + +std::vector TopologySort( + const OpLoweringGroup& 
group) { + // NOTE(Aurelius84): Use simplest one-by-one order temporaly. + auto* block = group.GetParentBlock(); + std::vector ops; + ops.reserve(block->size()); + for (auto& op : *block) { + ops.push_back(&op); + } + return ops; +} + +} // namespace cinn::hlir::framework::pir diff --git a/paddle/cinn/hlir/framework/pir/fusion_info.h b/paddle/cinn/hlir/framework/pir/fusion_info.h new file mode 100644 index 0000000000000..477e6934319cf --- /dev/null +++ b/paddle/cinn/hlir/framework/pir/fusion_info.h @@ -0,0 +1,118 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "paddle/cinn/hlir/framework/pir/op_lowering_group.h" + +namespace cinn::hlir::framework::pir { + +class AttributeInfo { + public: + AttributeInfo(const std::string &name, const ::pir::Attribute &attr) + : name_(name), attr_(attr) {} + + std::size_t hash() const; + friend std::ostream &operator<<(std::ostream &os, const AttributeInfo &info); + + private: + std::string name_; + ::pir::Attribute attr_; +}; + +class ValueInfo { + public: + explicit ValueInfo(const ::pir::Value &value) : type_(value.type()) {} + + std::size_t hash() const; + friend std::ostream &operator<<(std::ostream &os, const ValueInfo &info); + + private: + // All value information is in TypeStorage. 
+ ::pir::Type type_; +}; + +class OperationInfo { + public: + explicit OperationInfo(const ::pir::Operation &op); + + std::size_t hash() const; + friend std::ostream &operator<<(std::ostream &os, const OperationInfo &info); + + private: + std::string name_; + std::vector input_infos_; + std::vector output_infos_; + std::vector attr_infos_; +}; + +class FusionInfo { + using IntArgsMap = std::map; + + public: + explicit FusionInfo(const OpLoweringGroup &group); + FusionInfo() = delete; + FusionInfo(const FusionInfo &) = default; + FusionInfo(FusionInfo &&) = default; + + std::size_t hash() const; + + bool operator==(const FusionInfo &other) const { + return this->hash() == other.hash(); + } + friend std::ostream &operator<<(std::ostream &os, const FusionInfo &info); + + private: + std::vector op_infos_; + mutable std::size_t cached_hash_value_{0}; +}; + +std::ostream &operator<<(std::ostream &os, const AttributeInfo &info); +std::ostream &operator<<(std::ostream &os, const ValueInfo &info); +std::ostream &operator<<(std::ostream &os, const OperationInfo &info); +std::ostream &operator<<(std::ostream &os, const FusionInfo &info); + +// See boost.hash_combine for details +template +inline void hash_combine(std::size_t &seed, // NOLINT + const T &v) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +std::size_t HashIntArgsMap( + const std::map &int_args_map); +std::ostream &operator<<( + std::ostream &os, + const std::map &int_args_map); +std::vector TopologySort( + const OpLoweringGroup &group); + +} // namespace cinn::hlir::framework::pir + +namespace std { +#define REGISTER_STD_HASH(class_name) \ + template <> \ + struct hash { \ + std::size_t operator()( \ + const cinn::hlir::framework::pir::class_name &obj) const { \ + return obj.hash(); \ + } \ + }; + +REGISTER_STD_HASH(AttributeInfo); +REGISTER_STD_HASH(ValueInfo); +REGISTER_STD_HASH(OperationInfo); +REGISTER_STD_HASH(FusionInfo) } // namespace std diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_group.cc b/paddle/cinn/hlir/framework/pir/op_lowering_group.cc index 8799c84969a04..f9bfed7c92727 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_group.cc +++ b/paddle/cinn/hlir/framework/pir/op_lowering_group.cc @@ -19,6 +19,113 @@ namespace hlir { namespace framework { namespace pir { +::pir::Program* OpLoweringGroup::GetParentProgram() const { + PADDLE_ENFORCE_GT(ops_.size(), + 0, + ::common::errors::PreconditionNotMet( + "Require at least one op in the group.")); + PADDLE_ENFORCE_NOT_NULL( + ops_[0], + ::common::errors::Unavailable("Found group.ops_[0] is nullptr.")); + return ops_[0]->GetParentProgram(); +} + +::pir::Block* OpLoweringGroup::GetParentBlock() const { + PADDLE_ENFORCE_GT(this->ops_.size(), + 0, + ::common::errors::PreconditionNotMet( + "Required at least one operation in OpLoweringGroup.")); + auto* block = this->ops_[0]->GetParent(); + PADDLE_ENFORCE_NOT_NULL( + block, + ::common::errors::Unavailable( + "Required inner op's parent block must not be nullptr.")); + for (size_t i = 1; i < this->ops_.size(); ++i) { + PADDLE_ENFORCE_EQ(this->ops_[i]->GetParent(), + block, + ::common::errors::PreconditionNotMet( + "Required all ops to belong to the same block.")); + } + + return block; +} + +std::vector<::pir::Value> OpLoweringGroup::GetGroupOutputValues() const { + std::unordered_set<::pir::Operation*> group_ops_set(this->ops_.begin(), + this->ops_.end()); + + std::vector<::pir::Value> output_values; + for (auto* op : this->ops_) { + for (size_t i = 0; i < op->num_results(); ++i) {
auto result = op->result(i); + if (!result) { + continue; + } + for (auto use_iter = result.use_begin(); use_iter != result.use_end(); + ++use_iter) { + auto* use_op = use_iter->owner(); + if (group_ops_set.find(use_op) == group_ops_set.end()) { + output_values.push_back(result); + break; + } + } + } + } + return output_values; +} + +std::unordered_set<::pir::Value> OpLoweringGroup::GetInputOpValues() const { + std::unordered_set<::pir::Value> group_inputs; + std::unordered_set<::pir::Operation*> ops_set(this->ops_.begin(), + this->ops_.end()); + + // Count all ops' input Values + for (auto op : ops_set) { + for (auto& value : op->operands_source()) { + if (!value || !value.type() || ops_set.count(value.defining_op())) + continue; + // if the input value owner op is not in OpSet, it's the group's input + group_inputs.insert(value); + } + } + return group_inputs; +} + +std::unordered_set<::pir::Value> OpLoweringGroup::GetOutputOpValues() const { + std::unordered_set<::pir::Value> group_outputs; + + for (auto op : this->output_ops_) { + for (auto& result : op->results()) { + if (!result || !result.type()) { + continue; + } + + group_outputs.insert(result); + } + } + return group_outputs; +} + +const symbol::ShapeOrDataDimExprs& OpLoweringGroup::GetShapeOrDataExprs( + const ::pir::Value& value) const { + PADDLE_ENFORCE_EQ(HasShapeOrDataExprs(value), + true, + ::common::errors::NotFound( + "value not found in value_to_shape_or_data_exprs_")); + return value_to_shape_or_data_exprs_.at(value); +} + +void OpLoweringGroup::SetShapeOrDataExprs( + const ::pir::Value& value, + const symbol::ShapeOrDataDimExprs& shape_or_data) { + auto iter = value_to_shape_or_data_exprs_.find(value); + if (iter == value_to_shape_or_data_exprs_.end()) { + value_to_shape_or_data_exprs_.emplace(value, shape_or_data); + } else { + iter->second = shape_or_data; + } +} + std::shared_ptr OpLoweringGroup::Clone( ::pir::Block* target_block, ::pir::IrMapping* ir_mapping) const { std::vector<::pir::Operation*> new_ops; @@ -46,7 +153,6 @@ std::shared_ptr OpLoweringGroup::Clone( new_group->input_names_ = this->input_names_; new_group->output_names_ = this->output_names_; - new_group->fn_name_ = this->fn_name_; new_group->int_args_map_ = this->int_args_map_; new_group->alignment_schedule_info_ = this->alignment_schedule_info_; new_group->reduce_axis_ = this->reduce_axis_; diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_group.h b/paddle/cinn/hlir/framework/pir/op_lowering_group.h index aaa2f31f0a60c..bfaf843cdf5f0 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_group.h +++ b/paddle/cinn/hlir/framework/pir/op_lowering_group.h @@ -22,6 +22,7 @@ #include "paddle/cinn/common/context.h" #include "paddle/cinn/hlir/framework/op.h" #include "paddle/cinn/hlir/framework/pir/utils.h" +#include "paddle/common/enforce.h" #include "paddle/pir/include/core/builtin_type_interfaces.h" #include "paddle/pir/include/core/operation.h" #include "paddle/pir/include/core/value.h" @@ -38,124 +39,34 @@ namespace framework { namespace pir { class OpLoweringGroup { public: - OpLoweringGroup() = default; OpLoweringGroup(const OpLoweringGroup&) = delete; OpLoweringGroup(OpLoweringGroup&&) = delete; explicit OpLoweringGroup(const std::vector<::pir::Operation*>& group_ops) - : ops_(group_ops) {} - - explicit OpLoweringGroup(std::initializer_list<::pir::Operation*> group_ops) - : ops_(group_ops) {} - - struct SharedGroupHasher { - size_t operator()( - const std::shared_ptr& group) const noexcept { - return std::hash()(group->group_id()); - } - }; -
struct SharedGroupComparator { - bool operator()( - const std::shared_ptr& first, - const std::shared_ptr& second) const noexcept { - return first->group_id() == second->group_id(); - } - }; - - std::vector<::pir::Value> GetGroupOutputValues() const { - std::unordered_set<::pir::Operation*> group_ops_set(this->ops_.begin(), - this->ops_.end()); - - std::vector<::pir::Value> output_values; - for (auto* op : this->ops_) { - for (size_t i = 0; i < op->num_results(); ++i) { - auto result = op->result(i); - if (!result) { - continue; - } - for (auto use_iter = result.use_begin(); use_iter != result.use_end(); - ++use_iter) { - auto* use_op = use_iter->owner(); - if (group_ops_set.find(use_op) == group_ops_set.end()) { - output_values.push_back(result); - break; - } - } - } - } - return output_values; - } - - std::unordered_set<::pir::Value> GetInputOpValues() const { - std::unordered_set<::pir::Value> group_inputs; - - std::unordered_set<::pir::Operation*> ops_set; - for (auto op : this->ops_) { - ops_set.insert(op); - } - - // count all op's input Value - for (auto op : this->ops_) { - for (auto& value : op->operands_source()) { - if (!value || !value.type()) { - continue; - } - - if (!ops_set.count(value.defining_op())) { - // if the input value owner op is not in OpSet, it's the group's input - group_inputs.insert(value); - continue; - } - } - } - - return group_inputs; + : ops_(group_ops) { + fn_name_ = CompatibleInfo::GroupOpsName(ops_); } - std::unordered_set<::pir::Value> GetOutputOpValues() const { - std::unordered_set<::pir::Value> group_outputs; - - for (auto op : this->output_ops_) { - for (auto& result : op->results()) { - if (!result || result.type()) { - continue; - } - - group_outputs.insert(result); - } - } - return group_outputs; - } - - std::string FuncName() const { - if (fn_name_ == "") { - // TODO(Aurelius84): Polish this implementation. 
-      const_cast<OpLoweringGroup*>(this)->fn_name_ =
-          CompatibleInfo::GroupOpsName(ops_);
-    }
-    return this->fn_name_;
+  explicit OpLoweringGroup(std::initializer_list<::pir::Operation*> group_ops)
+      : ops_(group_ops) {
+    fn_name_ = CompatibleInfo::GroupOpsName(ops_);
   }

+  const std::string& FuncName() const { return this->fn_name_; }
+
+  ::pir::Block* GetParentBlock() const;
+  ::pir::Program* GetParentProgram() const;
+  std::vector<::pir::Value> GetGroupOutputValues() const;
+  std::unordered_set<::pir::Value> GetInputOpValues() const;
+  std::unordered_set<::pir::Value> GetOutputOpValues() const;
+
   const symbol::ShapeOrDataDimExprs& GetShapeOrDataExprs(
-      const ::pir::Value& value) const {
-    CHECK(value_to_shape_or_data_exprs_.count(value))
-        << "value not found in value_to_shape_or_data_exprs_";
-    return value_to_shape_or_data_exprs_.at(value);
-  }
+      const ::pir::Value& value) const;

   bool HasShapeOrDataExprs(const ::pir::Value& value) const {
     return value_to_shape_or_data_exprs_.count(value);
   }

   void SetShapeOrDataExprs(const ::pir::Value& value,
-                           const symbol::ShapeOrDataDimExprs& shape_or_data) {
-    auto iter = value_to_shape_or_data_exprs_.find(value);
-    if (iter == value_to_shape_or_data_exprs_.end()) {
-      value_to_shape_or_data_exprs_.emplace(value, shape_or_data);
-    } else {
-      iter->second = shape_or_data;
-    }
-  }
+                           const symbol::ShapeOrDataDimExprs& shape_or_data);

   void WalkOps(const std::function<void(::pir::Operation*)>& VisitOp) const {
     for (const auto& op : ops_) {
@@ -164,23 +75,17 @@ class OpLoweringGroup {
   }

   const std::vector<::pir::Operation*>& ops() const { return ops_; }
-
   std::vector<::pir::Operation*>& mut_ops() { return ops_; }
-
   void SetOps(const std::vector<::pir::Operation*>& new_ops) { ops_ = new_ops; }

   const std::vector<std::string>& input_names() const {
     return this->input_names_;
   }
-
   std::vector<std::string>& mut_input_names() { return this->input_names_; }
-
   const std::vector<std::string>& output_names() const {
     return this->output_names_;
   }
-
   std::vector<std::string>& mut_output_names() { return this->output_names_; }
-
   const std::vector<::pir::Value>& output_values() const {
     return this->output_values_;
   }
@@ -188,22 +93,25 @@ class OpLoweringGroup {
   std::vector<::pir::Value>& mut_output_values() {
     return this->output_values_;
   }
-
   const std::unordered_set<::pir::Operation*>& output_ops() const {
     return this->output_ops_;
   }
-
   std::unordered_set<::pir::Operation*>& mut_output_ops() {
     return this->output_ops_;
   }

   std::shared_ptr<adt::MapExprCtx> mut_map_expr_ctx() {
-    CHECK_NOTNULL(map_expr_ctx_);
+    PADDLE_ENFORCE_NOT_NULL(
+        map_expr_ctx_,
+        ::common::errors::Unavailable("Required map_expr_ctx_ != nullptr."));
     return map_expr_ctx_;
   }

   const adt::MapExprCtx& map_expr_ctx() const {
-    return *CHECK_NOTNULL(map_expr_ctx_);
+    PADDLE_ENFORCE_NOT_NULL(
+        map_expr_ctx_,
+        ::common::errors::Unavailable("Required map_expr_ctx_ != nullptr."));
+    return *map_expr_ctx_;
   }

   void set_value_to_shape_or_data_exprs(
@@ -285,6 +193,7 @@ class OpLoweringGroup {
   std::string group_id_{common::UniqName("group_")};
   // op in this group
   std::vector<::pir::Operation*> ops_;
+  std::string fn_name_;
   // output ops of the group.
   std::unordered_set<::pir::Operation*> output_ops_;
   // op pattern kind.
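The op_lowering_group.h change above replaces the lazy, const_cast-based caching of fn_name_ with eager initialization in both constructors, so FuncName() shrinks to a genuinely const accessor that can return a const reference. A minimal standalone sketch of the two patterns; LazyGroup, EagerGroup, and MakeName below are illustrative stand-ins, not the real CINN types:

    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in for CompatibleInfo::GroupOpsName.
    std::string MakeName(const std::vector<int>& ops) {
      return "fn_group_" + std::to_string(ops.size());
    }

    // Before: the name is computed on first use, which forces a const_cast
    // inside a const member function.
    class LazyGroup {
     public:
      explicit LazyGroup(std::vector<int> ops) : ops_(std::move(ops)) {}
      std::string FuncName() const {
        if (fn_name_.empty()) {
          // Mutation hidden inside a const method.
          const_cast<LazyGroup*>(this)->fn_name_ = MakeName(ops_);
        }
        return fn_name_;
      }

     private:
      std::vector<int> ops_;
      std::string fn_name_;
    };

    // After: the name is fixed at construction, so the accessor never mutates
    // state and can safely hand out a reference.
    class EagerGroup {
     public:
      explicit EagerGroup(std::vector<int> ops)
          // ops_ is declared before fn_name_, so this init order is well-defined.
          : ops_(std::move(ops)), fn_name_(MakeName(ops_)) {}
      const std::string& FuncName() const { return fn_name_; }

     private:
      std::vector<int> ops_;
      std::string fn_name_;
    };

The eager form also removes the data race the lazy version would have if FuncName() were first called concurrently, which matters once group compilation runs under parallel_run as in the pir_compiler.cc changes later in this patch.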
@@ -293,7 +202,6 @@ class OpLoweringGroup {
   std::vector<std::string> input_names_;
   std::vector<std::string> output_names_;
   std::vector<::pir::Value> output_values_;
-  std::string fn_name_{""};
   std::map int_args_map_;

   alignment_schedule_info_t alignment_schedule_info_;
diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc b/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc
index eea87c639cc96..bab37b959ddfc 100644
--- a/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc
+++ b/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc
@@ -204,6 +204,7 @@ BucketLoweredFuncsWrapper OpLowererImpl::BucketLower(
   if (ops.size() == 1 && ops[0]->name() == "custom_call") {
     return {{{ir::Expr(1), LowerCustomCall(group)[0]}}, ir::LoweredFunc()};
   }
+  std::vector<ir::Tensor> group_func_arg_tensors;
   std::unordered_map<::pir::Value, ir::Tensor> tensor_map;
   // for some op, it will output more tmp value and regard as
diff --git a/paddle/cinn/hlir/framework/pir/trivial_op_impl.cc b/paddle/cinn/hlir/framework/pir/trivial_op_impl.cc
index 8b97871211a55..deda666331f2f 100644
--- a/paddle/cinn/hlir/framework/pir/trivial_op_impl.cc
+++ b/paddle/cinn/hlir/framework/pir/trivial_op_impl.cc
@@ -13,8 +13,10 @@
 // limitations under the License.

 #include "paddle/cinn/hlir/framework/pir/trivial_op_impl.h"
-
 #include
+#include "paddle/cinn/operator_fusion/backend/pattern.h"
+#include "paddle/cinn/operator_fusion/backend/pattern_fuser.h"
+#include "paddle/cinn/operator_fusion/group_cluster.h"

 #include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h"
 #include "paddle/cinn/hlir/framework/compile_error.h"
@@ -327,66 +329,17 @@ ir::Expr CreateExprWithNewComputeBody(const FusibleOp& fusible_op,
   return std::visit(Visitor(new_compute_body), fusible_op);
 }

-FusionNode::FusionNode(FusibleOp fusible_op) : fusible_op(fusible_op) {}
-
-std::string FusionNode::GetTensorCounter() {
-  static int i = 0;
-  return std::to_string(i++);
-}
-
-void FusionNode::replace_topo_structure_of_fused_nodes(
-    FusionNode* fused_up_node, FusionNode* fused_down_node) {
-  upstream.insert(fused_up_node->upstream.begin(),
-                  fused_up_node->upstream.end());
-  upstream.insert(fused_down_node->upstream.begin(),
-                  fused_down_node->upstream.end());
-  upstream.erase(fused_up_node);
-
-  downstream.insert(fused_up_node->downstream.begin(),
-                    fused_up_node->downstream.end());
-  downstream.insert(fused_down_node->downstream.begin(),
-                    fused_down_node->downstream.end());
-  downstream.erase(fused_down_node);
-
-  expr_related_op = fused_down_node->expr_related_op;
-
-  for (const auto& pair_data : upstream) {
-    FusionNode* upstream_node = pair_data.first;
-    ::pir::Value related_value = pair_data.second;
-    if (upstream_node->downstream.find(fused_up_node) !=
-        upstream_node->downstream.end()) {
-      upstream_node->downstream.erase(fused_up_node);
-    }
-    if (upstream_node->downstream.find(fused_down_node) !=
-        upstream_node->downstream.end()) {
-      upstream_node->downstream.erase(fused_down_node);
-    }
-    upstream_node->downstream[this] = related_value;
-  }
-
-  for (const auto& pair_data : downstream) {
-    FusionNode* downstream_node = pair_data.first;
-    ::pir::Value related_value = pair_data.second;
-    if (downstream_node->upstream.find(fused_up_node) !=
-        downstream_node->upstream.end()) {
-      downstream_node->upstream.erase(fused_up_node);
-    }
-    if (downstream_node->upstream.find(fused_down_node) !=
-        downstream_node->upstream.end()) {
-      downstream_node->upstream.erase(fused_down_node);
-    }
-    downstream_node->upstream[this] = related_value;
-  }
-}
+bool CheckAllLoopRangeEq(ReduceOp reduce_upper, TrivialOp trivial_down) {}
-bool FusionNode::IsTrivial() const {
-  return std::holds_alternative<TrivialOp>(fusible_op);
+int GetTensorCounter() {
+  static int counter = 1;
+  return counter++;
 }

-bool CheckAllLoopRangeEq(ReduceOp reduce_upper, TrivialOp trivial_down) {}
-
-std::vector<FusibleOp> TransformReduceLoopRange(const ReduceOp& upstream,
-                                                FusibleOp* downstream) {
+std::vector<FusibleOp> TransformReduceLoopRange(
+    const ReduceOp& upstream,
+    FusibleOp* downstream,
+    std::vector<size_t> fake_reduce_iter_idx) {
   // downstream will be mutated by this transform.
   VLOG(4) << "RRTransform begin";
   VLOG(4) << "RRTransform Upstream is \n" << _GetRootExpr(upstream);
@@ -396,13 +349,22 @@ std::vector<FusibleOp> TransformReduceLoopRange(const ReduceOp& upstream,
       modified_downstream_compute_body, GetOutputTensor(upstream));
   std::vector<FusibleOp> results;
   ir::Tensor downstream_output_tensor = GetOutputTensor(*downstream);
+
+  bool is_trivial_downstream = std::holds_alternative<TrivialOp>(*downstream);
+
   const auto create_new_tensor = [&](const ir::Tensor& downstream_load_tensor) {
     VLOG(4) << "Create New Tensor Start";
     ir::Tensor result = ir::Tensor(
-        downstream_load_tensor->name + "_" + FusionNode::GetTensorCounter(),
+        downstream_load_tensor->name + "_" + std::to_string(GetTensorCounter()),
        downstream_load_tensor->type(),
-        downstream_output_tensor->shape,
-        downstream_output_tensor->domain,
+        is_trivial_downstream
+            ? FilterWithFakeReduceIter(downstream_output_tensor->shape,
+                                       fake_reduce_iter_idx)
+            : downstream_output_tensor->shape,
+        is_trivial_downstream
+            ? FilterWithFakeReduceIter(downstream_output_tensor->domain,
+                                       fake_reduce_iter_idx)
+            : downstream_output_tensor->domain,
         GetOutputTensor(upstream)->operation,
         GetReduceIters(upstream));
     result->WithBuffer();
@@ -414,7 +376,10 @@ std::vector<FusibleOp> TransformReduceLoopRange(const ReduceOp& upstream,
     const auto& new_tensor =
         create_new_tensor(load_tensor.As<ir::Load>()->tensor.as_tensor_ref());
     ir::Expr new_reduce = CreateReduceExpr(
-        GetOutputIters(*downstream),
+        is_trivial_downstream
+            ? FilterWithFakeReduceIter(GetOutputIters(*downstream),
+                                       fake_reduce_iter_idx)
+            : GetOutputIters(*downstream),
         GetReduceIters(upstream),
         GetInitExpr(upstream),
         ComposeUtils::CopyedReplaceExpr(GetComputeBody(upstream),
@@ -423,10 +388,16 @@ std::vector<FusibleOp> TransformReduceLoopRange(const ReduceOp& upstream,
         new_tensor,
         GetOutputTensor(upstream));
     results.emplace_back(ReduceOp(new_reduce));
+    VLOG(4) << "After Tmp Transform, upstream is : \n"
+            << _GetRootExpr(results.back());
     ExprTransformerUtils::ReplaceTarget(
         &modified_downstream_compute_body,
         load_tensor,
-        new_tensor(ComposeUtils::VarVec2ExprVec(GetOutputIters(*downstream))));
+        new_tensor(ComposeUtils::VarVec2ExprVec(
+            is_trivial_downstream
+                ? FilterWithFakeReduceIter(GetOutputIters(*downstream),
+                                           fake_reduce_iter_idx)
+                : GetOutputIters(*downstream))));
   }
   _SetFuncBody(*downstream,
                CreateExprWithNewComputeBody(*downstream,
@@ -436,61 +407,52 @@ std::vector<FusibleOp> TransformReduceLoopRange(const ReduceOp& upstream,
   return results;
 }

-FusibleOp TrivialFusion(FusionNode* upstream, FusionNode* downstream) {
-  CHECK(upstream->IsTrivial());
-  if (downstream->IsTrivial()) {
-    return TrivalxOther_Fusion(std::get<TrivialOp>(upstream->fusible_op),
-                               std::get<TrivialOp>(downstream->fusible_op));
-  } else {
-    return TrivalxOther_Fusion(std::get<TrivialOp>(upstream->fusible_op),
-                               std::get<ReduceOp>(downstream->fusible_op));
+FusibleOp SinkTrivialLoopAlign(TrivialOp trivial_op,
+                               ReduceOp reduce_op,
+                               std::vector<size_t> fake_reduce_iter_idx) {
+  VLOG(4) << "SinkTrivialLoopAlign";
+  ir::Expr new_trivial_body = ir::ir_utils::IRCopy(trivial_op.GetFuncBody());
+  std::vector<ir::Var> all_out_iter_vars = GetOutputIters(trivial_op);
+  std::vector<ir::Var> non_reduce_iter_vars =
+      FilterWithFakeReduceIter(all_out_iter_vars, fake_reduce_iter_idx);
+  std::vector<ir::Var> fake_reduce_iter_vars;
+  for (const auto& idx : fake_reduce_iter_idx) {
+    fake_reduce_iter_vars.emplace_back(
+        all_out_iter_vars.at(static_cast(idx)));
   }
-}

-FusibleOp SinkTrivialLoopAlign(TrivialOp trivial_op, ReduceOp reduce_op) {
-  ir::Expr new_trivial_body = ir::ir_utils::IRCopy(trivial_op.GetFuncBody());
-  ir::Var last_iter = GetOutputIters(trivial_op).back();
-  ir::Expr trivial_last_for = (ExprSetFinderUtils::ChildFors *
-                               ExprSetFinderUtils::IsForIterVar(last_iter))
-                                  .GetSingle(new_trivial_body);
+  VLOG(4) << "all_out_iter_vars: "
+          << cinn::utils::Join(all_out_iter_vars, ", ");
+  VLOG(4) << "non_reduce_iter_vars: "
+          << cinn::utils::Join(non_reduce_iter_vars, ", ");
+  VLOG(4) << "fake_reduce_iter_vars: "
+          << cinn::utils::Join(fake_reduce_iter_vars, ", ");
+
+  ir::Expr trivial_last_for =
+      (ExprSetFinderUtils::ChildFors *
+       ExprSetFinderUtils::IsForIterVar(all_out_iter_vars.back()))
+          .GetSingle(new_trivial_body);
   ir::Expr new_for_body = trivial_last_for.As<ir::For>()->body;
-  new_for_body = ExprTransformerUtils::WrapForsTransformer(
-      GetReduceIters(reduce_op))(new_for_body);
-  trivial_last_for.As<ir::For>()->body = new_for_body;
-  return TrivialOp(new_trivial_body);
-}

-std::vector<FusibleOp> ReduceTransformRecursive(FusibleOp root_op,
-                                                FusionNode* fusion_tree) {
-  VLOG(4) << "ReduceTransformRecursive: " << *_GetFuncBodyPointer(root_op);
-  std::vector<FusibleOp> result;
-  for (auto& pair : fusion_tree->upstream) {
-    auto transformed_nodes = TransformReduceLoopRange(
-        std::get<ReduceOp>(pair.first->fusible_op), &root_op);
-    for (auto& node : transformed_nodes) {
-      auto child_flatten = ReduceTransformRecursive(node, pair.first);
-      result.insert(result.end(), child_flatten.begin(), child_flatten.end());
+  const auto ExpandIterVars = [&]() {
+    std::vector<ir::Var> result =
+        ComposeUtils::ConcatVector(non_reduce_iter_vars, fake_reduce_iter_vars);
+    auto upstream_reduce_iters = GetReduceIters(reduce_op);
+    if (fake_reduce_iter_vars.size() != upstream_reduce_iters.size()) {
+      result.insert(result.end(),
+                    upstream_reduce_iters.begin(),
+                    upstream_reduce_iters.end());
    }
-  }
-  VLOG(4) << "Before push_back, is trivial_op: "
-          << std::holds_alternative<TrivialOp>(root_op);
-  result.push_back(
-      std::holds_alternative<TrivialOp>(root_op)
-          ?
SinkTrivialLoopAlign( - std::get(root_op), - std::get( - fusion_tree->upstream.begin()->first->fusible_op)) - : root_op); - VLOG(4) << "After push_back."; - return result; -} + VLOG(4) << "ExpandIterVars: " << cinn::utils::Join(result, ", "); + return result; + }; -std::vector ReduceTransform(FusionNode* downstream) { - if (downstream->IsTrivial() && downstream->upstream.empty()) { - return {downstream->fusible_op}; - } - auto reduces = ReduceTransformRecursive(downstream->fusible_op, downstream); - return reduces; + ir::Expr new_schedule_realizer = + (ExprTransformerUtils::WrapForsTransformer(ExpandIterVars()) * + ExprTransformerUtils::WrapScheduleRealizer({}, "root"))(new_for_body); + + VLOG(4) << "new_schedule_realizer\n" << new_schedule_realizer; + return TrivialOp(new_schedule_realizer); } FusibleOp CreateFusibleOp(ir::Expr compute_body, OpPatternKind op_pattern) { @@ -512,77 +474,6 @@ std::vector FilterVector(const std::vector& ops, const F& f) { return res; } -FusionGraph::FusionGraph(const std::vector<::pir::Operation*>& ops, - const std::vector& op_compute_bodies) { - // shardable_axes_ = InferShardableAxes(ops); - VLOG(4) << "CreateFusionGraph"; - const auto& filtered_ops = FilterVector(ops, [](const ::pir::Operation* op) { - if (op->name() == "cinn_op.generate_shape") { - return false; - } - return true; - }); - const auto& op_patterns = GetOpPatternKindVector(filtered_ops); - CheckFusionInputValid(op_compute_bodies, op_patterns); - - std::unordered_map<::pir::Operation*, FusionNode*> op_to_node_map; - - for (int i = 0; i < filtered_ops.size(); ++i) { - FusionNode* node = - new FusionNode(CreateFusibleOp(op_compute_bodies[i], op_patterns[i])); - op_to_node_map[filtered_ops[i]] = node; - all_fusion_nodes_.emplace(node); - node->expr_related_op = filtered_ops[i]; - } - - for (::pir::Operation* op : filtered_ops) { - FusionNode* cur_node = op_to_node_map[op]; - - // add upstream nodes - for (int i = 0; i < op->num_operands(); ++i) { - ::pir::Value related_value = op->operand_source(i); - ::pir::Operation* input_op = related_value.defining_op(); - if (op_to_node_map.find(input_op) != op_to_node_map.end()) { - FusionNode* upstream_node = op_to_node_map[input_op]; - cur_node->upstream[upstream_node] = related_value; - upstream_node->downstream[cur_node] = related_value; - } - } - - // add downstream nodes - for (int i = 0; i < op->num_results(); ++i) { - ::pir::Value related_value = op->result(i); - for (auto consumer_it = related_value.use_begin(); - consumer_it != related_value.use_end(); - ++consumer_it) { - ::pir::Operation* output_op = consumer_it->owner(); - if (op_to_node_map.find(output_op) != op_to_node_map.end()) { - FusionNode* downstream_node = op_to_node_map[output_op]; - cur_node->downstream[downstream_node] = related_value; - downstream_node->upstream[cur_node] = related_value; - } - } - } - - if (cur_node->upstream.empty()) { - entrance_nodes_.emplace(cur_node); - } - - if (cur_node->downstream.empty()) { - exit_nodes_.emplace(cur_node); - } - } - - VLOG(4) << "FusionGraph Created, fusion node size: " - << all_fusion_nodes_.size(); -} - -FusionGraph::~FusionGraph() { - for (FusionNode* node : all_fusion_nodes_) { - delete node; - } -} - std::vector GetShapeFromVars(const std::vector& vars) { std::vector res; for (const auto& v : vars) { @@ -605,175 +496,90 @@ void DebugPrintReduceVar(const FusibleOp& op) { } } -void FusionGraph::SplitReduceTransform() { - VLOG(4) << "SplitReduceTransform Start."; - std::vector result; - for (const auto& fop : fusion_results_) { - if 
(std::holds_alternative(fop)) { - VLOG(4) << "DebugPrint Op Origin: "; - ReduceOp reduce_op = std::get(fop); - ir::Tensor reduce_out_tensor = GetOutputTensor(reduce_op); - // substitude compute_body with a new init value. - ir::Expr trivial_compute_body = - ExprTransformerUtils::ChangeTensorLoadTransformer( - GetOutputTensor(fop), - GetInitExpr(reduce_op))(GetComputeBody(reduce_op)); - - const std::vector& all_iters = ComposeUtils::ConcatVector( - GetOutputIters(reduce_op), GetReduceIters(reduce_op)); - VLOG(4) << "Trivial Compute Body is " << trivial_compute_body; - ir::Tensor new_trivial_tensor = - ir::Tensor(reduce_out_tensor->name + "_split_transform", - reduce_out_tensor->type(), +std::pair SplitReduceOp(const ReduceOp& reduce_op) { + VLOG(4) << "DebugPrint Op Origin: "; + ir::Tensor reduce_out_tensor = GetOutputTensor(reduce_op); + // substitude compute_body with a new init value. + ir::Expr trivial_compute_body = + ExprTransformerUtils::ChangeTensorLoadTransformer( + GetOutputTensor(reduce_op), + GetInitExpr(reduce_op))(GetComputeBody(reduce_op)); + + const std::vector& all_iters = ComposeUtils::ConcatVector( + GetOutputIters(reduce_op), GetReduceIters(reduce_op)); + VLOG(4) << "Trivial Compute Body is " << trivial_compute_body; + ir::Tensor new_trivial_tensor = + ir::Tensor(reduce_out_tensor->name + "_split_transform", + reduce_out_tensor->type(), + GetShapeFromVars(all_iters), + GetShapeFromVars(all_iters), + ir::ComputeOp::Make( + reduce_out_tensor->name + "_split_transform", + [body = trivial_compute_body]( + const std::vector& indices) { return body; }, GetShapeFromVars(all_iters), GetShapeFromVars(all_iters), - ir::ComputeOp::Make( - reduce_out_tensor->name + "_split_transform", - [body = trivial_compute_body]( - const std::vector& indices) { return body; }, - GetShapeFromVars(all_iters), - GetShapeFromVars(all_iters), - {}), - {}); - new_trivial_tensor->WithBuffer(); - VLOG(4) << "Created Tensor is: " << new_trivial_tensor; - VLOG(4) << "Load Expr is: " - << new_trivial_tensor(ComposeUtils::VarVec2ExprVec(all_iters)); - - // push trivial op - VLOG(4) << "Splited TrivialOp is " - << CreateTrivialExpr( - all_iters, trivial_compute_body, new_trivial_tensor); - - result.emplace_back(TrivialOp(CreateTrivialExpr( - all_iters, trivial_compute_body, new_trivial_tensor))); - - // push reduce op, change compute_body to - VLOG(4) - << "WrapReduceOperation start: with reduce_type: " + {}), + {}); + new_trivial_tensor->WithBuffer(); + VLOG(4) << "Created Tensor is: " << new_trivial_tensor; + VLOG(4) << "Load Expr is: " + << new_trivial_tensor(ComposeUtils::VarVec2ExprVec(all_iters)); + + // push trivial op + VLOG(4) << "Splited TrivialOp is " + << CreateTrivialExpr( + all_iters, trivial_compute_body, new_trivial_tensor); + + const auto& result_trivial = TrivialOp( + CreateTrivialExpr(all_iters, trivial_compute_body, new_trivial_tensor)); + + // push reduce op, change compute_body to + VLOG(4) << "WrapReduceOperation start: with reduce_type: " << GetOutputTensor(reduce_op)->body().As()->reduce_type; - VLOG(4) << "WrapReduceOperation new_trivial_tensor: " - << new_trivial_tensor(ComposeUtils::VarVec2ExprVec(all_iters)); - const ir::Expr& new_reduce_body = - ExprTransformerUtils::WrapReduceOperation( - GetOutputTensor(reduce_op)->body().As()->reduce_type, - GetOutputTensor(reduce_op), - ComposeUtils::VarVec2ExprVec(GetOutputIters(reduce_op)))( - new_trivial_tensor(ComposeUtils::VarVec2ExprVec(all_iters))); - VLOG(4) << "Splited ReduceOp body is " << new_reduce_body; - VLOG(4) << "Splited 
ReduceOp is " - << CreateExprWithNewComputeBody( - fop, - ExprSetFinderUtils::Store2Value.GetSingle( - new_reduce_body)); - result.emplace_back(ReduceOp(CreateExprWithNewComputeBody( - fop, ExprSetFinderUtils::Store2Value.GetSingle(new_reduce_body)))); - } else { - result.emplace_back(fop); - } - } - fusion_results_ = result; + VLOG(4) << "WrapReduceOperation new_trivial_tensor: " + << new_trivial_tensor(ComposeUtils::VarVec2ExprVec(all_iters)); + const ir::Expr& new_reduce_body = ExprTransformerUtils::WrapReduceOperation( + GetOutputTensor(reduce_op)->body().As()->reduce_type, + GetOutputTensor(reduce_op), + ComposeUtils::VarVec2ExprVec(GetOutputIters(reduce_op)))( + new_trivial_tensor(ComposeUtils::VarVec2ExprVec(all_iters))); + VLOG(4) << "Splited ReduceOp body is " << new_reduce_body; + VLOG(4) << "Splited ReduceOp is " + << CreateExprWithNewComputeBody( + reduce_op, + ExprSetFinderUtils::Store2Value.GetSingle(new_reduce_body)); + const auto& result_reduce = ReduceOp(CreateExprWithNewComputeBody( + reduce_op, ExprSetFinderUtils::Store2Value.GetSingle(new_reduce_body))); VLOG(4) << "SplitReduceTransform End~"; + return std::make_pair(result_trivial, result_reduce); } -std::vector FusionGraph::DoFusion() { - VLOG(4) << "Start Trivial Fusion"; - DoTrivialFusion(); - VLOG(4) << "Start R + T and R + R Fusion"; - ReduceLoopTranform(); - // TODO(@xubin): remove this when backend support arbitrary reduce. - VLOG(4) << "Split Reduce Transform into a tmp tensor to keep reduce clean."; - SplitReduceTransform(); - return GetExprResults(); -} - -FusionNode* FusionGraph::FindTrivialFusibleNode() { - for (FusionNode* node : all_fusion_nodes_) { - if (node->IsTrivial() && !node->downstream.empty()) { - return node; - } - } - return nullptr; -} - -void FusionGraph::DoTrivialFusion() { - FusionNode* upstream = nullptr; - // use funcion to get upstream and downstream is save here - // cause we might delete Nodes in this process - while ((upstream = FindTrivialFusibleNode()) != nullptr) { - std::unordered_map fusion_candidate = - upstream->downstream; - upstream->downstream.clear(); - for (const auto& pair_data : fusion_candidate) { - FusionNode* downstream = pair_data.first; - FusionNode* new_node = - new FusionNode(TrivialFusion(upstream, downstream)); - new_node->replace_topo_structure_of_fused_nodes(upstream, downstream); - AppendNode(new_node); - RemoveNode(downstream); - } - RemoveNode(upstream); - } -} - -void FusionGraph::ReduceLoopTranform() { - for (FusionNode* node : exit_nodes_) { - auto fusion_nodes = ReduceTransform(node); - fusion_results_.insert( - fusion_results_.end(), fusion_nodes.begin(), fusion_nodes.end()); - } -} - -std::vector FusionGraph::GetExprResults() { - std::vector output_exprs; - for (const auto& node : fusion_results_) { - output_exprs.emplace_back(_GetRootExpr(node)); - } - return output_exprs; -} - -void FusionGraph::RemoveNode(FusionNode* node) { - if (all_fusion_nodes_.find(node) != all_fusion_nodes_.end()) { - all_fusion_nodes_.erase(node); - } - if (entrance_nodes_.find(node) != entrance_nodes_.end()) { - entrance_nodes_.erase(node); - } - if (exit_nodes_.find(node) != exit_nodes_.end()) { - exit_nodes_.erase(node); - } - delete node; -} - -void FusionGraph::AppendNode(FusionNode* node) { - all_fusion_nodes_.emplace(node); - if (node->upstream.empty()) { - entrance_nodes_.emplace(node); - } +} // namespace trivial_fusion_detail - if (node->downstream.empty()) { - exit_nodes_.emplace(node); - } -} +std::vector OperationFusion( + const std::vector<::pir::Operation*>& 
original_ops, + const std::vector& op_compute_bodies) { + const auto& ops = trivial_fusion_detail::FilterVector( + original_ops, [](const ::pir::Operation* op) { + if (op->name() == "cinn_op.generate_shape") { + return false; + } + return true; + }); -FusionNode* FusionGraph::FindReduceUpstream(FusionNode* node) { - for (const auto& pair_data : node->upstream) { - FusionNode* upstream = pair_data.first; - if (!upstream->IsTrivial()) { - return upstream; - } + std::vector contents; + for (int i = 0; i < ops.size(); i++) { + contents.emplace_back(ops[i], op_compute_bodies[i]); + // contents.emplace_back(ops[i]); } - return nullptr; -} + const auto& fusion_nodes = + cinn::fusion::ClusterOps(contents); -} // namespace trivial_fusion_detail + CHECK(fusion_nodes.size() == 1) + << "Only support one fusion node in backend now."; -std::vector OperationFusion( - const std::vector<::pir::Operation*>& ops, - const std::vector& op_compute_bodies) { - trivial_fusion_detail::FusionGraph graph = - trivial_fusion_detail::FusionGraph(ops, op_compute_bodies); - auto output = graph.DoFusion(); + const auto& output = GetExprFromPattern(fusion_nodes[0]->stmt_pattern_); VLOG(4) << "Fusion Result: output size is " << output.size(); for (const auto& expr : output) { VLOG(4) << expr; diff --git a/paddle/cinn/hlir/framework/pir/trivial_op_impl.h b/paddle/cinn/hlir/framework/pir/trivial_op_impl.h index f5964ad854848..48eddf4852870 100644 --- a/paddle/cinn/hlir/framework/pir/trivial_op_impl.h +++ b/paddle/cinn/hlir/framework/pir/trivial_op_impl.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include #include #include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" @@ -28,6 +29,7 @@ #include "paddle/cinn/ir/schedule/ir_schedule.h" #include "paddle/cinn/ir/schedule/ir_schedule_util.h" #include "paddle/cinn/lang/placeholder.h" +#include "paddle/cinn/operator_fusion/pattern_graph.h" #include "paddle/cinn/optim/schedule_block_dce.h" #include "paddle/cinn/optim/transform_gpu_forloop.h" #include "paddle/common/ddim.h" @@ -92,6 +94,8 @@ ir::Expr* _GetFuncBodyPointer(FusibleOp op); ir::Expr CopyReduceBody(const FusibleOp& downstream, const ReduceOp& upstream); +int GetTensorCounter(); + ir::Expr CreateReduceExpr( const std::vector& output_iters, const std::vector& reduce_iters, @@ -103,23 +107,13 @@ ir::Expr CreateReduceExpr( ir::Expr CreateTrivialExpr(const std::vector& output_iters, const ir::Expr& function_body, const ir::Tensor& new_write_tensor); + ir::Expr CreateExprWithNewComputeBody(const FusibleOp& fusible_op, const ir::Expr& new_compute_body); -struct FusionNode { - FusibleOp fusible_op; - ::pir::Operation* expr_related_op; - - std::unordered_map upstream; - std::unordered_map downstream; - - explicit FusionNode(FusibleOp fusible_op); - static std::string GetTensorCounter(); - void replace_topo_structure_of_fused_nodes(FusionNode* fused_up_node, - FusionNode* fused_down_node); +bool CheckAllLoopRangeEq(ReduceOp reduce_upper, TrivialOp trivial_down); - bool IsTrivial() const; -}; +FusibleOp CreateFusibleOp(ir::Expr compute_body, OpPatternKind op_pattern); template DownStreamOp TrivalxOther_Fusion(TrivialOp upstream, DownStreamOp downstream) { @@ -142,54 +136,30 @@ DownStreamOp TrivalxOther_Fusion(TrivialOp upstream, DownStreamOp downstream) { return DownStreamOp(modified_body); } -bool CheckAllLoopRangeEq(ReduceOp reduce_upper, TrivialOp trivial_down); - -std::vector TransformReduceLoopRange(const ReduceOp& upstream, - FusibleOp* downstream); - -FusibleOp TrivialFusion(FusionNode* upstream, 
FusionNode* downstream); - -FusibleOp SinkTrivialLoopAlign(TrivialOp trivial_op, ReduceOp reduce_op); - -std::vector ReduceTransformRecursive(FusibleOp root_op, - FusionNode* fusion_tree); -std::vector ReduceTransform(FusionNode* downstream); - -FusibleOp CreateFusibleOp(ir::Expr compute_body, OpPatternKind op_pattern); - -struct FusionGraph { - explicit FusionGraph(const std::vector<::pir::Operation*>& ops, - const std::vector& op_compute_bodies); - - ~FusionGraph(); - - std::vector DoFusion(); - - private: - FusionNode* FindTrivialFusibleNode(); - - void DoTrivialFusion(); - - void ReduceLoopTranform(); - - void SplitReduceTransform(); - - std::vector GetExprResults(); - - void RemoveNode(FusionNode* node); - - void AppendNode(FusionNode* node); - - FusionNode* FindReduceUpstream(FusionNode* node); - - private: - std::unordered_set all_fusion_nodes_; - std::vector fusion_results_; - std::unordered_set entrance_nodes_; - std::unordered_set exit_nodes_; +std::pair SplitReduceOp(const ReduceOp& reduce_op); + +std::vector TransformReduceLoopRange( + const ReduceOp& upstream, + FusibleOp* downstream, + std::vector fake_reduce_iter_idx); + +template +std::vector FilterWithFakeReduceIter( + const std::vector& input, std::vector fake_reduce_iter_idx) { + std::vector result; + for (size_t i = 0; i < input.size(); i++) { + if (std::find(fake_reduce_iter_idx.begin(), + fake_reduce_iter_idx.end(), + i) == fake_reduce_iter_idx.end()) { + result.emplace_back(input.at(i)); + } + } + return result; +} - // std::unordered_map<::pir::Value, ShardableAxes> shardable_axes_; -}; +FusibleOp SinkTrivialLoopAlign(TrivialOp trivial_op, + ReduceOp reduce_op, + std::vector fake_reduce_iter_idx); } // namespace trivial_fusion_detail diff --git a/paddle/cinn/hlir/framework/pir/trivial_op_util.cc b/paddle/cinn/hlir/framework/pir/trivial_op_util.cc index 9b776aae4e454..c930aa8a8fd95 100644 --- a/paddle/cinn/hlir/framework/pir/trivial_op_util.cc +++ b/paddle/cinn/hlir/framework/pir/trivial_op_util.cc @@ -502,7 +502,7 @@ void CheckFusionInputValid(const std::vector& op_compute_bodies, const std::vector& op_patterns) { if (VLOG_IS_ON(4)) { for (const auto& func : op_compute_bodies) { - VLOG(4) << "TrivialOpFusion: {FuncBody is} :" << func; + VLOG(4) << "FuncBody is :" << func; } for (const auto& op_ptn : op_patterns) { VLOG(4) << "OpPattern is :" << op_ptn; diff --git a/paddle/cinn/hlir/framework/pir/trivial_op_util.h b/paddle/cinn/hlir/framework/pir/trivial_op_util.h index e28cad31310f7..9dbddc6ada18c 100644 --- a/paddle/cinn/hlir/framework/pir/trivial_op_util.h +++ b/paddle/cinn/hlir/framework/pir/trivial_op_util.h @@ -46,6 +46,18 @@ std::vector ConcatVector(const std::vector& first, return result; } +template +std::unordered_map MakeMap(const std::vector& keys, + const std::vector& values) { + std::unordered_map result = std::unordered_map(); + + CHECK(keys.size() == values.size()); + for (int i = 0; i < keys.size(); i++) { + result[keys[i]] = values[i]; + } + return result; +} + std::vector ExprVec2VarVec(const std::vector& in); std::vector VarVec2ExprVec(const std::vector& in); diff --git a/paddle/cinn/hlir/framework/pir/utils.cc b/paddle/cinn/hlir/framework/pir/utils.cc index 4d20fbf382fe6..1bc39aee5370f 100644 --- a/paddle/cinn/hlir/framework/pir/utils.cc +++ b/paddle/cinn/hlir/framework/pir/utils.cc @@ -37,6 +37,7 @@ PD_DECLARE_string(allow_cinn_ops); PD_DECLARE_string(deny_cinn_ops); +COMMON_DECLARE_bool(disable_dyshape_in_train); namespace cinn { namespace hlir { @@ -125,23 +126,23 @@ class OpTransInfo { 
DeParamCondT deny_param_cond_{{"batch_norm", {"ReserveSpace"}}, {"batch_norm_grad", {"ReserveSpace"}}}; - std::unordered_set default_deny_ops_{ - "feed", - "fetch", - "conv2d", - "conv2d_grad", - "depthwise_conv2d", - "depthwise_conv2d_grad", - "dropout", - "pool2d", - "pool2d_grad", - "split", - "matmul", - "matmul_grad", - "embedding_grad", - "embedding", - "arange", - }; + std::unordered_set default_deny_ops_{"feed", + "fetch", + "conv2d", + "conv2d_grad", + "depthwise_conv2d", + "depthwise_conv2d_grad", + "dropout", + "pool2d", + "pool2d_grad", + "split", + "matmul", + "matmul_grad", + "embedding_grad", + "embedding", + "arange", + "softmax", + "randint"}; }; std::string OpNameAfterStripDialect(const ::pir::Operation& op) { @@ -173,12 +174,7 @@ bool UnimplementOps(const ::pir::Operation& op) { return false; } -bool HaveZeroDimInput(const ::pir::Operation& op) { - auto HasZeroDim = [](const ::pir::Type& type) { - auto tensor_type = type.dyn_cast<::pir::DenseTensorType>(); - return tensor_type && tensor_type.dims().size() == 0U; - }; - +bool HaveUnkDim(const ::pir::Operation& op) { auto HasNegDim = [](const ::pir::Type& type) { auto tensor_type = type.dyn_cast<::pir::DenseTensorType>(); @@ -194,9 +190,9 @@ bool HaveZeroDimInput(const ::pir::Operation& op) { }; // Judge for vector - auto HasZeroDimInVT = [&](const std::vector<::pir::Type>& types) { + auto HasUnkDimInVT = [&](const std::vector<::pir::Type>& types) { for (auto& type : types) { - if (HasZeroDim(type)) return true; + if (HasNegDim(type)) return true; } return false; }; @@ -205,8 +201,18 @@ bool HaveZeroDimInput(const ::pir::Operation& op) { auto value = op.operand_source(i); if (!value || !value.type()) continue; if (auto vector_type = value.type().dyn_cast<::pir::VectorType>()) { - if (HasZeroDimInVT(vector_type.data())) return true; - } else if (HasZeroDim(value.type()) || HasNegDim(value.type())) { + if (HasUnkDimInVT(vector_type.data())) return true; + } else if (HasNegDim(value.type())) { + return true; + } + } + + for (size_t i = 0; i < op.num_results(); ++i) { + auto value = op.result(i); + if (!value || !value.type()) continue; + if (auto vector_type = value.type().dyn_cast<::pir::VectorType>()) { + if (HasUnkDimInVT(vector_type.data())) return true; + } else if (HasNegDim(value.type())) { return true; } } @@ -323,22 +329,21 @@ bool IsTempDenySpecialOp(const ::pir::Operation& op) { if (op.name() == "cinn_op.generate_shape") { return false; } - return IsShapeComputeOp(op) || IsSmallNumelOp(op); + return IsShapeComputeOp(op); } // Mainly used for pd_to_cinn_pass and reused in IsSupportInCinn function. bool IsDeniedInCinn(const ::pir::Operation& op) { + if (FLAGS_disable_dyshape_in_train && HaveUnkDim(op)) { + return true; + } if (!AllInputDenseTensor(op) || UnimplementOps(op)) { VLOG(5) << "Found " << op.name() << " UnimplementOps or NotAllInputDenseTensor. " << "So mark IsDeniedForCinn: " << true; return true; } - if (IsTempDenySpecialOp(op)) { - VLOG(5) << "Found " << op.name() << " is in TempDenySpecialOp." 
- << "So mark IsDeniedForCinn: " << true; - return true; - } + // Strip the dialect, like pd_op.abs -> abs const auto op_name = OpNameAfterStripDialect(op); const bool is_denied = OpTransInfo().IsDeniedByDefault(op_name); @@ -423,12 +428,12 @@ std::string CompatibleInfo::OpFuncName(const ::pir::Operation& op) { std::string CompatibleInfo::GroupOpsName( const std::vector<::pir::Operation*>& ops) { - std::string name = "fn"; + std::string name = "fn_"; for (auto* op : ops) { - std::string op_name = OpName(*op); - name += "_" + cinn::common::Context::Global().NewName(op_name); + name += OpName(*op); + name += "_"; } - return name; + return cinn::common::Context::Global().NewName(name); } std::string CompatibleInfo::ValueName(const ::pir::Value& value) { diff --git a/paddle/cinn/hlir/framework/pir_compiler.cc b/paddle/cinn/hlir/framework/pir_compiler.cc index 2db39508ce1e1..73f2d11f3e1b4 100644 --- a/paddle/cinn/hlir/framework/pir_compiler.cc +++ b/paddle/cinn/hlir/framework/pir_compiler.cc @@ -16,23 +16,128 @@ #include "paddle/cinn/hlir/framework/pir/utils.h" #include "paddle/cinn/utils/multi_threading.h" +#include "paddle/common/enforce.h" +#include "paddle/common/flags.h" + +PD_DECLARE_bool(enable_cinn_compile_cache); namespace cinn::hlir::framework { +class CompilationContextMapper { + public: + CompilationContextMapper(const Target& target, + const std::vector& groups) { + Construct(target, groups); + } + std::vector& UniqueCompilationContexts() { + return group_compilation_contexts_; + } + std::vector>& + MutableCompilationResult() { + return compilation_results_; + } + + std::vector RecoverKernelInfos(); + void UpdateGlobalCache(); + void SetFinalize(bool val) { is_finalized_ = val; } + + private: + void Construct(const Target& target, + const std::vector& groups); + std::vector mapper_index_; + std::vector fusion_infos_; + std::vector group_compilation_contexts_; + std::vector> compilation_results_; + + bool is_finalized_{false}; +}; + std::vector PirCompiler::Build( const std::vector& groups) { - std::vector kernel_infos(groups.size()); - for (int i = 0; i < groups.size(); ++i) { - group_compilation_contexts_.emplace_back(target_, groups[i]); + CompilationContextMapper ctx_mapper(target_, groups); + auto& group_compilation_contexts = ctx_mapper.UniqueCompilationContexts(); + auto& compilation_results = ctx_mapper.MutableCompilationResult(); + + const size_t task_size = group_compilation_contexts.size(); + const size_t thread_size = FLAGS_enable_cinn_compile_cache ? 
task_size : 1; + VLOG(5) << "Found " << task_size << " new groups parsed from " + << groups.size(); + if (task_size > 0) { + auto worker_fn = [&](int index) { + CompilationTask task(&group_compilation_contexts[index]); + compilation_results[index] = task(); + }; + utils::parallel_run(worker_fn, + utils::SequenceDispatcher(0, task_size), + /*thread_num=*/thread_size); } - auto worker_fn = [&](int index) { - CompilationTask task(&group_compilation_contexts_[index]); - task(); - kernel_infos[index] = task.GetCINNKernelInfo(); + ctx_mapper.SetFinalize(true); + ctx_mapper.UpdateGlobalCache(); + return ctx_mapper.RecoverKernelInfos(); +} + +void CompilationContextMapper::Construct( + const Target& target, const std::vector& groups) { + std::unordered_set unique_infos; + const auto IsNewAndUnique = + [&unique_infos](const pir::FusionInfo& info) -> bool { + const bool is_unique = unique_infos.find(info.hash()) == unique_infos.end(); + const bool is_new = !CompilationCache::Instance().Has(info); + return is_new && is_unique; }; - utils::parallel_run( - worker_fn, utils::SequenceDispatcher(0, groups.size()), -1); + + for (size_t i = 0; i < groups.size(); ++i) { + fusion_infos_.emplace_back(*groups[i]); + // If FLAGS_enable_cinn_compile_cache=False, Cache strategy will not take + // effects. + if (IsNewAndUnique(fusion_infos_[i]) || !FLAGS_enable_cinn_compile_cache) { + mapper_index_.push_back(i); + group_compilation_contexts_.emplace_back(target, groups[i]); + compilation_results_.push_back( + std::make_shared(target)); + } + unique_infos.insert(fusion_infos_[i].hash()); + } +} + +std::vector +CompilationContextMapper::RecoverKernelInfos() { + PADDLE_ENFORCE_EQ( + is_finalized_, + true, + ::common::errors::PreconditionNotMet( + "Required is_finalized_ = true, please call SetFinalize() firstly.")); + PADDLE_ENFORCE_EQ(group_compilation_contexts_.size(), + compilation_results_.size(), + ::common::errors::PreconditionNotMet( + "Required group_compilation_contexts_.size() = " + "compilation_results_.size().")); + + std::vector kernel_infos(fusion_infos_.size()); + for (size_t i = 0; i < fusion_infos_.size(); ++i) { + kernel_infos[i] = + CompilationCache::Instance().GetKernelInfo(fusion_infos_[i]); + } return kernel_infos; } +void CompilationContextMapper::UpdateGlobalCache() { + PADDLE_ENFORCE_EQ( + is_finalized_, + true, + ::common::errors::PreconditionNotMet( + "Required is_finalized_ = true, please call SetFinalize() firstly.")); + for (size_t i = 0; i < compilation_results_.size(); ++i) { + PADDLE_ENFORCE_LT(mapper_index_[i], + fusion_infos_.size(), + ::common::errors::PreconditionNotMet( + "Required mapper_index < fusion_infos_.size().")); + const auto& fusion_info = fusion_infos_[mapper_index_[i]]; + const auto& int_args_map = + compilation_results_[i]->GetBackendResource()->GetIntArgsMap(); + VLOG(5) << "Insert new compiled result into cache, fusion_info: " + << fusion_info << ", int_args_map: " << int_args_map; + CompilationCache::Instance().Insert(fusion_info, compilation_results_[i]); + } +} } // namespace cinn::hlir::framework diff --git a/paddle/cinn/hlir/framework/pir_compiler.h b/paddle/cinn/hlir/framework/pir_compiler.h index d9429b76a6fa8..9ea83defa0cb9 100644 --- a/paddle/cinn/hlir/framework/pir_compiler.h +++ b/paddle/cinn/hlir/framework/pir_compiler.h @@ -31,7 +31,6 @@ class PirCompiler final { CINN_DISALLOW_COPY_AND_ASSIGN(PirCompiler); Target target_; - std::vector group_compilation_contexts_; }; } // namespace cinn::hlir::framework diff --git a/paddle/cinn/hlir/framework/program.cc 
b/paddle/cinn/hlir/framework/program.cc index 0e00795ae775d..dd8d8aba91da0 100644 --- a/paddle/cinn/hlir/framework/program.cc +++ b/paddle/cinn/hlir/framework/program.cc @@ -169,6 +169,33 @@ void Program::Export(const std::vector& persistent_vars, fclose(f); } +void DeviceSynchronizeImpl(common::UnknownArch, void* stream) { + LOG(FATAL) << "NotImplemented."; +} + +void DeviceSynchronizeImpl(common::X86Arch, void* stream) { + // Do nothing. +} + +void DeviceSynchronizeImpl(common::ARMArch, void* stream) { + // Do nothing. +} + +void DeviceSynchronizeImpl(common::NVGPUArch, void* stream) { +#ifdef CINN_WITH_CUDA + VLOG(4) << "-- The value of the used stream: " << stream; + if (stream == nullptr) { + CUDA_CALL(cudaDeviceSynchronize()); + } +#endif +} + +void DeviceSynchronize(common::Arch arch, void* stream) { + return std::visit( + [&](const auto& impl) { return DeviceSynchronizeImpl(impl, stream); }, + arch.variant()); +} + void Program::Execute( const std::map* name2podargs, void* stream, @@ -176,12 +203,7 @@ void Program::Execute( for (auto& ins : instrs_) { ins->Run(name2podargs, false, stream, use_cache); } -#ifdef CINN_WITH_CUDA - VLOG(4) << "-- The value of the used stream: " << stream; - if (instrs_[0]->target_.arch == Target::Arch::NVGPU && stream == nullptr) { - CUDA_CALL(cudaDeviceSynchronize()); - } -#endif + DeviceSynchronize(instrs_[0]->target_.arch, stream); } void Program::ExecuteTest(int repeat_) { @@ -197,11 +219,7 @@ void Program::ExecuteTest(int repeat_) { ins->Run(); } } -#ifdef CINN_WITH_CUDA - if (instrs_[0]->target_.arch == Target::Arch::NVGPU) { - CUDA_CALL(cudaDeviceSynchronize()); - } -#endif + DeviceSynchronize(instrs_[0]->target_.arch, nullptr); double test_op_time = timer1.Stop() / repeat_; VLOG(3) << "Repeat times: [" << repeat_ << "], average op time: [" << test_op_time << "] ms"; diff --git a/paddle/cinn/hlir/op/contrib/argmax.cc b/paddle/cinn/hlir/op/contrib/argmax.cc index b3c6a647c4bc3..f1ccccd61d7c4 100644 --- a/paddle/cinn/hlir/op/contrib/argmax.cc +++ b/paddle/cinn/hlir/op/contrib/argmax.cc @@ -184,7 +184,7 @@ std::shared_ptr StrategyForArgmax( output_shapes[0].end(), 1, std::multiplies()); - if (prod_size > 1 && target.arch == Target::Arch::X86) { + if (prod_size > 1 && std::holds_alternative(target.arch)) { pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } std::vector res{ diff --git a/paddle/cinn/hlir/op/contrib/argmin.cc b/paddle/cinn/hlir/op/contrib/argmin.cc index dff137f0d9952..798f420cc76fc 100644 --- a/paddle/cinn/hlir/op/contrib/argmin.cc +++ b/paddle/cinn/hlir/op/contrib/argmin.cc @@ -182,7 +182,7 @@ std::shared_ptr StrategyForArgmin( output_shapes[0].end(), 1, std::multiplies()); - if (prod_size > 1 && target.arch == Target::Arch::X86) { + if (prod_size > 1 && std::holds_alternative(target.arch)) { pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } std::vector res{ diff --git a/paddle/cinn/hlir/op/contrib/gather_nd.cc b/paddle/cinn/hlir/op/contrib/gather_nd.cc index 8080cabb83609..92ba839f17211 100644 --- a/paddle/cinn/hlir/op/contrib/gather_nd.cc +++ b/paddle/cinn/hlir/op/contrib/gather_nd.cc @@ -187,11 +187,17 @@ std::shared_ptr StrategyForGatherNd( 1, std::multiplies()); if (prod_size > 1) { - if (target.arch == Target::Arch::NVGPU) { - pe::IRCudaScheduleInjective(ir_sch, output_shapes.front(), target); - } else if (target.arch == Target::Arch::X86) { - pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { 
CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + pe::IRScheduleInjectiveCPU( + ir_sch, output_shapes.front(), target, true); + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { + pe::IRCudaScheduleInjective(ir_sch, output_shapes.front(), target); + }, + }); } std::vector res{ cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; diff --git a/paddle/cinn/hlir/op/contrib/repeat.cc b/paddle/cinn/hlir/op/contrib/repeat.cc index f77e5939099b5..5347d454c39aa 100644 --- a/paddle/cinn/hlir/op/contrib/repeat.cc +++ b/paddle/cinn/hlir/op/contrib/repeat.cc @@ -198,11 +198,17 @@ std::shared_ptr StrategyForRepeat( 1, std::multiplies()); if (prod_size > 1) { - if (target.arch == Target::Arch::NVGPU) { - pe::IRCudaScheduleInjective(ir_sch, output_shapes.front(), target); - } else if (target.arch == Target::Arch::X86) { - pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + pe::IRScheduleInjectiveCPU( + ir_sch, output_shapes.front(), target, true); + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { + pe::IRCudaScheduleInjective(ir_sch, output_shapes.front(), target); + }, + }); } std::vector res{ cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; diff --git a/paddle/cinn/hlir/op/contrib/resize.cc b/paddle/cinn/hlir/op/contrib/resize.cc index 91319ef7e5ac1..63329c5602013 100644 --- a/paddle/cinn/hlir/op/contrib/resize.cc +++ b/paddle/cinn/hlir/op/contrib/resize.cc @@ -55,15 +55,18 @@ ir::Tensor Resize(const ir::Tensor &input, const std::string &mode, const std::string &output_name) { std::string func_name; - - if (target.arch == cinn::common::Target::Arch::NVGPU) { - func_name.assign("cinn_cuda_resize_"); - } else if (target.arch == cinn::common::Target::Arch::X86) { - func_name.assign("cinn_host_resize_"); - } else { - PADDLE_THROW(phi::errors::Fatal( - "Resize only supports X86 and NVGPU ! Please Check.\n")); - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { + PADDLE_THROW(phi::errors::Fatal( + "Resize only supports X86 and NVGPU ! Please Check.\n")); + }, + [&](common::X86Arch) { func_name.assign("cinn_host_resize_"); }, + [&](common::ARMArch) { + PADDLE_THROW(phi::errors::Fatal( + "Resize only supports X86 and NVGPU ! 
Please Check.\n")); + }, + [&](common::NVGPUArch) { func_name.assign("cinn_cuda_resize_"); }, + }); if (mode == "bilinear") { func_name.append("bilinear"); @@ -241,11 +244,17 @@ std::shared_ptr StrategyForResize( 1, std::multiplies()); if (prod_size > 1) { - if (target.arch == Target::Arch::NVGPU) { - pe::IRCudaScheduleInjective(ir_sch, output_shapes.front(), target); - } else if (target.arch == Target::Arch::X86) { - pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + pe::IRScheduleInjectiveCPU( + ir_sch, output_shapes.front(), target, true); + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { + pe::IRCudaScheduleInjective(ir_sch, output_shapes.front(), target); + }, + }); } std::vector res{ cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; diff --git a/paddle/cinn/hlir/op/contrib/sort.cc b/paddle/cinn/hlir/op/contrib/sort.cc index 49f50a13ab6c9..2ecb08b41749c 100644 --- a/paddle/cinn/hlir/op/contrib/sort.cc +++ b/paddle/cinn/hlir/op/contrib/sort.cc @@ -51,14 +51,22 @@ std::vector ArgSort(const ir::Tensor &A, const std::string &name) { std::string find_func_name; std::string index_func_name; - if (target.arch == cinn::common::Target::Arch::NVGPU) { - find_func_name.assign("cinn_nvgpu_next_smallest_int32"); - } else if (target.arch == cinn::common::Target::Arch::X86) { - find_func_name.assign("cinn_host_next_smallest_int32"); - } else { - PADDLE_THROW(phi::errors::Fatal( - "ArgSort only supports X86 and NVGPU ! Please Check.\n")); - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { + PADDLE_THROW(phi::errors::Fatal( + "ArgSort only supports X86 and NVGPU ! Please Check.\n")); + }, + [&](common::X86Arch) { + find_func_name.assign("cinn_host_next_smallest_int32"); + }, + [&](common::ARMArch) { + PADDLE_THROW(phi::errors::Fatal( + "ArgSort only supports X86 and NVGPU ! 
Please Check.\n")); + }, + [&](common::NVGPUArch) { + find_func_name.assign("cinn_nvgpu_next_smallest_int32"); + }, + }); if (is_ascend) { index_func_name = cinn::hlir::GetExternFuncName(target, A->type(), "lt_num"); @@ -215,7 +223,8 @@ std::shared_ptr StrategyForSort( output_shapes[0].end(), 1, std::multiplies()); - if (prod_size > 1 && target.arch == Target::Arch::X86) { + if (prod_size > 1 && + std::holds_alternative(target.arch)) { pe::IRScheduleInjectiveCPU( ir_sch, output_shapes.front(), target, true); } @@ -298,7 +307,7 @@ std::shared_ptr StrategyForArgSort( output_shapes[0].end(), 1, std::multiplies()); - if (prod_size > 1 && target.arch == Target::Arch::X86) { + if (prod_size > 1 && std::holds_alternative(target.arch)) { pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } std::vector res{ diff --git a/paddle/cinn/hlir/op/elementwise.cc b/paddle/cinn/hlir/op/elementwise.cc index 243ea5f0eb8a2..d32c2c0af8b2f 100644 --- a/paddle/cinn/hlir/op/elementwise.cc +++ b/paddle/cinn/hlir/op/elementwise.cc @@ -1763,6 +1763,8 @@ CINN_REGISTER_HELPER(elementwise_ops) { .set_num_outputs(1) .set_attr( "CINNStrategy", cinn::hlir::op::StrategyForLogicalNot) + .set_attr( + "CINNStrategySymbolic", cinn::hlir::op::StrategyForLogicalNotSymbolic) .set_attr("infershape", MakeOpFunction(cinn::hlir::op::InferShapeForElementwise)) .set_attr("inferdtype", diff --git a/paddle/cinn/hlir/op/nn.cc b/paddle/cinn/hlir/op/nn.cc index 2b1ce342e0810..995a5a6bc4787 100644 --- a/paddle/cinn/hlir/op/nn.cc +++ b/paddle/cinn/hlir/op/nn.cc @@ -71,6 +71,35 @@ std::shared_ptr StrategyForRelu( return strategy; } +std::shared_ptr StrategyForRelu6Symbolic( + const framework::NodeAttr &attrs, + const std::vector &inputs, + const std::vector &out_type, + const std::vector> &output_shapes, + const Target &target) { + framework::CINNCompute relu6_compute( + [](lang::Args args, lang::RetValue *ret) { + CHECK(!args.empty()) + << "The input argument of relu6 compute is empty! Please check.\n"; + CINNValuePack pack_args = args[0]; + CHECK(!pack_args.empty()) + << "at least one input tensor for relu6 compute\n"; + Expr A = pack_args[0]; + CHECK(A.as_tensor()); + CHECK_EQ(pack_args.size(), 2); + CHECK(pack_args[1].is_string()); + std::string tensor_name = pack_args[1].operator std::string(); + auto out = pe::Relu6(A.as_tensor_ref(), 0.0, tensor_name); + auto stages = CreateStages({out}); + *ret = CINNValuePack{{CINNValue(Expr(out.get())), CINNValue(stages)}}; + }); + + auto strategy = std::make_shared(); + CHECK(out_type.size()) << "Out_type of relu6 op is empty! 
Please check."; + strategy->AddImpl(relu6_compute, lang::PackedFunc(), "strategy.relu6.x86", 1); + return strategy; +} + std::shared_ptr StrategyForReluSymbolic( const framework::NodeAttr &attrs, const std::vector &inputs, @@ -167,7 +196,7 @@ std::shared_ptr StrategyForConv2d( int groups = 1; std::string key = ""; std::string conv_type = ""; - bool use_mkldnn = false; + bool use_onednn = false; if (attrs.attr_store.find("padding") != attrs.attr_store.end()) { padding = absl::get>(attrs.attr_store.at("padding")); } @@ -183,8 +212,8 @@ std::shared_ptr StrategyForConv2d( if (attrs.attr_store.find("groups") != attrs.attr_store.end()) { groups = absl::get(attrs.attr_store.at("groups")); } - if (attrs.attr_store.find("use_mkldnn") != attrs.attr_store.end()) { - use_mkldnn = absl::get(attrs.attr_store.at("use_mkldnn")); + if (attrs.attr_store.find("use_onednn") != attrs.attr_store.end()) { + use_onednn = absl::get(attrs.attr_store.at("use_onednn")); } if (attrs.attr_store.find("key") != attrs.attr_store.end()) { key = absl::get(attrs.attr_store.at("key")); @@ -230,22 +259,34 @@ std::shared_ptr StrategyForConv2d( if (data_format == "NCHW") { // A is input: [N, C, H, W], B is filter: [C_out, C_in/group, // filter_h, filter_w] - if (target.arch == Target::Arch::X86) { - if (groups == 1 && !use_mkldnn) { - out = pe::Conv2d_NCHW_5D(A.as_tensor_ref(), - B.as_tensor_ref(), - padding[0], - padding[1], - stride[0], - stride[1], - dilation[0], - dilation[1], - key, - tensor_name, - target); - } else { + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + if (groups == 1 && !use_onednn) { + out = pe::Conv2d_NCHW_5D(A.as_tensor_ref(), + B.as_tensor_ref(), + padding[0], + padding[1], + stride[0], + stride[1], + dilation[0], + dilation[1], + key, + tensor_name, + target); + } else { #ifdef CINN_WITH_DNNL - out = pe::Conv2d_NCHW_MKLDNN(A.as_tensor_ref(), + out = pe::Conv2d_NCHW_ONEDNN(A.as_tensor_ref(), + B.as_tensor_ref(), + padding[0], + padding[1], + stride[0], + stride[1], + dilation[0], + dilation[1], + tensor_name); +#else + out = pe::Conv2d_NCHW_5D(A.as_tensor_ref(), B.as_tensor_ref(), padding[0], padding[1], @@ -253,45 +294,38 @@ std::shared_ptr StrategyForConv2d( stride[1], dilation[0], dilation[1], + key, tensor_name); -#else - out = pe::Conv2d_NCHW_5D(A.as_tensor_ref(), - B.as_tensor_ref(), - padding[0], - padding[1], - stride[0], - stride[1], - dilation[0], - dilation[1], - key, - tensor_name); #endif - } - } else { - if (conv_type == "forward") { - out = pe::Conv2d_NCHW(A.as_tensor_ref(), - B.as_tensor_ref(), - padding[0], - padding[1], - stride[0], - stride[1], - dilation[0], - dilation[1], - tensor_name); - out.push_back(B.as_tensor_ref()); - } else { + } + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { + if (conv_type == "forward") { + out = pe::Conv2d_NCHW(A.as_tensor_ref(), + B.as_tensor_ref(), + padding[0], + padding[1], + stride[0], + stride[1], + dilation[0], + dilation[1], + tensor_name); + out.push_back(B.as_tensor_ref()); + } else { #ifdef CINN_WITH_CUDNN - // as backward_data and backward_filter is not support now, we - // built a fake op to instead. as the runtime use cudnn to compute - // the conv2d, so this fake op is not been called. When cinn - // support backward_filter/backward_data code gen, this code is to - // be removed. 
- out = pe::Identity(A.as_tensor_ref()); - out.push_back(A.as_tensor_ref()); - out.push_back(B.as_tensor_ref()); + // as backward_data and backward_filter is not support now, we + // built a fake op to instead. as the runtime use cudnn to + // compute the conv2d, so this fake op is not been called. + // When cinn support backward_filter/backward_data code gen, + // this code is to be removed. + out = pe::Identity(A.as_tensor_ref()); + out.push_back(A.as_tensor_ref()); + out.push_back(B.as_tensor_ref()); #endif - } - } + } + }, + }); } else if (data_format == "NHWC") { // A is input: [N, H, W, C], B is filter: [C_out, C_in/group, // filter_h, filter_w] @@ -339,39 +373,48 @@ std::shared_ptr StrategyForConv2d( ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); ir_sch.MergeExprs(); - if (target.arch == Target::Arch::NVGPU) { + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { + PADDLE_THROW(phi::errors::InvalidArgument( + "This target [%s] is not supported yet.", target)); + }, + [&](common::X86Arch) { + PADDLE_THROW(phi::errors::InvalidArgument( + "This target [%s] is not supported yet.", target)); + }, + [&](common::ARMArch) { + PADDLE_THROW(phi::errors::InvalidArgument( + "This target [%s] is not supported yet.", target)); + }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDNN - // If conv_type is backward_filter or backward_data, we built a fake op. - // As runtime use cudnn to compute conv2d, this fake op is not to be - // called. When cinn support backward_filter/backward_data code gen, - // this code is to be removed. - if (conv_type != "forward") { - CHECK_EQ(vec_ast.size(), 1); - pe::IRCudaScheduleInjective(ir_sch, output_shapes.front(), target); - std::vector res{ - CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = CINNValuePack{res}; - return; - } + // If conv_type is backward_filter or backward_data, we built a fake + // op. As runtime use cudnn to compute conv2d, this fake op is not to + // be called. When cinn support backward_filter/backward_data code + // gen, this code is to be removed. + if (conv_type != "forward") { + CHECK_EQ(vec_ast.size(), 1); + pe::IRCudaScheduleInjective(ir_sch, output_shapes.front(), target); + std::vector res{ + CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = CINNValuePack{res}; + return; + } #endif - int expr_size = vec_ast.size(); - if (expr_size == 2) { - pe::IRCudaScheduleConv(ir_sch, target); - VLOG(3) << "After IRCudaScheduleConv, arg_pack[0] is : " - << ir_sch.GetModule().GetExprs().at(0); - std::vector res{ - CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = CINNValuePack{res}; - return; - } else { - CINN_NOT_IMPLEMENTED - } - } else if (target.arch == Target::Arch::X86) { - CINN_NOT_IMPLEMENTED - } - std::stringstream ss; - ss << "This target [" << target << "] is not supported yet."; - PADDLE_THROW(phi::errors::InvalidArgument(ss.str())); + int expr_size = vec_ast.size(); + if (expr_size == 2) { + pe::IRCudaScheduleConv(ir_sch, target); + VLOG(3) << "After IRCudaScheduleConv, arg_pack[0] is : " + << ir_sch.GetModule().GetExprs().at(0); + std::vector res{ + CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = CINNValuePack{res}; + return; + } else { + CINN_NOT_IMPLEMENTED + } + }, + }); }); auto strategy = std::make_shared(); @@ -619,7 +662,7 @@ std::shared_ptr StrategyForConv2dNCHWc( CHECK_EQ(dilation.size(), 2) << "The size of stride in conv2d_NCHWc op is not 2! 
Please check."; std::vector out; - CHECK(target.arch == Target::Arch::X86) + CHECK(std::holds_alternative(target.arch)) << "conv2d_NCHWc op is only used in x86"; // A is input: [N, C_in_outer, H, W, C_in_inner], B is filter: [C_out, // C_in_group_outer, filter_h, filter_w, C_in_group_inner] @@ -867,27 +910,32 @@ std::shared_ptr StrategyForDepthwiseConv2d( CHECK(pack_args[2].is_string()); std::string tensor_name = pack_args[2].operator std::string(); if (data_format == "NCHW") { - if (target.arch == Target::Arch::X86) { - out = pe::Conv2d_NCHW_5D(A.as_tensor_ref(), - B.as_tensor_ref(), - padding[0], - padding[1], - stride[0], - stride[1], - dilation[0], - dilation[1], - key, - tensor_name, - target); - } else { - out = pe::Depthwise_Conv2d_NCHW(A.as_tensor_ref(), - B.as_tensor_ref(), - padding[0], - padding[1], - stride[0], - stride[1], - tensor_name); - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + out = pe::Conv2d_NCHW_5D(A.as_tensor_ref(), + B.as_tensor_ref(), + padding[0], + padding[1], + stride[0], + stride[1], + dilation[0], + dilation[1], + key, + tensor_name, + target); + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { + out = pe::Depthwise_Conv2d_NCHW(A.as_tensor_ref(), + B.as_tensor_ref(), + padding[0], + padding[1], + stride[0], + stride[1], + tensor_name); + }, + }); } else if (data_format == "NHWC") { out = pe::Depthwise_Conv2d_NHWC(A.as_tensor_ref(), B.as_tensor_ref(), @@ -934,11 +982,14 @@ std::shared_ptr StrategyForDepthwiseConv2d( ir::ModuleExpr mod_expr(vec_ast); ir::IRSchedule ir_sch(mod_expr); ir_sch.MergeExprs(); - if (target.arch == Target::Arch::NVGPU) { - pe::IRCudaScheduleDepthwiseConv(ir_sch, vec_tensor); - } else { - CINN_NOT_IMPLEMENTED - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { CINN_NOT_IMPLEMENTED; }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { + pe::IRCudaScheduleDepthwiseConv(ir_sch, vec_tensor); + }, + }); std::vector res{ cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; *ret = cinn::common::CINNValuePack{res}; @@ -1063,7 +1114,8 @@ std::shared_ptr StrategyForBatchNorm( CHECK(Variance.as_tensor()); ir::Tensor out; auto tensor_input = A.as_tensor_ref(); - if (tensor_input->shape.size() != 4 && target.arch == Target::Arch::X86) { + if (tensor_input->shape.size() != 4 && + std::holds_alternative(target.arch)) { CHECK_EQ(input_layouts.size(), 5U) << "batch_norm_NCHWc's input layout should be 5"; std::string input_layout = input_layouts[0]; @@ -1246,16 +1298,25 @@ std::shared_ptr StrategyForPool1d( auto block_input_pad = ir_sch.GetBlock(input_pad.as_tensor()->name); ir_sch.ComputeInline(block_input_pad); } - if (target.arch == Target::Arch::NVGPU) { - CHECK(!vec_tensor.empty()); - Expr Out = vec_tensor[0]; - CHECK(Out.as_tensor()); - auto loops = ir_sch.GetLoops(Out.as_tensor()->name); - ir_sch.Split(loops[1], {-1, 2}); - loops = ir_sch.GetLoops(Out.as_tensor()->name); - ir_sch.Bind(loops[0], "blockIdx.x"); - ir_sch.Bind(loops[1], "threadIdx.x"); - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + // Do nothing. + }, + [&](common::ARMArch) { + // Do nothing. 
+ },
+ [&](common::NVGPUArch) {
+ CHECK(!vec_tensor.empty());
+ Expr Out = vec_tensor[0];
+ CHECK(Out.as_tensor());
+ auto loops = ir_sch.GetLoops(Out.as_tensor()->name);
+ ir_sch.Split(loops[1], {-1, 2});
+ loops = ir_sch.GetLoops(Out.as_tensor()->name);
+ ir_sch.Bind(loops[0], "blockIdx.x");
+ ir_sch.Bind(loops[1], "threadIdx.x");
+ },
+ });
std::vector<CINNValue> res{CINNValue(ir_sch.GetModule().GetExprs().at(0))};
*ret = CINNValuePack{res};
});
@@ -1469,11 +1530,12 @@ std::shared_ptr<OpStrategy> StrategyForPool2d(
ir::ModuleExpr mod_expr(vec_ast);
ir::IRSchedule ir_sch(mod_expr);
ir_sch.MergeExprs();
- if (target.arch == Target::Arch::NVGPU) {
- pe::IRGlobalPoolScheduleGPU(ir_sch, target);
- } else {
- CINN_NOT_IMPLEMENTED
- }
+ target.arch.Visit(adt::match{
+ [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::X86Arch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::NVGPUArch) { pe::IRGlobalPoolScheduleGPU(ir_sch, target); },
+ });
std::vector<CINNValue> res{CINNValue(ir_sch.GetModule().GetExprs().at(0))};
*ret = CINNValuePack{res};
});
@@ -1548,9 +1610,14 @@ std::shared_ptr<OpStrategy> StrategyForPool2d(
auto block_input_pad = ir_sch.GetBlock(input_pad_name);
ir_sch.ComputeInline(block_input_pad);
}
- if (target.arch == Target::Arch::NVGPU) {
- pe::IRPoolScheduleGPU(ir_sch, target, arg_pack_size);
- }
+ target.arch.Visit(adt::match{
+ [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::X86Arch) {},
+ [&](common::ARMArch) {},
+ [&](common::NVGPUArch) {
+ pe::IRPoolScheduleGPU(ir_sch, target, arg_pack_size);
+ },
+ });
std::vector<CINNValue> res{CINNValue(ir_sch.GetModule().GetExprs().at(0))};
*ret = CINNValuePack{res};
});
@@ -1558,15 +1625,23 @@ std::shared_ptr<OpStrategy> StrategyForPool2d(
auto strategy = std::make_shared<OpStrategy>();
bool use_warp_reduce = false;
- if (global_pooling && data_format == "NCHW" &&
- target.arch == Target::Arch::NVGPU) {
- // TODO(hp03): 32 may not be the exact number, try also 16 or 8 or other
- // number
- // we choose 32 to make sure all the threads in a warp has work to do,
- if ((A_tensor->shape[2].as_int32() * A_tensor->shape[3].as_int32()) >= 32) {
- use_warp_reduce = true;
- }
- }
+ target.arch.Visit(adt::match{
+ [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::X86Arch) { use_warp_reduce = false; },
+ [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::NVGPUArch) {
+ if (global_pooling && data_format == "NCHW") {
+ // TODO(hp03): 32 may not be the exact number; try also 16, 8, or
+ // another number. We choose 32 to make sure all the threads in a
+ // warp have work to do.
+ if ((A_tensor->shape[2].as_int32() * A_tensor->shape[3].as_int32()) >=
+ 32) {
+ use_warp_reduce = true;
+ }
+ }
+ },
+ });
strategy->AddImpl(pool2d_compute, pool2d_schedule, "strategy.pool2d.x86", 1);
if (use_warp_reduce) {
strategy->AddImpl(global_pool2d_compute,
@@ -1778,16 +1853,21 @@ std::shared_ptr<OpStrategy> StrategyForPool3d(
auto block_input_pad = ir_sch.GetBlock(input_pad.as_tensor()->name);
ir_sch.ComputeInline(block_input_pad);
}
- if (target.arch == Target::Arch::NVGPU) {
- CHECK(!vec_tensor.empty());
- Expr Out = vec_tensor[0];
- CHECK(Out.as_tensor());
- auto loops = ir_sch.GetLoops(Out.as_tensor()->name);
- ir_sch.Split(loops[1], {-1, 2});
- loops = ir_sch.GetLoops(Out.as_tensor()->name);
- ir_sch.Bind(loops[0], "blockIdx.x");
- ir_sch.Bind(loops[1], "threadIdx.x");
- }
+ target.arch.Visit(adt::match{
+ [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::X86Arch) { /*nothing*/ },
+ [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+
[&](common::NVGPUArch) {
+ CHECK(!vec_tensor.empty());
+ Expr Out = vec_tensor[0];
+ CHECK(Out.as_tensor());
+ auto loops = ir_sch.GetLoops(Out.as_tensor()->name);
+ ir_sch.Split(loops[1], {-1, 2});
+ loops = ir_sch.GetLoops(Out.as_tensor()->name);
+ ir_sch.Bind(loops[0], "blockIdx.x");
+ ir_sch.Bind(loops[1], "threadIdx.x");
+ },
+ });
std::vector<CINNValue> res{CINNValue(ir_sch.GetModule().GetExprs().at(0))};
*ret = CINNValuePack{res};
});
@@ -1912,12 +1992,12 @@ std::shared_ptr<OpStrategy> StrategyForSoftmax(
const std::vector<std::vector<int>> &output_shapes,
const Target &target) {
int axis = -1;
- bool use_mkldnn = false;
+ bool use_onednn = false;
if (attrs.attr_store.count("axis")) {
axis = absl::get<int>(attrs.attr_store.at("axis"));
}
- if (attrs.attr_store.count("use_mkldnn")) {
- use_mkldnn = absl::get<bool>(attrs.attr_store.at("use_mkldnn"));
+ if (attrs.attr_store.count("use_onednn")) {
+ use_onednn = absl::get<bool>(attrs.attr_store.at("use_onednn"));
}
framework::CINNCompute softmax_compute(
[=](lang::Args args, lang::RetValue *ret) {
@@ -1942,8 +2022,8 @@ std::shared_ptr<OpStrategy> StrategyForSoftmax(
pack_args[pack_args.size() - 1].operator std::string();
#ifdef CINN_WITH_DNNL
- if (use_mkldnn) {
- out = pe::SoftmaxMKLDNN(A, new_axis, tensor_name);
+ if (use_onednn) {
+ out = pe::SoftmaxONEDNN(A, new_axis, tensor_name);
} else {
out = pe::Softmax(A, new_axis, tensor_name);
}
@@ -1979,37 +2059,42 @@ std::shared_ptr<OpStrategy> StrategyForSoftmax(
ir::ModuleExpr mod_expr(vec_ast);
ir::IRSchedule ir_sch(mod_expr);
ir_sch.MergeExprs();
- if (target.arch == Target::Arch::NVGPU) {
- if (output_shapes[0].size() > 1) {
- auto all_blocks = ir_sch.GetAllBlocks();
- CHECK_EQ(all_blocks.size(), 3);
- auto loops = ir_sch.GetLoops(all_blocks[2]);
- ir_sch.ComputeAt(all_blocks[1], loops.back());
-
- if (output_shapes[0][0] != 1) {
- ir_sch.SimpleComputeAt(all_blocks[0], loops[0]);
- }
+ target.arch.Visit(adt::match{
+ [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::X86Arch) {
+ pe::IRSoftmaxScheduleCPU(ir_sch, axis);
+ std::vector<CINNValue> res{
+ CINNValue(ir_sch.GetModule().GetExprs().at(0))};
+ *ret = CINNValuePack{res};
+ },
+ [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::NVGPUArch) {
+ if (output_shapes[0].size() > 1) {
+ auto all_blocks = ir_sch.GetAllBlocks();
+ CHECK_EQ(all_blocks.size(), 3);
+ auto loops = ir_sch.GetLoops(all_blocks[2]);
+ ir_sch.ComputeAt(all_blocks[1], loops.back());
+
+ if (output_shapes[0][0] != 1) {
+ ir_sch.SimpleComputeAt(all_blocks[0], loops[0]);
+ }
- loops = ir_sch.GetLoops(all_blocks[2]);
- int loop_index = 1;
- if (output_shapes[0][0] == 1) loop_index--;
- CHECK_GE(loops.size(), loop_index + 1);
- auto splited_loops = ir_sch.Split(loops[loop_index], {-1, 5});
+ loops = ir_sch.GetLoops(all_blocks[2]);
+ int loop_index = 1;
+ if (output_shapes[0][0] == 1) loop_index--;
+ CHECK_GE(loops.size(), loop_index + 1);
+ auto splited_loops = ir_sch.Split(loops[loop_index], {-1, 5});
- all_blocks = ir_sch.GetAllBlocks();
- loops = ir_sch.GetLoops(all_blocks[2]);
- ir_sch.Bind(loops[0], "blockIdx.x");
- ir_sch.Bind(loops[1], "threadIdx.x");
- }
- std::vector<CINNValue> res{
- CINNValue(ir_sch.GetModule().GetExprs().at(0))};
- *ret = CINNValuePack{res};
- } else if (target.arch == Target::Arch::X86) {
- pe::IRSoftmaxScheduleCPU(ir_sch, axis);
- std::vector<CINNValue> res{
- CINNValue(ir_sch.GetModule().GetExprs().at(0))};
- *ret = CINNValuePack{res};
- }
+ all_blocks = ir_sch.GetAllBlocks();
+ loops = ir_sch.GetLoops(all_blocks[2]);
+ ir_sch.Bind(loops[0], "blockIdx.x");
+ ir_sch.Bind(loops[1], "threadIdx.x");
+ }
+ std::vector<CINNValue> res{
+
CINNValue(ir_sch.GetModule().GetExprs().at(0))};
+ *ret = CINNValuePack{res};
+ },
+ });
});
auto strategy = std::make_shared<OpStrategy>();
@@ -2043,7 +2128,7 @@ std::vector<std::vector<std::string>> InferLayoutForSoftmax(
CHECK_EQ(input_layouts.size(), 1U)
<< "The input's layout size is not 1! Please check again.";
if (input_shapes[0].size() > 4) {
- // input tensor needs to be transformed back to NCHW for mkldnn
+ // input tensor needs to be transformed back to NCHW for onednn
return {{"NCHW", "NCHW"}, {"NCHW"}};
}
return {{input_layouts[0], input_layouts[0]}, input_layouts};
@@ -2399,6 +2484,8 @@ CINN_REGISTER_HELPER(nn_ops) {
.set_num_outputs(1)
.set_attr<cinn::hlir::framework::StrategyFunction>(
"CINNStrategy", cinn::hlir::op::StrategyForRelu6)
+ .set_attr<cinn::hlir::framework::StrategyFunctionSymbolic>(
+ "CINNStrategySymbolic", cinn::hlir::op::StrategyForRelu6Symbolic)
.set_attr("infershape", MakeOpFunction(cinn::hlir::op::InferShapeForRelu))
.set_attr("inferdtype", MakeOpFunction(cinn::hlir::op::InferDtypeForRelu))
#ifndef CINN_WITH_CUDA
diff --git a/paddle/cinn/hlir/op/op_util.cc b/paddle/cinn/hlir/op/op_util.cc
index b0976f22c38cb..37eef516bac46 100644
--- a/paddle/cinn/hlir/op/op_util.cc
+++ b/paddle/cinn/hlir/op/op_util.cc
@@ -72,18 +72,45 @@ CINNSchedule GetInjectiveScheduleFunc(
ir::IRSchedule ir_sch(mod_expr);
ir_sch.MergeExprs();
pe::IRInjectiveSchedule(ir_sch, output_shapes.front(), target);
- /*if (target.arch == Target::Arch::NVGPU) {
- pe::IRInjectiveSchedule(ir_sch, output_shapes.front(), target);
- } else if (target.arch == Target::Arch::X86) {
- pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target,
- vectorizable);
- }*/
std::vector<cinn::common::CINNValue> res{
cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))};
*ret = cinn::common::CINNValuePack{res};
});
}
+std::string GetExternFuncNameArchPrefixImpl(common::UnknownArch,
+ const std::string& func_name) {
+ std::stringstream ss;
+ ss << func_name << " only supports X86 and NVGPU! Please Check.\n";
+ PADDLE_THROW(phi::errors::Fatal(ss.str()));
+}
+
+std::string GetExternFuncNameArchPrefixImpl(common::X86Arch,
+ const std::string& func_name) {
+ return "host_";
+}
+
+std::string GetExternFuncNameArchPrefixImpl(common::ARMArch,
+ const std::string& func_name) {
+ std::stringstream ss;
+ ss << func_name << " only supports X86 and NVGPU! Please Check.\n";
+ PADDLE_THROW(phi::errors::Fatal(ss.str()));
+}
+
+std::string GetExternFuncNameArchPrefixImpl(common::NVGPUArch,
+ const std::string& func_name) {
+ return "nvgpu_";
+}
+
+std::string GetExternFuncNameArchPrefix(common::Arch arch,
+ const std::string& func_name) {
+ return std::visit(
+ [&](const auto& impl) {
+ return GetExternFuncNameArchPrefixImpl(impl, func_name);
+ },
+ arch.variant());
+}
+
std::string GetExternFuncName(const cinn::common::Target& target,
const cinn::common::Type& type,
const std::string& func_name,
@@ -95,15 +122,8 @@ std::string GetExternFuncName(const cinn::common::Target& target,
func_proto_name.append("cinn_");
}
if (need_target) {
- if (target.arch == cinn::common::Target::Arch::NVGPU) {
- func_proto_name.append("nvgpu_");
- } else if (target.arch == cinn::common::Target::Arch::X86) {
- func_proto_name.append("host_");
- } else {
- std::stringstream ss;
- ss << func_name << " only supports X86 and NVGPU!
Please Check.\n";
- PADDLE_THROW(phi::errors::Fatal(ss.str()));
- }
+ const auto& prefix = GetExternFuncNameArchPrefix(target.arch, func_name);
+ func_proto_name.append(prefix);
}
func_proto_name.append(func_name);
if (!need_type) {
diff --git a/paddle/cinn/hlir/op/reduction.cc b/paddle/cinn/hlir/op/reduction.cc
index d5a378dc809e6..b3180ba555f3a 100644
--- a/paddle/cinn/hlir/op/reduction.cc
+++ b/paddle/cinn/hlir/op/reduction.cc
@@ -205,7 +205,7 @@ std::shared_ptr<OpStrategy> StrategyForReduce(
ir::ModuleExpr mod_expr(vec_ast);
ir::IRSchedule ir_sch(mod_expr);
ir_sch.MergeExprs();
- if (!FLAGS_cinn_new_group_scheduler && target.arch == Target::Arch::NVGPU) {
+ const auto ReduceSchedule = [&]() {
if (!WithoutLastDimInReduce(inputs[0]->shape, reduce_axes)) {
if (arg_pack.size() == 4) {
CHECK_EQ(vec_tensor.size(), 2);
@@ -307,11 +307,29 @@ std::shared_ptr<OpStrategy> StrategyForReduce(
PADDLE_THROW(phi::errors::InvalidArgument("Unkown Reduce Type!"));
}
}
- } else {
- std::vector<CINNValue> res{
- CINNValue(ir_sch.GetModule().GetExprs().at(0))};
- *ret = CINNValuePack{res};
- }
+ };
+ target.arch.Visit(adt::match{
+ [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::X86Arch) {
+ std::vector<CINNValue> res{
+ CINNValue(ir_sch.GetModule().GetExprs().at(0))};
+ *ret = CINNValuePack{res};
+ },
+ [&](common::ARMArch) {
+ std::vector<CINNValue> res{
+ CINNValue(ir_sch.GetModule().GetExprs().at(0))};
+ *ret = CINNValuePack{res};
+ },
+ [&](common::NVGPUArch) {
+ if (!FLAGS_cinn_new_group_scheduler) {
+ ReduceSchedule();
+ } else {
+ std::vector<CINNValue> res{
+ CINNValue(ir_sch.GetModule().GetExprs().at(0))};
+ *ret = CINNValuePack{res};
+ }
+ },
+ });
});
auto strategy = std::make_shared<OpStrategy>();
diff --git a/paddle/cinn/hlir/op/transform.cc b/paddle/cinn/hlir/op/transform.cc
index 21754487e7846..3d7bfdbf3623c 100644
--- a/paddle/cinn/hlir/op/transform.cc
+++ b/paddle/cinn/hlir/op/transform.cc
@@ -89,27 +89,32 @@ std::shared_ptr<OpStrategy> StrategyForMatMul(
auto new_B = tensor_B->Reshape(new_shape_B_e, stages);
std::vector<ir::Tensor> out;
- if (target.arch == Target::Arch::X86) {
+ target.arch.Visit(adt::match{
+ [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::X86Arch) {
#ifdef CINN_WITH_MKL_CBLAS
- out = pe::MatmulMKL(new_A,
- new_B,
- trans_a,
- trans_b,
- alpha,
- UniqName("MatmulMKL_output"),
- target);
+ out = pe::MatmulMKL(new_A,
+ new_B,
+ trans_a,
+ trans_b,
+ alpha,
+ UniqName("MatmulMKL_output"),
+ target);
#else
- out = pe::MatmulV2(new_A,
- new_B,
- trans_a,
- trans_b,
- alpha,
- UniqName("MatmulV2_output"),
- target);
+ out = pe::MatmulV2(new_A,
+ new_B,
+ trans_a,
+ trans_b,
+ alpha,
+ UniqName("MatmulV2_output"),
+ target);
#endif
- } else {
- out = pe::Matmul(new_A, new_B, trans_a, trans_b, alpha, tensor_name);
- }
+ },
+ [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::NVGPUArch) {
+ out = pe::Matmul(new_A, new_B, trans_a, trans_b, alpha, tensor_name);
+ },
+ });
std::vector<CINNValue> res;
for (auto &t : out) {
@@ -619,17 +624,23 @@ std::shared_ptr<OpStrategy> StrategyForMul(
CHECK(pack_args.back().is_string());
std::string tensor_name = pack_args.back().operator std::string();
- if (target.arch == Target::Arch::X86) {
+ target.arch.Visit(adt::match{
+ [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::X86Arch) {
#ifdef CINN_WITH_MKL_CBLAS
- out = pe::MatmulMKL(
- new_A, new_B, false, is_infer, 1.0f, tensor_name, target);
+ out = pe::MatmulMKL(
+ new_A, new_B, false, is_infer, 1.0f, tensor_name, target);
#else
- out = pe::MatmulV2(
- new_A, new_B, false, is_infer, 1.0f, tensor_name, target);
+ out = pe::MatmulV2(
+ new_A, new_B,
false, is_infer, 1.0f, tensor_name, target);
#endif
- } else {
- out = pe::Matmul(new_A, new_B, false, is_infer, 1.0f, tensor_name);
- }
+ },
+ [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; },
+ [&](common::NVGPUArch) {
+ out =
+ pe::Matmul(new_A, new_B, false, is_infer, 1.0f, tensor_name);
+ },
+ });
std::vector<CINNValue> res;
for (auto &t : out) {
@@ -854,7 +865,7 @@ std::shared_ptr<OpStrategy> StrategyForLayoutTransform(
ir::IRSchedule ir_sch(mod_expr);
ir_sch.MergeExprs();
- if (target.arch == Target::Arch::X86) {
+ if (std::holds_alternative<common::X86Arch>(target.arch)) {
pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target);
} else {
CINN_NOT_IMPLEMENTED
diff --git a/paddle/cinn/hlir/pass/CMakeLists.txt b/paddle/cinn/hlir/pass/CMakeLists.txt
index 328ecb6d4e0dc..744c0fe12c8a0 100644
--- a/paddle/cinn/hlir/pass/CMakeLists.txt
+++ b/paddle/cinn/hlir/pass/CMakeLists.txt
@@ -46,7 +46,7 @@ if(NOT WITH_CUDA)
#cinn_cc_test(test_alterlayout SRCS alterlayout_test.cc DEPS cinncore)
endif()
cinn_cc_test(test_dot_merger SRCS test_dot_merger.cc DEPS cinncore)
-cinn_cc_test(test_dce_pass SRCS dce_pass_test.cc DEPS cinncore)
+# cinn_cc_test(test_dce_pass SRCS dce_pass_test.cc DEPS cinncore)
cinn_cc_test(test_common_subexpression_elimination SRCS
common_subexpression_elimination_test.cc DEPS cinncore)
cinn_cc_test(test_constant_folding_pass SRCS constant_folding_pass_test.cc DEPS
diff --git a/paddle/cinn/hlir/pass/alterlayout.cc b/paddle/cinn/hlir/pass/alterlayout.cc
index 8ca3475c2d7e3..74c8c0915e0af 100644
--- a/paddle/cinn/hlir/pass/alterlayout.cc
+++ b/paddle/cinn/hlir/pass/alterlayout.cc
@@ -140,7 +140,7 @@ std::vector UpdateInferInfos(
void AlterLayoutPass(Graph* graph) {
// alter layout only in X86 for it's specific layout requirements
- if (graph->target_.arch == Target::Arch::X86) {
+ if (std::holds_alternative<common::X86Arch>(graph->target_.arch)) {
auto store_nodes = std::get<0>(graph->topological_order());
auto& shape_dict = graph->GetMutableAttrs<
absl::flat_hash_map<std::string, framework::shape_t>>("infershape");
diff --git a/paddle/cinn/hlir/pe/broadcast.cc b/paddle/cinn/hlir/pe/broadcast.cc
index fb47ed737fdf3..fab2af9c5f0dc 100644
--- a/paddle/cinn/hlir/pe/broadcast.cc
+++ b/paddle/cinn/hlir/pe/broadcast.cc
@@ -400,10 +400,7 @@ Tensor BroadcastTo(const Tensor& A,
} else if (MathEqual(a_shape_i, out_shape[idx])) {
broadcast_indice.push_back(indice[idx]);
} else {
- std::stringstream ss;
- ss << "fail to broad cast input shape " << a_shape_i
- << " to output shape " << out_shape[idx];
- PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+ broadcast_indice.push_back(indice[idx] % a_shape_i);
}
}
return A(broadcast_indice);
diff --git a/paddle/cinn/hlir/pe/elementwise.cc b/paddle/cinn/hlir/pe/elementwise.cc
index 663b32451ae12..559014658de0e 100644
--- a/paddle/cinn/hlir/pe/elementwise.cc
+++ b/paddle/cinn/hlir/pe/elementwise.cc
@@ -288,7 +288,6 @@ ir::Tensor Reshape(const ir::Tensor& A,
auto temp = inner_offset % A_expr_shape[i];
indice_a.insert(indice_a.begin(), temp);
}
- LOG(INFO) << "indice_a = " << indice_a[0];
return A(indice_a);
},
name);
@@ -340,9 +339,16 @@ ir::Tensor Tril(const ir::Tensor& A,
ir::Tensor res = Compute(
ToCinnExprs(out_shape),
[=](const std::vector<Expr>& indice) {
- return ir::Select::Make(indice[0] >= indice[1] - diagonal,
+ PADDLE_ENFORCE_GE(indice.size(),
+ size_t(2),
+ phi::errors::InvalidArgument(
+ "The Tril op input tensor must have a rank "
+ "greater than or equal to 2."));
+ std::vector<Expr> new_indice(indice.end() - 2, indice.end());
+ Expr col_indice = indice.back();
+ return ir::Select::Make(new_indice[0] >= new_indice[1] -
diagonal,
A(indice),
- ir::Expr(static_cast<float>(0.)));
+ ir::Zero(A->type()));
},
name);
return res;
}
diff --git a/paddle/cinn/hlir/pe/ir_schedule_pe.cc b/paddle/cinn/hlir/pe/ir_schedule_pe.cc
index d224a5fd1e1ca..4950d575015bc 100644
--- a/paddle/cinn/hlir/pe/ir_schedule_pe.cc
+++ b/paddle/cinn/hlir/pe/ir_schedule_pe.cc
@@ -184,8 +184,8 @@ std::vector<ir::Expr> IRCudaScheduleMatMul(
const cinn::common::CINNValuePack &arg_pack,
const std::vector<int> &output_shape,
const cinn::common::Target &target) {
- if (target.arch == Target::Arch::X86) {
- CINN_NOT_IMPLEMENTED
+ if (!std::holds_alternative<common::NVGPUArch>(target.arch)) {
+ CINN_NOT_IMPLEMENTED;
}
std::vector<ir::Expr> vec_ast;
for (int i = 0; i < arg_pack.size(); i++) {
diff --git a/paddle/cinn/hlir/pe/nn.cc b/paddle/cinn/hlir/pe/nn.cc
index 9e48b26ae9392..48bbda538351b 100644
--- a/paddle/cinn/hlir/pe/nn.cc
+++ b/paddle/cinn/hlir/pe/nn.cc
@@ -652,7 +652,7 @@ std::vector<ir::Tensor> Conv2d_NCHWc(const ir::Tensor &input,
}
#ifdef CINN_WITH_DNNL
-std::vector<ir::Tensor> Conv2d_NCHW_MKLDNN(const ir::Tensor &input,
+std::vector<ir::Tensor> Conv2d_NCHW_ONEDNN(const ir::Tensor &input,
const ir::Tensor &weights,
int pad_h,
int pad_w,
@@ -674,7 +674,7 @@ std::vector<ir::Tensor> Conv2d_NCHW_MKLDNN(const ir::Tensor &input,
auto call = Compute(
{Expr(1)},
[=]() -> Expr {
- return lang::CallExtern("cinn_cpu_mkldnn_conv2d_nchw_fp32",
+ return lang::CallExtern("cinn_cpu_onednn_conv2d_nchw_fp32",
{
Expr(input->shape[0]), // batch_size
Expr(input->shape[1]), // c_in
@@ -694,7 +694,7 @@ std::vector<ir::Tensor> Conv2d_NCHW_MKLDNN(const ir::Tensor &input,
weights // weights
});
},
- UniqName("conv2d_nchw_mkldnn_out"));
+ UniqName("conv2d_nchw_onednn_out"));
auto out = call->TupleGet(0);
out->WithBuffer(input->type());
return {out, call};
@@ -1020,11 +1020,11 @@ std::vector<ir::Tensor> Softmax(const ir::Tensor &A,
}
#ifdef CINN_WITH_DNNL
-std::vector<ir::Tensor> SoftmaxMKLDNN(const ir::Tensor &A,
+std::vector<ir::Tensor> SoftmaxONEDNN(const ir::Tensor &A,
int axis,
const std::string &output_name) {
CHECK_LE(A->shape.size(), 4U)
- << "Input's dimension of mkldnn softmax op is less than 4! Please check.";
+ << "Input's dimension of onednn softmax op must not exceed 4!
Please check."; if (axis == -1) { axis = A->shape.size() - 1; } @@ -1036,7 +1036,7 @@ std::vector SoftmaxMKLDNN(const ir::Tensor &A, auto call = Compute( {Expr(1)}, [=]() -> Expr { - return lang::CallExtern("cinn_cpu_mkldnn_softmax_fp32", + return lang::CallExtern("cinn_cpu_onednn_softmax_fp32", { shape[0], // batch_size shape[1], // c_in diff --git a/paddle/cinn/hlir/pe/nn.h b/paddle/cinn/hlir/pe/nn.h index 32e2db2dc38f7..c3cc860e657bc 100755 --- a/paddle/cinn/hlir/pe/nn.h +++ b/paddle/cinn/hlir/pe/nn.h @@ -179,7 +179,7 @@ std::vector Conv2d_NCHWc( const cinn::common::Target &target = cinn::common::DefaultHostTarget()); #ifdef CINN_WITH_DNNL -std::vector Conv2d_NCHW_MKLDNN( +std::vector Conv2d_NCHW_ONEDNN( const ir::Tensor &input, const ir::Tensor &weights, int pad_h, @@ -333,7 +333,7 @@ std::vector Softmax( const std::string &output_name = UniqName("T_softmax_out")); #ifdef CINN_WITH_DNNL -std::vector SoftmaxMKLDNN( +std::vector SoftmaxONEDNN( const ir::Tensor &A, int axis = -1, const std::string &output_name = UniqName("T_softmax_out")); diff --git a/paddle/cinn/hlir/pe/schedule.cc b/paddle/cinn/hlir/pe/schedule.cc index 3e4af70e1b1cc..0206c288738ff 100644 --- a/paddle/cinn/hlir/pe/schedule.cc +++ b/paddle/cinn/hlir/pe/schedule.cc @@ -36,21 +36,31 @@ namespace cinn { namespace hlir { namespace pe { -ScheduleParam::ScheduleParam(cinn::common::Target::Arch arch) { - switch (arch) { - case cinn::common::Target::Arch::X86: { - param_data = CreateX86Params(); - break; - } - case cinn::common::Target::Arch::NVGPU: { - param_data = CreateCudaParams(); - break; - } - default: { - PADDLE_THROW(phi::errors::InvalidArgument( - "Schedule params must be initialized with target x86 or nvgpu.")); - } - } +using ParamsT = + absl::flat_hash_map>>; + +ParamsT CreateParamsImpl(common::UnknownArch) { + PADDLE_THROW(phi::errors::InvalidArgument( + "Schedule params must be initialized with target x86 or nvgpu.")); +} + +ParamsT CreateParamsImpl(common::X86Arch) { return CreateX86Params(); } + +ParamsT CreateParamsImpl(common::ARMArch) { + PADDLE_THROW(phi::errors::InvalidArgument( + "Schedule params must be initialized with target x86 or nvgpu.")); +} + +ParamsT CreateParamsImpl(common::NVGPUArch) { return CreateCudaParams(); } + +ParamsT CreateParams(common::Arch arch) { + return std::visit([](const auto &impl) { return CreateParamsImpl(impl); }, + arch.variant()); +} + +ScheduleParam::ScheduleParam(cinn::common::Arch arch) { + param_data = CreateParams(arch); } ScheduleParam::~ScheduleParam() {} @@ -873,7 +883,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, const cinn::common::Target &target, const std::string &key, bool do_padding) { - CHECK(target.arch == Target::Arch::X86) + CHECK(std::holds_alternative(target.arch)) << "Conv2d_NCHWc_1X1_Schedule_CPU schedule only used in x86"; CHECK(packed_out.defined()); CHECK(input_pad.defined()); @@ -1022,7 +1032,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &weights_dilation, const ir::Tensor &data, const cinn::common::Target &target) { - CHECK(target.arch == Target::Arch::X86) + CHECK(std::holds_alternative(target.arch)) << "Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse schedule only used in x86"; CHECK(packed_out.defined()); CHECK(input_pad.defined()); @@ -1144,7 +1154,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &weights_dilation, const ir::Tensor &data, const cinn::common::Target &target) { - CHECK(target.arch == Target::Arch::X86) + CHECK(std::holds_alternative(target.arch)) 
<< "Conv2d_NCHWc_Schedule_CPU_Nofuse schedule only used in x86"; CHECK(packed_out.defined()); CHECK(input_pad.defined()); @@ -1251,7 +1261,7 @@ void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages, const cinn::common::Target &target, const std::string &key, bool do_padding) { - CHECK(target.arch == Target::Arch::X86) + CHECK(std::holds_alternative(target.arch)) << "Conv2d_NCHWc_Schedule_CPU schedule only used in x86"; CHECK(packed_out.defined()); CHECK(input_pad.defined()); @@ -1383,7 +1393,7 @@ void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( const ir::Tensor &data, const cinn::common::Target &target, bool do_padding) { - CHECK(target.arch == Target::Arch::X86) + CHECK(std::holds_alternative(target.arch)) << "Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse schedule only used in x86"; CHECK(packed_out.defined()); CHECK(input_pad.defined()); @@ -2813,16 +2823,21 @@ void CudaSplitSchedule(cinn::common::CINNValuePack *arg_pack, if (i != axis) fused_shape = fused_shape * output_shapes[0][i]; } int compute_at_level = 0; - if (target.arch == Target::Arch::NVGPU) { - if (fused_shape > target.max_num_threads()) { - stages[last_output]->Split(0, target.max_num_threads()); - stages[last_output]->Bind(0, "blockIdx.x"); - stages[last_output]->Bind(1, "threadIdx.x"); - compute_at_level++; - } else { - stages[last_output]->Bind(0, "threadIdx.x"); - } - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) {}, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { + if (fused_shape > target.max_num_threads()) { + stages[last_output]->Split(0, target.max_num_threads()); + stages[last_output]->Bind(0, "blockIdx.x"); + stages[last_output]->Bind(1, "threadIdx.x"); + compute_at_level++; + } else { + stages[last_output]->Bind(0, "threadIdx.x"); + } + }, + }); for (int i = 0; i < out_tensors.size() - 1; i++) { stages[out_tensors[i]]->ComputeAt2(stages[last_output], compute_at_level); diff --git a/paddle/cinn/hlir/pe/schedule.h b/paddle/cinn/hlir/pe/schedule.h index 7aef85c77518e..a8af004e04960 100644 --- a/paddle/cinn/hlir/pe/schedule.h +++ b/paddle/cinn/hlir/pe/schedule.h @@ -35,11 +35,11 @@ class ScheduleParam { ScheduleParam(const ScheduleParam &) = delete; ScheduleParam &operator=(const ScheduleParam &) = delete; static ScheduleParam &get_cuda_instance() { - static ScheduleParam instance{cinn::common::Target::Arch::NVGPU}; + static ScheduleParam instance{cinn::common::NVGPUArch{}}; return instance; } static ScheduleParam &get_x86_instance() { - static ScheduleParam instance{cinn::common::Target::Arch::X86}; + static ScheduleParam instance{cinn::common::X86Arch{}}; return instance; } absl::flat_hash_map>> param_data; diff --git a/paddle/cinn/hlir/pe/transform.cc b/paddle/cinn/hlir/pe/transform.cc index 3cd4120f89a1b..6f42a2268b35d 100644 --- a/paddle/cinn/hlir/pe/transform.cc +++ b/paddle/cinn/hlir/pe/transform.cc @@ -565,7 +565,7 @@ std::vector MatmulMKL(const Tensor& A, float alpha, const std::string& name, const cinn::common::Target& target) { - CHECK(target.arch == Target::Arch::X86) + CHECK(std::holds_alternative(target.arch)) << "mkl should be used in the cpu environment"; std::vector shape_A = A->shape; std::vector shape_B = B->shape; @@ -658,10 +658,19 @@ int GetMulFactor(int shape, return split_factor; } -std::vector MulBase(const Tensor& A, - const Tensor& B, - const std::string& name, - const cinn::common::Target& target) { +std::vector MulBaseCallImpl(common::UnknownArch, + const Tensor& A, + const Tensor& B, + const 
std::string& name, + const cinn::common::Target& target) { + LOG(FATAL) << "NotImplemented."; +} + +std::vector MulBaseCallImpl(common::X86Arch, + const Tensor& A, + const Tensor& B, + const std::string& name, + const cinn::common::Target& target) { std::vector output_shape; CHECK_EQ(A->shape.size(), 2U) << "tensor_A's shape size should be two while current shape size is " @@ -674,55 +683,96 @@ std::vector MulBase(const Tensor& A, output_shape.push_back(A->shape[0]); output_shape.push_back(B->shape[0]); - if (target.arch == Target::Arch::X86) { - int reduce_dim = A->shape[1].as_int32(); - int split_factor = GetMulFactor(reduce_dim, A->type(), target); - Var reduce_k_first( - ir::Cast::Make(A->shape[1]->type(), Expr(reduce_dim / split_factor)), - UniqName("reduce_k_first")); - auto mul_reduce_first = Compute( - {A->shape[0], B->shape[0], Expr(split_factor)}, - [=](const std::vector& indice) { - CHECK_EQ(indice.size(), 3U) - << "indice size should be three while current size is " - << indice.size(); - return lang::ReduceSum( - A({indice[0], reduce_k_first * Expr(split_factor) + indice[2]}) * - B({indice[1], - reduce_k_first * Expr(split_factor) + indice[2]}), - {reduce_k_first}); - }, - UniqName("mul_reduce_k_first")); - Var reduce_k_second(ir::Cast::Make(A->shape[1]->type(), Expr(split_factor)), - UniqName("reduce_k_second")); - return {Compute( - output_shape, - [=](const std::vector& indice) { - std::vector new_indice = indice; - new_indice.push_back(reduce_k_second); - return lang::ReduceSum(mul_reduce_first(new_indice), - {reduce_k_second}); - }, - name), - mul_reduce_first}; - } else { - Var reduce_k(A->shape[1], UniqName("reduce_k")); - return {Compute( - output_shape, - [=](const std::vector& indice) { - std::vector A_indice; - std::vector B_indice; - CHECK_EQ(indice.size(), 2U) - << "indice size should be two while current size is " - << indice.size(); - A_indice.push_back(indice[0]); - B_indice.push_back(indice[1]); - A_indice.push_back(reduce_k); - B_indice.push_back(reduce_k); - return lang::ReduceSum(A(A_indice) * B(B_indice), {reduce_k}); - }, - name)}; - } + int reduce_dim = A->shape[1].as_int32(); + int split_factor = GetMulFactor(reduce_dim, A->type(), target); + Var reduce_k_first( + ir::Cast::Make(A->shape[1]->type(), Expr(reduce_dim / split_factor)), + UniqName("reduce_k_first")); + auto mul_reduce_first = Compute( + {A->shape[0], B->shape[0], Expr(split_factor)}, + [=](const std::vector& indice) { + CHECK_EQ(indice.size(), 3U) + << "indice size should be three while current size is " + << indice.size(); + return lang::ReduceSum( + A({indice[0], reduce_k_first * Expr(split_factor) + indice[2]}) * + B({indice[1], reduce_k_first * Expr(split_factor) + indice[2]}), + {reduce_k_first}); + }, + UniqName("mul_reduce_k_first")); + Var reduce_k_second(ir::Cast::Make(A->shape[1]->type(), Expr(split_factor)), + UniqName("reduce_k_second")); + return {Compute( + output_shape, + [=](const std::vector& indice) { + std::vector new_indice = indice; + new_indice.push_back(reduce_k_second); + return lang::ReduceSum(mul_reduce_first(new_indice), + {reduce_k_second}); + }, + name), + mul_reduce_first}; +} + +std::vector MulBaseCallImpl(common::ARMArch, + const Tensor& A, + const Tensor& B, + const std::string& name, + const cinn::common::Target& target) { + LOG(FATAL) << "NotImplemented."; +} + +std::vector MulBaseCallImpl(common::NVGPUArch, + const Tensor& A, + const Tensor& B, + const std::string& name, + const cinn::common::Target& target) { + std::vector output_shape; + 
CHECK_EQ(A->shape.size(), 2U) + << "tensor_A's shape size should be two while current shape size is " + << A->shape.size(); + CHECK_EQ(B->shape.size(), 2U) + << "tensor_B's shape size should be two while current shape size is " + << B->shape.size(); + CHECK_EQ(A->shape[1], B->shape[1]) + << "tensor_A's last shape should be same with tensor_B"; + output_shape.push_back(A->shape[0]); + output_shape.push_back(B->shape[0]); + + Var reduce_k(A->shape[1], UniqName("reduce_k")); + return {Compute( + output_shape, + [=](const std::vector& indice) { + std::vector A_indice; + std::vector B_indice; + CHECK_EQ(indice.size(), 2U) + << "indice size should be two while current size is " + << indice.size(); + A_indice.push_back(indice[0]); + B_indice.push_back(indice[1]); + A_indice.push_back(reduce_k); + B_indice.push_back(reduce_k); + return lang::ReduceSum(A(A_indice) * B(B_indice), {reduce_k}); + }, + name)}; +} + +std::vector MulBaseCall(const Tensor& A, + const Tensor& B, + const std::string& name, + const cinn::common::Target& target) { + return std::visit( + [&](const auto& impl) { + return MulBaseCallImpl(impl, A, B, name, target); + }, + target.arch.variant()); +} + +std::vector MulBase(const Tensor& A, + const Tensor& B, + const std::string& name, + const cinn::common::Target& target) { + return MulBaseCall(A, B, name, target); } std::vector Mul(const Tensor& A, @@ -751,7 +801,7 @@ std::vector MulMKL(const Tensor& A, const Tensor& B, const std::string& name, const cinn::common::Target& target) { - CHECK(target.arch == Target::Arch::X86) + CHECK(std::holds_alternative(target.arch)) << "mkl should be used in the cpu environment"; std::vector shape_A = A->shape; std::vector shape_B = B->shape; @@ -1271,14 +1321,18 @@ ir::Tensor ScatterAssign(const ir::Tensor& input, CHECK_EQ(index->type(), cinn::common::Int(32)) << "Param [Index] of ScatterAssign only support int32 ! Please Check.\n"; std::string extern_fun_name; - if (target.arch == cinn::common::Target::Arch::NVGPU) { - extern_fun_name.assign("cinn_cuda_find_int"); - } else if (target.arch == cinn::common::Target::Arch::X86) { - extern_fun_name.assign("cinn_host_find_int"); - } else { - PADDLE_THROW(phi::errors::Fatal( - "ScatterAssign only support X86 and NVGPU ! Please Check.\n")); - } + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { + PADDLE_THROW(phi::errors::Fatal( + "ScatterAssign only support X86 and NVGPU ! Please Check.\n")); + }, + [&](common::X86Arch) { extern_fun_name.assign("cinn_host_find_int"); }, + [&](common::ARMArch) { + PADDLE_THROW(phi::errors::Fatal( + "ScatterAssign only support X86 and NVGPU ! Please Check.\n")); + }, + [&](common::NVGPUArch) { extern_fun_name.assign("cinn_cuda_find_int"); }, + }); auto pos_axis = axis; if (pos_axis < 0) pos_axis += input->shape.size(); @@ -1309,7 +1363,7 @@ ir::Tensor ScatterAdd(const ir::Tensor& input, const cinn::common::Target& target, const int axis, const std::string& output_name) { - CHECK_EQ(target.arch, cinn::common::Target::Arch::NVGPU) + CHECK(std::holds_alternative(target.arch)) << "Op IndexAdd only support NVGPU now ! 
Please Check.\n"; CHECK_EQ(index->type(), cinn::common::Int(32)) diff --git a/paddle/cinn/ir/group_schedule/config/group_tile_config.cc b/paddle/cinn/ir/group_schedule/config/group_tile_config.cc index 0d443086bdce9..eb29a090092e0 100644 --- a/paddle/cinn/ir/group_schedule/config/group_tile_config.cc +++ b/paddle/cinn/ir/group_schedule/config/group_tile_config.cc @@ -102,7 +102,8 @@ BuildPureStaticShapeConfig( } else if (base_info->reduce_numel <= 256) { // warp reduce int64_t reduce_block = Next2Power(base_info->reduce_numel); - int64_t spatial_inner_num = 256 / reduce_block; + int64_t spatial_inner_num = + std::min(256 / reduce_block, base_info->spatial_numel); int64_t tree_reduce_num = 32; int64_t warp_num = 8; BucketInfo bucket_info{/* sp_lower_bound = */ 1, diff --git a/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc b/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc index 1dc21ce8a3180..85890576d2647 100644 --- a/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc +++ b/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc @@ -555,7 +555,7 @@ void StaticShapeGroupScheduler::DoVerticalLoopFusion() { } void StaticShapeGroupScheduler::BindCudaAxis() { - if (target_.arch != Target::Arch::NVGPU) return; + if (!std::holds_alternative(target_.arch)) return; VLOG(5) << "[Start BindCudaAxis] func body: " << ir_sch_->GetModule().GetExprs().front(); @@ -594,7 +594,7 @@ std::ostream& operator<<(std::ostream& os, const Range& x) { // and MultiDimIntegerSet, re implement this function to simplify these ugly // codes. void StaticShapeGroupScheduler::AllocateStorage() { - if (target_.arch != Target::Arch::NVGPU) return; + if (!std::holds_alternative(target_.arch)) return; VLOG(5) << "[Start AllocateStorage] func body: " << ir_sch_->GetModule().GetExprs().front(); diff --git a/paddle/cinn/ir/group_schedule/tactic/tile_first_general_tactic.cc b/paddle/cinn/ir/group_schedule/tactic/tile_first_general_tactic.cc index 8a3c2dfa71356..08b587f95fd71 100644 --- a/paddle/cinn/ir/group_schedule/tactic/tile_first_general_tactic.cc +++ b/paddle/cinn/ir/group_schedule/tactic/tile_first_general_tactic.cc @@ -219,19 +219,21 @@ void TileFirstGeneralTactic::SplitWarpNumber(ir::IRSchedule* sch, }; if (!IsWarpNumGT(1)) return; - const auto LimitWarpNum = [&](const ir::Expr& loop, ScheduleConfig* config) { + const auto GetMinimalWarpNum = [&](const ir::Expr& loop, + const ScheduleConfig& config) -> int { ir::Expr extent = loop.As()->extent; common::cas_intervals_t var_intervals = common::CollectVarIntervalsOfExprs({extent}); common::SymbolicExprAnalyzer analyzer(var_intervals); const auto& proved_gt = - analyzer.ProveGT(ir::Expr(config->tile_config.warp_num), extent); + analyzer.ProveGT(ir::Expr(config.tile_config.warp_num * 32), extent); if (proved_gt.value_or(false)) { ir::Expr upper_bound = analyzer.UpperBound(extent); if (upper_bound.is_constant()) { - config->tile_config.warp_num = upper_bound.get_constant(); + return (static_cast(upper_bound.get_constant()) + 31) / 32; } } + return config.tile_config.warp_num; }; auto loops = sch->GetLoops(block_id); @@ -248,9 +250,9 @@ void TileFirstGeneralTactic::SplitWarpNumber(ir::IRSchedule* sch, } } else if (IsWarpReduce(context_->config)) { // get num warp from flatten num - LimitWarpNum(loops[0], &(context_->config)); - int thread_y = context_->config.tile_config.warp_num * 32 / - context_->config.tile_config.tree_reduce_num; + int minimal_warp_number = GetMinimalWarpNum(loops[0], context_->config); + int thread_y = + minimal_warp_number * 32 / 
context_->config.tile_config.tree_reduce_num;
sch->Split(loops[0], std::vector<int>({-1, thread_y}));
if (IsReduceBlock(context_->config, block_id) &&
diff --git a/paddle/cinn/ir/module.cc b/paddle/cinn/ir/module.cc
index 20298e32920fb..96c0187cbd9ce 100644
--- a/paddle/cinn/ir/module.cc
+++ b/paddle/cinn/ir/module.cc
@@ -35,6 +35,25 @@ void Module::Builder::AddFunctionWithoutOptim(const ir::LoweredFunc &func) {
module_->functions.push_back(func);
}
+std::optional<int> GetDataAlignmentImpl(common::UnknownArch arch) {
+ return std::nullopt;
+}
+
+std::optional<int> GetDataAlignmentImpl(common::X86Arch arch) { return 32; }
+
+std::optional<int> GetDataAlignmentImpl(common::ARMArch arch) {
+ return std::nullopt;
+}
+
+std::optional<int> GetDataAlignmentImpl(common::NVGPUArch arch) {
+ return std::nullopt;
+}
+
+std::optional<int> GetDataAlignment(common::Arch arch) {
+ return std::visit([](const auto &impl) { return GetDataAlignmentImpl(impl); },
+ arch.variant());
+}
+
void Module::Builder::AddBuffer(ir::Buffer buffer) {
CHECK(buffer->target.defined())
<< "buffer [" << buffer->name << "]'s target is undefined";
@@ -43,8 +62,8 @@ void Module::Builder::AddBuffer(ir::Buffer buffer) {
return x.as_buffer()->name == buffer->name;
}) == std::end(module_->buffers)) {
module_->buffers.push_back(buffer);
- if (module_->target.arch == Target::Arch::X86) {
- module_->buffers.back().as_buffer()->data_alignment = 32;
+ if (auto alignment = GetDataAlignment(module_->target.arch)) {
+ module_->buffers.back().as_buffer()->data_alignment = alignment.value();
}
}
}
@@ -64,7 +83,7 @@ void Module::Builder::Clear() {
module_->predicates.clear();
}
-Target::Arch Module::Builder::GetTargetArch() { return module_->target.arch; }
+common::Arch Module::Builder::GetTargetArch() { return module_->target.arch; }
Module Module::Builder::Build() {
if (module_->functions.empty()) {
diff --git a/paddle/cinn/ir/module.h b/paddle/cinn/ir/module.h
index 160d0087a0e54..438c0e6db30d5 100644
--- a/paddle/cinn/ir/module.h
+++ b/paddle/cinn/ir/module.h
@@ -47,7 +47,7 @@ class Module : public ir::IrNodeRef {
void AddPredicate(ir::Expr predicate);
void SetInferShapeFunc(ir::Expr infer_shape_func);
void Clear();
- Target::Arch GetTargetArch();
+ common::Arch GetTargetArch();
Module Build();
diff --git a/paddle/cinn/ir/op/ir_operators.cc b/paddle/cinn/ir/op/ir_operators.cc
index d11a26685851f..6b68e3ce60c40 100644
--- a/paddle/cinn/ir/op/ir_operators.cc
+++ b/paddle/cinn/ir/op/ir_operators.cc
@@ -69,6 +69,40 @@ Expr operator>>(Expr a, Expr b) {
return lang::CallExtern("right_shift", {a, b}, {{"vectorizable", false}});
}
+Expr BitwiseOrCallImpl(common::UnknownArch,
+ const Target& target,
+ Expr a,
+ Expr b) {
+ std::stringstream ss;
+ ss << "Unsupported arch: " << target.arch_str() << " for bitwise_or.";
+ PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+}
+
+Expr BitwiseOrCallImpl(common::X86Arch, const Target& target, Expr a, Expr b) {
+ return lang::CallExtern("bitwise_or", {a, b}, {{"vectorizable", false}});
+}
+
+Expr BitwiseOrCallImpl(common::ARMArch, const Target& target, Expr a, Expr b) {
+ std::stringstream ss;
+ ss << "Unsupported arch: " << target.arch_str() << " for bitwise_or.";
+ PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+}
+
+Expr BitwiseOrCallImpl(common::NVGPUArch,
+ const Target& target,
+ Expr a,
+ Expr b) {
+ Type t_a = a.type();
+ auto func_name = hlir::GetExternFuncName(target, t_a, "bitwise_or");
+ return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}});
+}
+
+Expr BitwiseOrCall(const Target& target, Expr a, Expr b)
{
+ return std::visit(
+ [&](const auto& arch) { return BitwiseOrCallImpl(arch, target, a, b); },
+ target.arch.variant());
+}
+
Expr operator|(Expr a, Expr b) {
CHECK(a.type().is_int() || a.type().is_uint());
CHECK(b.type().is_int() || b.type().is_uint());
@@ -82,16 +116,41 @@ Expr operator|(Expr a, Expr b) {
}
}
auto target = cinn::runtime::CurrentTarget::GetCurrentTarget();
- if (target.arch == cinn::common::Target::Arch::X86) {
- return lang::CallExtern("bitwise_or", {a, b}, {{"vectorizable", false}});
- } else if (target.arch == cinn::common::Target::Arch::NVGPU) {
- auto func_name = hlir::GetExternFuncName(target, t_a, "bitwise_or");
- return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}});
- } else {
- std::stringstream ss;
- ss << "Unsupport arch: " << target.arch_str() << " for bitwise_or.";
- PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
- }
+ return BitwiseOrCall(target, a, b);
+}
+
+Expr BitwiseAndCallImpl(common::UnknownArch,
+ const Target& target,
+ Expr a,
+ Expr b) {
+ std::stringstream ss;
+ ss << "Unsupported arch: " << target.arch_str() << " for bitwise_and.";
+ PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+}
+
+Expr BitwiseAndCallImpl(common::X86Arch, const Target& target, Expr a, Expr b) {
+ return lang::CallExtern("bitwise_and", {a, b}, {{"vectorizable", false}});
+}
+
+Expr BitwiseAndCallImpl(common::ARMArch, const Target& target, Expr a, Expr b) {
+ std::stringstream ss;
+ ss << "Unsupported arch: " << target.arch_str() << " for bitwise_and.";
+ PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+}
+
+Expr BitwiseAndCallImpl(common::NVGPUArch,
+ const Target& target,
+ Expr a,
+ Expr b) {
+ Type t_a = a.type();
+ auto func_name = hlir::GetExternFuncName(target, t_a, "bitwise_and");
+ return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}});
+}
+
+Expr BitwiseAndCall(const Target& target, Expr a, Expr b) {
+ return std::visit(
+ [&](const auto& arch) { return BitwiseAndCallImpl(arch, target, a, b); },
+ target.arch.variant());
}
Expr operator&(Expr a, Expr b) {
@@ -107,16 +166,41 @@ Expr operator&(Expr a, Expr b) {
}
}
auto target = cinn::runtime::CurrentTarget::GetCurrentTarget();
- if (target.arch == cinn::common::Target::Arch::X86) {
- return lang::CallExtern("bitwise_and", {a, b}, {{"vectorizable", false}});
- } else if (target.arch == cinn::common::Target::Arch::NVGPU) {
- auto func_name = hlir::GetExternFuncName(target, t_a, "bitwise_and");
- return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}});
- } else {
- std::stringstream ss;
- ss << "Unsupport arch: " << target.arch_str() << " for bitwise_and.";
- PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
- }
+ return BitwiseAndCall(target, a, b);
+}
+
+Expr BitwiseXorCallImpl(common::UnknownArch,
+ const Target& target,
+ Expr a,
+ Expr b) {
+ std::stringstream ss;
+ ss << "Unsupported arch: " << target.arch_str() << " for bitwise_xor.";
+ PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+}
+
+Expr BitwiseXorCallImpl(common::X86Arch, const Target& target, Expr a, Expr b) {
+ return lang::CallExtern("bitwise_xor", {a, b}, {{"vectorizable", false}});
+}
+
+Expr BitwiseXorCallImpl(common::ARMArch, const Target& target, Expr a, Expr b) {
+ std::stringstream ss;
+ ss << "Unsupported arch: " << target.arch_str() << " for bitwise_xor.";
+ PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+}
+
+Expr BitwiseXorCallImpl(common::NVGPUArch,
+ const Target& target,
+ Expr a,
+ Expr b) {
+ Type t_a = a.type();
+ auto func_name =
hlir::GetExternFuncName(target, t_a, "bitwise_xor");
+ return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}});
+}
+
+Expr BitwiseXorCall(const Target& target, Expr a, Expr b) {
+ return std::visit(
+ [&](const auto& arch) { return BitwiseXorCallImpl(arch, target, a, b); },
+ target.arch.variant());
}
Expr operator^(Expr a, Expr b) {
@@ -132,31 +216,40 @@ Expr operator^(Expr a, Expr b) {
}
}
auto target = cinn::runtime::CurrentTarget::GetCurrentTarget();
- if (target.arch == cinn::common::Target::Arch::X86) {
- return lang::CallExtern("bitwise_xor", {a, b}, {{"vectorizable", false}});
- } else if (target.arch == cinn::common::Target::Arch::NVGPU) {
- auto func_name = hlir::GetExternFuncName(target, t_a, "bitwise_xor");
- return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}});
- } else {
- std::stringstream ss;
- ss << "Unsupport arch: " << target.arch_str() << " for bitwise_xor.";
- PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
- }
+ return BitwiseXorCall(target, a, b);
+}
+
+Expr BitwiseNotCallImpl(common::UnknownArch, const Target& target, Expr a) {
+ std::stringstream ss;
+ ss << "Unsupported arch: " << target.arch_str() << " for bitwise_not.";
+ PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+}
+
+Expr BitwiseNotCallImpl(common::X86Arch, const Target& target, Expr a) {
+ return lang::CallExtern("bitwise_not", {a}, {{"vectorizable", false}});
+}
+
+Expr BitwiseNotCallImpl(common::ARMArch, const Target& target, Expr a) {
+ std::stringstream ss;
+ ss << "Unsupported arch: " << target.arch_str() << " for bitwise_not.";
+ PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
+}
+
+Expr BitwiseNotCallImpl(common::NVGPUArch, const Target& target, Expr a) {
+ auto func_name = hlir::GetExternFuncName(target, a->type(), "bitwise_not");
+ return lang::CallExtern(func_name, {a}, {{"vectorizable", false}});
+}
+
+Expr BitwiseNotCall(const Target& target, Expr a) {
+ return std::visit(
+ [&](const auto& arch) { return BitwiseNotCallImpl(arch, target, a); },
+ target.arch.variant());
}
Expr operator~(Expr a) {
CHECK(a.type().is_int() || a.type().is_uint());
auto target = cinn::runtime::CurrentTarget::GetCurrentTarget();
- if (target.arch == cinn::common::Target::Arch::X86) {
- return lang::CallExtern("bitwise_not", {a}, {{"vectorizable", false}});
- } else if (target.arch == cinn::common::Target::Arch::NVGPU) {
- auto func_name = hlir::GetExternFuncName(target, a->type(), "bitwise_not");
- return lang::CallExtern(func_name, {a}, {{"vectorizable", false}});
- } else {
- std::stringstream ss;
- ss << "Unsupport arch: " << target.arch_str() << " for bitwise_not.";
- PADDLE_THROW(phi::errors::InvalidArgument(ss.str()));
- }
+ return BitwiseNotCall(target, a);
}
} // namespace ir
diff --git a/paddle/cinn/ir/schedule/impl/for_type.cc b/paddle/cinn/ir/schedule/impl/for_type.cc
index a53870f09ea46..84d45d6827d3d 100644
--- a/paddle/cinn/ir/schedule/impl/for_type.cc
+++ b/paddle/cinn/ir/schedule/impl/for_type.cc
@@ -123,8 +123,7 @@ void DyScheduleImpl::Bind(const Expr& loop, const std::string& thread_axis) {
throw IRScheduleErrorHandler(primitive, os.str(), module_expr_);
}
int offset = thread_axis.back() - 'x';
- auto cur_dev_info =
- common::DevInfoMgr<common::Target::Arch::NVGPU>::GetDevInfo(0);
+ auto cur_dev_info = common::DevInfoMgr<common::NVGPUArch>::GetDevInfo(0);
const std::array<int, 3> kMaxBlockDims = cur_dev_info->GetMaxBlockDims();
const std::array<int, 3> kMaxGridDims = cur_dev_info->GetMaxGridDims();
auto check_offset = [&](const char& c) -> bool {
@@ -202,8 +201,7 @@ void StScheduleImpl::Bind(const Expr& loop, const
std::string& thread_axis) {
<< "thread_axis " << thread_axis << " is not supported";
int offset = thread_axis.back() - 'x';
auto cur_dev_info =
- cinn::common::DevInfoMgr<common::Target::Arch::NVGPU>::GetDevInfo(
- 0);
+ cinn::common::DevInfoMgr<common::NVGPUArch>::GetDevInfo(0);
const std::array<int, 3> kMaxBlockDims = cur_dev_info->GetMaxBlockDims();
const std::array<int, 3> kMaxGridDims = cur_dev_info->GetMaxGridDims();
auto check_offset = [&](const char& c) -> bool {
diff --git a/paddle/cinn/ir/schedule/ir_schedule.h b/paddle/cinn/ir/schedule/ir_schedule.h
index cab1b0d38d868..7927efdaa277f 100644
--- a/paddle/cinn/ir/schedule/ir_schedule.h
+++ b/paddle/cinn/ir/schedule/ir_schedule.h
@@ -32,11 +32,11 @@ namespace cinn {
namespace ir {
/**
- * A struct containing all the schedule primitives. Each shedule primitive is a
- * member function of IRSchedule. Schedule primitves are implmented by
+ * A struct containing all the schedule primitives. Each schedule primitive is a
+ * member function of IRSchedule. Schedule primitives are implemented by
* StScheduleImpl manipulating the AST - IR(Expr). To support serializing and
* replaying, each schedule primitive should append a ScheduleDesc::Step to the
- * trace_ in its corresponding function implment.
+ * trace_ in its corresponding function implementation.
*/
class IRSchedule {
public:
@@ -353,7 +353,7 @@ class IRSchedule {
* If the rfactor loop is k and rf_axis is 0, the rfactor transformation is
* divided into 2 steps:
* 1. get the rfactor block where the reduce loop k is transformed to the
- * serial loop with no accumalation and a new rfactor tensor is created. The
+ * serial loop with no accumulation and a new rfactor tensor is created. The
* axis k will be placed in the rf_axis of the new rf_tensor. The rf_block is
* as follows: \code for (rf_k, 0, 30) // rfactor loop k is transformed
* to the serial loop. for (i, 0, 10) // serial loop for (j, 0, 20) //
@@ -390,7 +390,7 @@ class IRSchedule {
* If the rf loop is j and rf_axis is 0, the transformation is
* divided into 2 steps:
* 1. get the rf block where the reduce loop j is transformed to the
- * serial loop with no accumalation and a new rf tensor is created.
+ * serial loop with no accumulation and a new rf tensor is created.
* The axis j will be placed in the rf_axis of the new rf_tensor.
* The rf_block is as follows:
* \code
@@ -457,7 +457,7 @@ class IRSchedule {
/*!
* \brief Insert a tag in schedule_desc to mark the beginning of post
- * processing, the schedue primitive itself does not make any changes to the
+ * processing, the schedule primitive itself does not make any changes to the
* IR.
*/
void TagPostSchedule();
@@ -491,7 +491,7 @@ class IRSchedule {
/*!
* \brief The base class of the inliner, which handles: * 1) Remove the block to be lined - * 2) Maintain a list of index variables and their substition of the buffer + * 2) Maintain a list of index variables and their substitution of the buffer * being inlined */ class BaseInliner : public ir::IRMutator<> { diff --git a/paddle/cinn/ir/schedule/schedule_base.cc b/paddle/cinn/ir/schedule/schedule_base.cc index b34221d73f052..885391aecd073 100644 --- a/paddle/cinn/ir/schedule/schedule_base.cc +++ b/paddle/cinn/ir/schedule/schedule_base.cc @@ -105,7 +105,7 @@ void ScheduleBase::Broadcast(const std::string& block_name, } std::vector all_loops = this->GetLoops(block_name); if (axes[0] >= all_loops.size()) { - throw std::runtime_error("axes execeed loop size"); + throw std::runtime_error("axes exceed loop size"); } // Get Last loop @@ -150,14 +150,14 @@ void ScheduleBase::Broadcast(const std::string& block_name, auto stride = Expr(1); auto in_offset = Expr(0); - std::set brodacast_set(info.broadcast_axes.begin(), + std::set broadcast_set(info.broadcast_axes.begin(), info.broadcast_axes.end()); for (int i = all_loops.size() - 1; i >= 0; --i) { auto loop_temp = all_loops[i].As(); offset = offset + loop_temp->loop_var * stride; stride = stride * loop_temp->extent; - if (!brodacast_set.count(i)) { + if (!broadcast_set.count(i)) { in_offset = in_offset + loop_temp->loop_var * stride; } } diff --git a/paddle/cinn/ir/test/buffer_test.cc b/paddle/cinn/ir/test/buffer_test.cc index 9dd4c489c999d..b8d4d247b30a9 100644 --- a/paddle/cinn/ir/test/buffer_test.cc +++ b/paddle/cinn/ir/test/buffer_test.cc @@ -69,7 +69,7 @@ TEST(Buffer, bind_to_multiple_tensors) { auto funcs = lang::Lower("func1", stages, {A, B}); Target target; - target.arch = Target::Arch ::X86; + target.arch = common::X86Arch{}; target.bits = Target::Bit ::k32; target.os = Target::OS ::Linux; diff --git a/paddle/cinn/operator_fusion/CMakeLists.txt b/paddle/cinn/operator_fusion/CMakeLists.txt new file mode 100644 index 0000000000000..d5a46bd4ef2e2 --- /dev/null +++ b/paddle/cinn/operator_fusion/CMakeLists.txt @@ -0,0 +1,9 @@ +add_subdirectory(frontend) +add_subdirectory(backend) +add_subdirectory(policy) + +cc_library( + op_fusion + SRCS ${frontend_fusion_src} ${backend_fusion_src} ${policy_fusion_src} + pattern_graph.cc + DEPS phi) diff --git a/paddle/cinn/operator_fusion/backend/CMakeLists.txt b/paddle/cinn/operator_fusion/backend/CMakeLists.txt new file mode 100644 index 0000000000000..fe18d42b10fec --- /dev/null +++ b/paddle/cinn/operator_fusion/backend/CMakeLists.txt @@ -0,0 +1 @@ +gather_srcs(backend_fusion_src SRCS pattern_fuser.cc) diff --git a/paddle/cinn/operator_fusion/backend/pattern.h b/paddle/cinn/operator_fusion/backend/pattern.h new file mode 100644 index 0000000000000..7006fb02b829e --- /dev/null +++ b/paddle/cinn/operator_fusion/backend/pattern.h @@ -0,0 +1,88 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include "paddle/cinn/hlir/framework/pir/trivial_op_impl.h" +#include "paddle/cinn/ir/ir_base.h" +#include "paddle/cinn/operator_fusion/pattern.h" +#include "paddle/cinn/operator_fusion/pattern_fuser.h" +#include "paddle/cinn/operator_fusion/utils.h" + +namespace cinn::fusion { + +struct BackendStage {}; + +template <> +struct PatternContent { + explicit PatternContent(pir::Operation* op, + std::optional expr) + : op(op), expr(expr) {} + pir::Operation* op; + std::optional expr; +}; + +using BackendContent = PatternContent; +using TrivialOp = cinn::hlir::framework::pir::trivial_fusion_detail::TrivialOp; +using ReduceOp = cinn::hlir::framework::pir::trivial_fusion_detail::ReduceOp; +using FusionOp = std::variant; +template <> +struct TrivialPattern { + explicit TrivialPattern(const std::vector& ops, + const TrivialOp& op) + : ops_(ops), trivial_op(op) {} + std::vector ops_; + TrivialOp trivial_op; + static std::string name() { return "Trivial"; } + std::vector ops() const { return ops_; } +}; + +template <> +struct ReducePattern { + explicit ReducePattern(const std::vector& ops, + const ReduceOp& op) + : ops_(ops), reduce_op(op) {} + std::vector ops_; + ReduceOp reduce_op; + std::vector ops() const { return ops_; } + pir::Operation* GetReduceOp() const { return ops_.back(); } + static std::string name() { return "Reduce"; } +}; + +template <> +struct UnsupportPattern { + explicit UnsupportPattern(const std::vector& ops) + : ops_(ops) {} + std::vector ops_; + std::vector ops() const { return ops_; } + static std::string name() { return "Unsupport"; } +}; + +template <> +struct HorizontalFusionPattern { + explicit HorizontalFusionPattern( + const std::vector>& patterns) + : patterns_(patterns) {} + std::vector> patterns_; + std::vector ops() const { + std::vector result; + for (const auto& pattern : patterns_) { + auto ops = GetOpsInPattern(pattern); + ExtendVector(&result, ops); + } + return result; + } + static std::string name() { return "HorizontalFusionPattern"; } +}; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/backend/pattern_fuser.cc b/paddle/cinn/operator_fusion/backend/pattern_fuser.cc new file mode 100644 index 0000000000000..61e8fd658f94a --- /dev/null +++ b/paddle/cinn/operator_fusion/backend/pattern_fuser.cc @@ -0,0 +1,211 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "glog/logging.h" + +#include "paddle/cinn/hlir/framework/pir/trivial_op_impl.h" +#include "paddle/cinn/operator_fusion/backend/pattern.h" +#include "paddle/cinn/operator_fusion/backend/pattern_fuser.h" + +namespace cinn::fusion { + +template <> +StmtPattern ConvertToStmtPattern( + const PatternContent& content) { + const auto& kind = GetOpPatternKind(content.op); + if (kind == hlir::framework::kReduction) { + CHECK(content.expr.has_value()); + return ReducePattern({content.op}, + ReduceOp(content.expr.value())); + } else if (kind == hlir::framework::kElementWise || + kind == hlir::framework::kBroadcast || + kind == hlir::framework::kInjective) { + CHECK(content.expr.has_value()); + return TrivialPattern({content.op}, + TrivialOp(content.expr.value())); + } else { + CHECK(false); + return UnsupportPattern({content.op}); + } +} + +// template StmtPattern RT_x_RT(const +// ReduceTreePattern& upstream, const +// ReduceTreePattern& downstream); + +template <> +StmtPattern MergePatternImpl( + const ReduceTreePattern& first, + const TrivialPattern& second) { + return ReduceTreePlusTrivialPattern(first, second); +} + +template <> +StmtPattern MergePatternImpl( + const TrivialPattern& first, + const ReducePattern& second) { + const auto& ops = UniqueConcatVector(GetOpsInPattern(first), + GetOpsInPattern(second)); + const auto& reduce_op = + cinn::hlir::framework::pir::trivial_fusion_detail::TrivalxOther_Fusion( + first.trivial_op, second.reduce_op); + return ReducePattern(ops, reduce_op); +} + +template <> +StmtPattern MergePatternImpl( + const TrivialPattern& first, + const TrivialPattern& second) { + const auto& ops = UniqueConcatVector(GetOpsInPattern(first), + GetOpsInPattern(second)); + const auto& trivial_op = + cinn::hlir::framework::pir::trivial_fusion_detail::TrivalxOther_Fusion( + first.trivial_op, second.trivial_op); + return TrivialPattern(ops, trivial_op); +} + +template <> +StmtPattern MergePatternImpl( + const HorizontalFusionPattern& first, + const HorizontalFusionPattern& second) { + const auto& contents = + UniqueConcatVector(GetOpsInPattern(first), + GetOpsInPattern(second)); + return HorizontalFusionPattern({first, second}); +} + +/// Start: Tmp Transform Operation for ReduceTree +std::vector ReduceTransformRecursive( + ReduceOp reduce_op, + const ReduceTreePattern& reduce_tree_pattern, + const std::vector& fake_reduce_iter_idx = {}) { + FusionOp root_op = reduce_op; + VLOG(4) << "ReduceTransformRecursive: " << *_GetFuncBodyPointer(root_op); + std::vector result; + for (const auto& child_tree : reduce_tree_pattern.childs()) { + const auto& child_reduce_op = child_tree.GetRootPattern().reduce_op; + auto transformed_nodes = cinn::hlir::framework::pir::trivial_fusion_detail:: + TransformReduceLoopRange( + child_reduce_op, &root_op, fake_reduce_iter_idx); + for (auto& node : transformed_nodes) { + auto child_flatten = + ReduceTransformRecursive(std::get(node), child_tree); + result.insert(result.end(), child_flatten.begin(), child_flatten.end()); + } + } + result.push_back(root_op); + VLOG(4) << "ReduceTransformRecursive: End"; + return result; +} + +std::vector ReduceTreeTrivialTransformRecursive( + TrivialOp trivial_op, + const ReduceTreePlusTrivialPattern& rt_pattern) { + FusionOp root_op = trivial_op; + VLOG(4) << "ReduceTrivialTransformRecursive: " + << *_GetFuncBodyPointer(root_op); + std::vector result; + // for (const auto& child_tree : ) { + // + const auto& child_tree = rt_pattern.tree; + const auto& child_reduce_op = 
child_tree.GetRootPattern().reduce_op; + auto transformed_nodes = cinn::hlir::framework::pir::trivial_fusion_detail:: + TransformReduceLoopRange( + child_reduce_op, &root_op, rt_pattern.fake_reduce_iter_idx); + for (auto& node : transformed_nodes) { + auto child_flatten = ReduceTransformRecursive( + std::get(node), child_tree, rt_pattern.fake_reduce_iter_idx); + result.insert(result.end(), child_flatten.begin(), child_flatten.end()); + } + //} + result.push_back( + cinn::hlir::framework::pir::trivial_fusion_detail::SinkTrivialLoopAlign( + std::get(root_op), + rt_pattern.tree.GetRootPattern().reduce_op, + rt_pattern.fake_reduce_iter_idx)); + VLOG(4) << "ReduceTrivialTransformRecursive End;"; + return result; +} + +/// End: Tmp Transform Operation for reduce tree + +std::vector GetFusionOpFromPattern( + const StmtPattern& pattern); + +struct FusionOpGetter { + std::vector operator()( + const TrivialPattern& pattern) { + return {pattern.trivial_op}; + } + + std::vector operator()(const ReducePattern& pattern) { + return {pattern.reduce_op}; + } + + std::vector operator()( + const ReduceTreePattern& pattern) { + return ReduceTransformRecursive(pattern.GetRootPattern().reduce_op, + pattern); + } + + std::vector operator()( + const ReduceTreePlusTrivialPattern& pattern) { + return ReduceTreeTrivialTransformRecursive(pattern.sink_trivial.trivial_op, + pattern); + } + + std::vector operator()( + const HorizontalFusionPattern& pattern) { + std::vector result; + for (const auto& sub_pattern : pattern.patterns_) { + result = ConcatVector(result, GetFusionOpFromPattern(sub_pattern)); + } + return result; + } + + std::vector operator()( + const UnsupportPattern& pattern) { + CHECK(false) << "Not Implemented."; + } +}; + +// tmp transform for reduce_tree and reduce_tree_trivial. +std::vector GetFusionOpFromPattern( + const StmtPattern& pattern) { + return std::visit(FusionOpGetter(), pattern.variant()); +} + +struct FusionOp2Expr { + std::vector operator()(const TrivialOp& op) { + return {op.GetFuncBody()}; + } + std::vector operator()(const ReduceOp& op) { + const auto& t_r = SplitReduceOp(op); + return {t_r.first.GetFuncBody(), t_r.second.GetFuncBody()}; + } +}; + +std::vector GetExprFromPattern( + const StmtPattern& pattern) { + const auto& fusion_ops = GetFusionOpFromPattern(pattern); + std::vector results; + for (const auto& op : fusion_ops) { + results = ConcatVector(results, std::visit(FusionOp2Expr(), op)); + } + return results; +} + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/backend/pattern_fuser.h b/paddle/cinn/operator_fusion/backend/pattern_fuser.h new file mode 100644 index 0000000000000..a460d67f8e02f --- /dev/null +++ b/paddle/cinn/operator_fusion/backend/pattern_fuser.h @@ -0,0 +1,52 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
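// Illustrative sketch (hypothetical caller, not from this patch): the
// declarations below are the backend half of the fusion protocol; after
// clustering, GetExprFromPattern turns a fused pattern back into
// schedulable bodies.
//
//   std::vector<cinn::ir::Expr> LowerFusedPattern(
//       const cinn::fusion::StmtPattern<cinn::fusion::BackendStage>& p) {
//     // Runs the ReduceTree transforms and flattens every FusionOp into
//     // its function body (SplitReduceOp splits a ReduceOp in two).
//     return cinn::fusion::GetExprFromPattern(p);
//   }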
+
+#pragma once
+
+#include "paddle/cinn/operator_fusion/backend/pattern.h"
+#include "paddle/cinn/operator_fusion/pattern.h"
+#include "paddle/cinn/operator_fusion/pattern_fuser.h"
+
+namespace cinn::fusion {
+
+// Extern-style declarations: declare the explicit specializations here so
+// that the compiler does not implicitly instantiate the generic templates
+// for BackendStage; the definitions live in backend/pattern_fuser.cc.
+
+template <>
+StmtPattern<BackendStage> ConvertToStmtPattern(
+    const PatternContent<BackendStage>& content);
+
+template <>
+StmtPattern<BackendStage> MergePatternImpl(
+    const ReduceTreePattern<BackendStage>& first,
+    const TrivialPattern<BackendStage>& second);
+
+template <>
+StmtPattern<BackendStage> MergePatternImpl(
+    const TrivialPattern<BackendStage>& first,
+    const ReducePattern<BackendStage>& second);
+
+template <>
+StmtPattern<BackendStage> MergePatternImpl(
+    const TrivialPattern<BackendStage>& first,
+    const TrivialPattern<BackendStage>& second);
+
+template <>
+StmtPattern<BackendStage> MergePatternImpl(
+    const HorizontalFusionPattern<BackendStage>& first,
+    const HorizontalFusionPattern<BackendStage>& second);
+
+std::vector<ir::Expr> GetExprFromPattern(
+    const StmtPattern<BackendStage>& pattern);
+
+}  // namespace cinn::fusion
diff --git a/paddle/cinn/operator_fusion/frontend/CMakeLists.txt b/paddle/cinn/operator_fusion/frontend/CMakeLists.txt
new file mode 100644
index 0000000000000..e9683e520ef5a
--- /dev/null
+++ b/paddle/cinn/operator_fusion/frontend/CMakeLists.txt
@@ -0,0 +1 @@
+gather_srcs(frontend_fusion_src SRCS pattern_fuser.cc)
diff --git a/paddle/cinn/operator_fusion/frontend/pattern.h b/paddle/cinn/operator_fusion/frontend/pattern.h
new file mode 100644
index 0000000000000..e267483e56586
--- /dev/null
+++ b/paddle/cinn/operator_fusion/frontend/pattern.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
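// Illustrative sketch (hypothetical helper): the FrontendStage content
// defined below wraps only a pir::Operation*, and the std::hash
// specialization below makes it usable in unordered containers before any
// lowering has happened.
//
//   std::unordered_set<cinn::fusion::FrontendContent> seen;
//   bool RecordOnce(pir::Operation* op) {
//     return seen.insert(cinn::fusion::FrontendContent(op)).second;
//   }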
+ +#pragma once +#include "paddle/cinn/operator_fusion/pattern.h" +#include "paddle/cinn/operator_fusion/pattern_fuser.h" +#include "paddle/cinn/operator_fusion/utils.h" + +namespace cinn::fusion { + +struct FrontendStage {}; + +template <> +struct PatternContent { + explicit PatternContent(pir::Operation* op) : op(op) {} + pir::Operation* op; + bool operator==(const PatternContent& other) const { + return op == other.op; + } +}; + +using FrontendContent = PatternContent; + +} // namespace cinn::fusion + +namespace std { +template <> +struct hash { + size_t operator()(const cinn::fusion::FrontendContent& content) const { + return std::hash()(content.op); + } +}; + +} // namespace std + +namespace cinn::fusion { +template <> +struct TrivialPattern { + explicit TrivialPattern(const std::vector& ops) + : ops_(ops) {} + std::vector ops_; + static std::string name() { return "Trivial"; } + std::vector ops() const { return ops_; } +}; + +template <> +struct ReducePattern { + explicit ReducePattern(const std::vector& ops) : ops_(ops) {} + std::vector ops_; + std::vector ops() const { return ops_; } + pir::Operation* GetReduceOp() const { return ops_.back(); } + static std::string name() { return "Reduce"; } +}; + +template <> +struct UnsupportPattern { + explicit UnsupportPattern(const std::vector& ops) + : ops_(ops) {} + std::vector ops_; + std::vector ops() const { return ops_; } + static std::string name() { return "Unsupport"; } +}; + +template <> +struct HorizontalFusionPattern { + explicit HorizontalFusionPattern( + const std::vector>& patterns) + : patterns_(patterns) {} + std::vector> patterns_; + std::vector ops() const { + std::vector result; + for (const auto& pattern : patterns_) { + auto ops = GetOpsInPattern(pattern); + ExtendVector(&result, ops); + } + return result; + } + static std::string name() { return "HorizontalFusionPattern"; } +}; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/frontend/pattern_fuser.cc b/paddle/cinn/operator_fusion/frontend/pattern_fuser.cc new file mode 100644 index 0000000000000..332230f409979 --- /dev/null +++ b/paddle/cinn/operator_fusion/frontend/pattern_fuser.cc @@ -0,0 +1,74 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
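// Illustrative sketch (hypothetical helper, not from this patch): the
// specializations implemented in this file classify ops purely by their
// OpPatternKind, with no ir::Expr attached.
//
//   std::vector<cinn::fusion::StmtPattern<cinn::fusion::FrontendStage>>
//   ClassifyAll(const std::vector<pir::Operation*>& ops) {
//     std::vector<cinn::fusion::StmtPattern<cinn::fusion::FrontendStage>> out;
//     for (auto* op : ops) {
//       out.push_back(cinn::fusion::ConvertToStmtPattern(
//           cinn::fusion::FrontendContent(op)));
//     }
//     return out;
//   }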
+
+#pragma once
+
+#include "paddle/cinn/operator_fusion/frontend/pattern_fuser.h"
+#include "paddle/cinn/operator_fusion/frontend/pattern.h"
+
+namespace cinn::fusion {
+
+template <>
+StmtPattern<FrontendStage> ConvertToStmtPattern(
+    const PatternContent<FrontendStage>& content) {
+  const auto& kind = GetOpPatternKind(content.op);
+  if (kind == hlir::framework::kReduction) {
+    return ReducePattern<FrontendStage>({content.op});
+  } else if (kind == hlir::framework::kElementWise ||
+             kind == hlir::framework::kBroadcast ||
+             kind == hlir::framework::kInjective) {
+    return TrivialPattern<FrontendStage>({content.op});
+  } else {
+    return UnsupportPattern<FrontendStage>({content.op});
+  }
+}
+
+template <>
+StmtPattern<FrontendStage> MergePatternImpl(
+    const ReduceTreePattern<FrontendStage>& first,
+    const TrivialPattern<FrontendStage>& second) {
+  return ReduceTreePlusTrivialPattern<FrontendStage>(first, second);
+}
+
+template <>
+StmtPattern<FrontendStage> MergePatternImpl(
+    const TrivialPattern<FrontendStage>& first,
+    const ReducePattern<FrontendStage>& second) {
+  const auto& contents =
+      UniqueConcatVector(GetOpsInPattern<FrontendStage>(first),
+                         GetOpsInPattern<FrontendStage>(second));
+  return ReducePattern<FrontendStage>(contents);
+}
+
+template <>
+StmtPattern<FrontendStage> MergePatternImpl(
+    const TrivialPattern<FrontendStage>& first,
+    const TrivialPattern<FrontendStage>& second) {
+  const auto& contents =
+      UniqueConcatVector(GetOpsInPattern<FrontendStage>(first),
+                         GetOpsInPattern<FrontendStage>(second));
+  return TrivialPattern<FrontendStage>(contents);
+}
+
+template <>
+StmtPattern<FrontendStage> MergePatternImpl(
+    const HorizontalFusionPattern<FrontendStage>& first,
+    const HorizontalFusionPattern<FrontendStage>& second) {
+  const auto& contents =
+      UniqueConcatVector(GetOpsInPattern<FrontendStage>(first),
+                         GetOpsInPattern<FrontendStage>(second));
+  return HorizontalFusionPattern<FrontendStage>({first, second});
+}
+
+}  // namespace cinn::fusion
diff --git a/paddle/cinn/operator_fusion/frontend/pattern_fuser.h b/paddle/cinn/operator_fusion/frontend/pattern_fuser.h
new file mode 100644
index 0000000000000..d92b8429ad16b
--- /dev/null
+++ b/paddle/cinn/operator_fusion/frontend/pattern_fuser.h
@@ -0,0 +1,49 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/cinn/operator_fusion/frontend/pattern.h"
+#include "paddle/cinn/operator_fusion/pattern.h"
+#include "paddle/cinn/operator_fusion/pattern_fuser.h"
+
+namespace cinn::fusion {
+
+// Extern-style declarations: declare the explicit specializations here so
+// that the compiler does not implicitly instantiate the generic templates
+// for FrontendStage; the definitions live in frontend/pattern_fuser.cc.
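// Rough two-stage flow implied by the frontend/backend split (an assumption
// of this sketch, not spelled out in the patch): cluster first on
// FrontendStage contents, lower the clustered ops, then rebuild patterns on
// BackendStage contents, e.g. via ClusterOps from group_cluster.h:
//
//   auto groups = cinn::fusion::ClusterOps(frontend_contents);
//   // ... lower each group, then re-run the fuser with BackendContent
//   // (op + ir::Expr) to drive the actual loop transformations.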
+
+template <>
+StmtPattern<FrontendStage> ConvertToStmtPattern(
+    const PatternContent<FrontendStage>& content);
+
+template <>
+StmtPattern<FrontendStage> MergePatternImpl(
+    const ReduceTreePattern<FrontendStage>& first,
+    const TrivialPattern<FrontendStage>& second);
+
+template <>
+StmtPattern<FrontendStage> MergePatternImpl(
+    const TrivialPattern<FrontendStage>& first,
+    const ReducePattern<FrontendStage>& second);
+
+template <>
+StmtPattern<FrontendStage> MergePatternImpl(
+    const TrivialPattern<FrontendStage>& first,
+    const TrivialPattern<FrontendStage>& second);
+
+template <>
+StmtPattern<FrontendStage> MergePatternImpl(
+    const HorizontalFusionPattern<FrontendStage>& first,
+    const HorizontalFusionPattern<FrontendStage>& second);
+
+}  // namespace cinn::fusion
diff --git a/paddle/cinn/operator_fusion/group_cluster.h b/paddle/cinn/operator_fusion/group_cluster.h
new file mode 100644
index 0000000000000..aa545699a0d4d
--- /dev/null
+++ b/paddle/cinn/operator_fusion/group_cluster.h
@@ -0,0 +1,91 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/cinn/operator_fusion/backend/pattern.h"
+#include "paddle/cinn/operator_fusion/backend/pattern_fuser.h"
+#include "paddle/cinn/operator_fusion/frontend/pattern.h"
+#include "paddle/cinn/operator_fusion/frontend/pattern_fuser.h"
+#include "paddle/cinn/operator_fusion/pattern_graph.h"
+#include "paddle/cinn/operator_fusion/policy/general_topo_policy.h"
+#include "paddle/cinn/operator_fusion/policy/relative_judge_policy.h"
+#include "paddle/cinn/operator_fusion/policy/shardable_axes_policy.h"
+
+namespace cinn::fusion {
+
+template <typename T>
+inline std::vector<fusion::PatternNodePtr<T>> ClusterOps(
+    const std::vector<fusion::PatternContent<T>>& contents) {
+  std::function<pir::Operation*(fusion::PatternContent<T>)> func =
+      [](const fusion::PatternContent<T>& content) { return content.op; };
+  const auto& origin_ops = fusion::MapVector(contents, func);
+  CHECK_GT(origin_ops.size(), 0);
+  VLOG(4) << "Start Cluster Ops!";
+  VLOG(4) << "Input Group with size " << origin_ops.size() << " :\n"
+          << fusion::OpsDebugStr(origin_ops);
+
+  std::vector<pir::Value> outputs;
+  const auto& ops = [&] {
+    std::vector<pir::Operation*> ops;
+    for (const auto& content : contents) {
+      if (content.op->name() == "cf.yield") {  // just skip cf.yield.
+        for (auto& operand : content.op->operands()) {
+          outputs.push_back(operand.source());
+        }
+        continue;
+      }
+      ops.emplace_back(content.op);
+    }
+    return ops;
+  }();
+
+  const auto& content_without_yield =
+      FilterVector(contents, [](const fusion::PatternContent<T>& content) {
+        return content.op->name() != "cf.yield";
+      });
+
+  pir::Program* program = ops.at(0)->GetParentProgram();
+
+  const auto* shape_analysis =
+      &pir::ShapeAnalysisManager::Instance().Get(program);
+
+  VLOG(4) << "Start Create Policies and PolicyManager!";
+  const auto& relative_judge_policy =
+      std::make_shared<fusion::RelativeJudgePolicy<T>>(ops, shape_analysis);
+
+  const auto& general_topo_policy =
+      std::make_shared<fusion::GeneralTopoPolicy<T>>();
+
+  auto policy_manager =
+      fusion::PolicyManager<T>({relative_judge_policy, general_topo_policy});
+
+  auto topo_manager = fusion::PolicyManager<T>({general_topo_policy});
+
+  VLOG(4) << "Start Create PatternGraph";
+  fusion::PatternGraph<T> graph(
+      content_without_yield, outputs, policy_manager, topo_manager);
+  auto result = graph.ClusterOps();
+
+  VLOG(4) << "End Cluster Ops! result size:" << result.size();
+  for (const auto& node : result) {
+    VLOG(4) << "\n"
+            << node->DebugStr() << "\n"
+            << fusion::StmtPatternDebugStr(node->stmt_pattern_);
+  }
+
+  return result;
+}
+
+}  // namespace cinn::fusion
diff --git a/paddle/cinn/operator_fusion/pattern.h b/paddle/cinn/operator_fusion/pattern.h
new file mode 100644
index 0000000000000..908b4a4348bfc
--- /dev/null
+++ b/paddle/cinn/operator_fusion/pattern.h
@@ -0,0 +1,101 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
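// Illustrative sketch (mirrors GetPatternName in pattern_fuser.h): every
// StmtPattern defined below is a variant over the per-stage pattern types,
// so generic queries are std::visit calls on variant():
//
//   template <typename T>
//   std::string NameOf(const cinn::fusion::StmtPattern<T>& s) {
//     return std::visit([](const auto& impl) { return impl.name(); },
//                       s.variant());
//   }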
+ +#pragma once + +#include +#include +#include +#include "glog/logging.h" +#include "paddle/cinn/operator_fusion/utils.h" +#include "paddle/pir/include/core/operation.h" + +namespace cinn::fusion { + +template +struct PatternContent {}; + +template +class TrivialPattern {}; + +template +class ReducePattern {}; + +template +struct ReduceTreePattern { + explicit ReduceTreePattern(const std::vector>& childs, + const ReducePattern& root) + : childs_(childs), root_(root) {} + const ReducePattern& GetRootPattern() const { return root_; } + std::vector ops() const { + std::vector result{root_.ops()}; + for (const auto& child : childs_) { + result = UniqueConcatVector(result, child.ops()); + } + return result; + } + static std::string name() { return "ReduceTree"; } + const std::vector>& childs() const { return childs_; } + std::vector>& childs() { return childs_; } + void InsertChild(const ReduceTreePattern& child) { + childs_.push_back(child); + } + std::vector> FlattenReducePattern() const { + std::vector> result; + for (const auto& child : childs_) { + result = ConcatVector(result, child.FlattenReducePattern()); + } + return result; + } + + private: + std::vector> childs_; + ReducePattern root_; +}; + +template +struct ReduceTreePlusTrivialPattern { + explicit ReduceTreePlusTrivialPattern(const ReduceTreePattern& tree, + const TrivialPattern& sink_trivial) + : tree(tree), sink_trivial(sink_trivial) {} + ReduceTreePattern tree; + TrivialPattern sink_trivial; + std::vector ops() const { + return UniqueConcatVector(tree.ops(), sink_trivial.ops()); + } + static std::string name() { return "ReduceTree+Trivial"; } + std::vector fake_reduce_iter_idx; +}; + +template +class UnsupportPattern {}; +template +class HorizontalFusionPattern {}; + +template +using StmtPatternBase = std::variant, + ReducePattern, + ReduceTreePattern, + ReduceTreePlusTrivialPattern, + HorizontalFusionPattern, + UnsupportPattern>; + +template +struct StmtPattern final : public StmtPatternBase { + using StmtPatternBase::StmtPatternBase; + const StmtPatternBase& variant() const { + return static_cast&>(*this); + } +}; +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/pattern_fuser.h b/paddle/cinn/operator_fusion/pattern_fuser.h new file mode 100644 index 0000000000000..802031b6b2304 --- /dev/null +++ b/paddle/cinn/operator_fusion/pattern_fuser.h @@ -0,0 +1,208 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "glog/logging.h" + +#include "paddle/cinn/adt/adt.h" +#include "paddle/cinn/hlir/framework/op.h" +#include "paddle/cinn/operator_fusion/pattern.h" +#include "paddle/cinn/operator_fusion/utils.h" + +// This file is the protocol of the pattern fuser. Please implement +// ConvertToStmtPattern and MergePatternImpl in the specializations. 
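// Illustrative sketch (hypothetical stage, for orientation only): a stage is
// a tag type plus specializations of this protocol, mirroring
// frontend/pattern.h and backend/pattern.h:
//
//   struct MyStage {};
//   namespace cinn::fusion {
//   template <>
//   struct PatternContent<MyStage> {
//     explicit PatternContent(pir::Operation* op) : op(op) {}
//     pir::Operation* op;  // plus whatever per-stage payload is needed
//   };
//   }  // namespace cinn::fusion
//   // ...then specialize ConvertToStmtPattern and MergePatternImpl for
//   // MyStage, as the frontend and backend pattern_fuser headers do.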
+ +namespace cinn::fusion { + +template +ReducePattern ToReducePattern(const StmtPattern& second) { + return std::get>(second); +} + +template +std::string GetPatternName(const StmtPattern& s) { + return std::visit([](const auto& impl) { return impl.name(); }, s.variant()); +} + +template +StmtPattern ConvertToStmtPattern(const PatternContent& content); + +template +std::vector GetOpsInPattern(const StmtPattern& pattern) { + return std::visit([](const auto& impl) { return impl.ops(); }, + pattern.variant()); +} + +template +bool IsReducePattern(const StmtPattern& pattern) { + return std::holds_alternative>(pattern); +} + +template +bool IsReduceTreePattern(const StmtPattern& pattern) { + return std::holds_alternative>(pattern); +} + +template +bool IsOpsDependents(const StmtPattern& pattern) { + return std::holds_alternative>(pattern); +} + +template +bool IsUnsupportPattern(const StmtPattern& pattern) { + return std::holds_alternative>(pattern); +} + +template +bool IsReduceTrivialPattern(const StmtPattern& pattern) { + return std::holds_alternative>(pattern); +} + +template +std::unordered_set GetPatternInputValuesIncludeInner( + const StmtPattern& A) { + std::unordered_set result; + for (const auto& op : GetOpsInPattern(A)) { + for (const auto& value : op->operands()) { + result.insert(value.source()); + } + } + return result; +} + +template +std::unordered_set GetPatternOutputValuesIncludedInner( + const StmtPattern& A) { + std::unordered_set result; + for (const auto& op : GetOpsInPattern(A)) { + for (const auto& value : op->results()) { + result.insert(value); + } + } + return result; +} + +template +std::unordered_set GetPatternInputValues(const StmtPattern& A) { + auto all_input_values = GetPatternInputValuesIncludeInner(A); + for (const auto& value : GetPatternOutputValuesIncludedInner(A)) { + all_input_values.erase(value); + } + VLOG(4) << "GetPatternInputValues: " << all_input_values.size(); + return all_input_values; +} + +template +std::string StmtPatternDebugStr(const StmtPattern& stmt) { + std::stringstream ss; + auto all_ops = GetOpsInPattern(stmt); + ss << "StmtPattern, size " << all_ops.size() << " :\n"; + ss << OpsDebugStr(all_ops); + return ss.str(); +} + +static bool IsDirectUpstream(const pir::Operation* upstream, + const pir::Operation* downstream) { + for (const auto& value : downstream->results()) { + for (const auto& operand : upstream->operands()) { + if (value == operand.source()) { + return true; + } + } + } + return false; +} + +template +int InsertDownstreamIntoTree(const ReduceTreePattern& upstream, + ReduceTreePattern& downstream) { // NOLINT + if (IsDirectUpstream(upstream.GetRootPattern().GetReduceOp(), + downstream.GetRootPattern().GetReduceOp())) { + downstream.InsertChild(upstream); + return 1; + } + int insert_num = 0; + for (auto& child : downstream.childs()) { + insert_num += InsertDownstreamIntoTree(upstream, child); + } + return insert_num; +} + +template +StmtPattern MergePatternImpl(const ReduceTreePattern& upstream, + const ReduceTreePattern& downstream) { + ReduceTreePattern result = downstream; // copy first. 
+ int insert_num = InsertDownstreamIntoTree(upstream, result); + CHECK(insert_num == 1) << "Must insert only once, but insert " << insert_num; + return result; +} + +template +StmtPattern MergePatternImpl(const ReduceTreePattern& first, + const TrivialPattern& second); + +template +StmtPattern MergePatternImpl(const TrivialPattern& first, + const ReducePattern& second); + +template +StmtPattern MergePatternImpl(const TrivialPattern& first, + const TrivialPattern& second); + +template +StmtPattern MergePatternImpl(const HorizontalFusionPattern& first, + const HorizontalFusionPattern& second); + +template +StmtPattern MergePattern(const StmtPattern& first, + const StmtPattern& second) { + VLOG(4) << "MergePattern: " << GetPatternName(first) << " x " + << GetPatternName(second); + const auto PatternMatch = adt::match{ + [&](const ReduceTreePattern& lhs, const ReduceTreePattern& rhs) { + return MergePatternImpl(lhs, rhs); + }, + [&](const ReduceTreePattern& lhs, const TrivialPattern& rhs) { + return MergePatternImpl(lhs, rhs); + }, + [&](const TrivialPattern& lhs, const ReducePattern& rhs) { + return MergePatternImpl(lhs, rhs); + }, + [&](const TrivialPattern& lhs, const TrivialPattern& rhs) { + return MergePatternImpl(lhs, rhs); + }, + [&](const HorizontalFusionPattern& lhs, + const HorizontalFusionPattern& rhs) { + return MergePatternImpl(lhs, rhs); + }, + [&](const auto& lhs, const auto& rhs) -> StmtPattern { + CHECK(false) << "Found not support merge!" << GetPatternName(first) + << "X" << GetPatternName(second); + }, + }; + return std::visit(PatternMatch, first.variant(), second.variant()); +} + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/pattern_graph.cc b/paddle/cinn/operator_fusion/pattern_graph.cc new file mode 100644 index 0000000000000..547f7ff9e14cf --- /dev/null +++ b/paddle/cinn/operator_fusion/pattern_graph.cc @@ -0,0 +1,258 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
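// Summary sketch (restating the implementation below, not adding to it):
// ClusterOps applies a fixed sequence of graph rewrites, each expressed as a
// GraphTransformer over a (matcher, operation) pair:
//
//   SinkTrivialPattern();         // fuse trivial ops into their consumers
//   ReduceLiftReduceTree();       // ReducePattern -> ReduceTreePattern
//   ReduceTreeGrown();            // merge reduce trees along edges
//   ReduceTree_Trivial_Fusion();  // attach a sink trivial op to a tree
//   HorizontalFusion();           // merge independent same-shape patterns
//   SortByTopoOrder();            // emit nodes in topological order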
+ +#include "paddle/cinn/operator_fusion/pattern_graph.h" +#include "paddle/cinn/operator_fusion/backend/pattern.h" +#include "paddle/cinn/operator_fusion/backend/pattern_fuser.h" +#include "paddle/cinn/operator_fusion/frontend/pattern.h" +#include "paddle/cinn/operator_fusion/frontend/pattern_fuser.h" + +namespace cinn::fusion { + +template +std::vector> PatternGraph::ClusterOps() { + VLOG(4) << "[Group Cluster] Initial Condition: " << GraphInfo(); + + VLOG(4) << "[Group Cluster] Start SinkTrivialPattern"; + SinkTrivialPattern(); + VLOG(4) << "[Group Cluster] After SinkTrivialPattern: " << GraphInfo(); + + // ReducePattern -> ReduceTreePattern + VLOG(4) << "[Group Cluster] Start ReduceLiftReduceTree"; + ReduceLiftReduceTree(); + VLOG(4) << "[Group Cluster] After ReduceLiftReduceTree: " << GraphInfo(); + + // ReduceTreePattern + ReduceTreePattern fusion + VLOG(4) << "[Group Cluster] Start ReduceTreeGrown"; + ReduceTreeGrown(); + VLOG(4) << "[Group Cluster] After ReduceTreeGrown: " << GraphInfo(); + + // ReduceTreePattern + TrivialPattern fusion. + VLOG(4) << "[Group Cluster] Start ReduceTree_Trivial_Fusion"; + ReduceTree_Trivial_Fusion(); + VLOG(4) << "[Group Cluster] After ReduceTree_Trivial_Fusion: " << GraphInfo(); + + // Horizontal fusion. + VLOG(4) << "[Group Cluster] Start HorizontalFusion"; + HorizontalFusion(); + VLOG(4) << "[Group Cluster] After HorizontalFusion: " << GraphInfo(); + + return SortByTopoOrder(); +} + +template +std::vector> PatternGraph::SortByTopoOrder() { + // sort all_pattern_nodes_ by topo order. + std::vector> res; + std::list> topo_queue; + std::map, int> degree; + for (const auto& node : all_pattern_nodes_) { + degree[node] = node->upstream_.size(); + if (degree[node] == 0) { + topo_queue.push_back(node); + } + } + while (!topo_queue.empty()) { + PatternNodePtr node = topo_queue.front(); + topo_queue.pop_front(); + res.push_back(node); + for (const auto& downstream_op : node->downstream_) { + degree[downstream_op] = degree[downstream_op] - 1; + if (degree[downstream_op] == 0) { + topo_queue.push_back(downstream_op); + } + } + } + return res; +} + +template +void PatternGraph::SinkTrivialPattern() { + GraphTransformer< + NodePattern, + T, + And>>, + IsNotOutputNodeMatcher>, + MergeTrivialPatternOperation>(this); +} + +template +void PatternGraph::ReduceLiftReduceTree() { + GraphTransformer< + NodePattern, + T, + And, StmtPatternGraphMatcher>>, + LiftReduceToReduceTreeOperation>(this); +} + +template +void PatternGraph::HorizontalFusion() { + GraphTransformer>, + LiftToHorizontalFusionPatternOperation>(this); + + GraphTransformer(this); +} + +template +void PatternGraph::ReduceTreeGrown() { + GraphTransformer, + MergeReduceTreeOperation>(this); +} + +template +void PatternGraph::ReduceTree_Trivial_Fusion() { + GraphTransformer< + NodePattern, + T, + And, + MergeReduceTreeAndTrivialOperation>(this); +} + +template +PatternGraph::PatternGraph(const std::vector>& contents, + const std::vector& outputs, + const PolicyManager policy_manager, + const PolicyManager topo_manager) + : policy_manager_(policy_manager), + topo_manager_(topo_manager), + outputs_(outputs) { + std::unordered_map> op_to_node_map; + + VLOG(4) << "len(outputs) = " << outputs_.size(); + for (const auto& v : outputs) { + VLOG(4) << "output is" << OpsDebugStr({v.defining_op()}); + } + + for (const auto& content : contents) { + PatternNodePtr node = std::make_shared>(content); + op_to_node_map[content.op] = node; + all_pattern_nodes_.emplace(node); + node->sink_op_ = content.op; + } + + for (const 
auto& content : contents) { + PatternNodePtr cur_node = op_to_node_map[content.op]; + + // add upstream nodes + for (int i = 0; i < content.op->num_operands(); ++i) { + ::pir::Operation* input_op = content.op->operand_source(i).defining_op(); + if (op_to_node_map.find(input_op) != op_to_node_map.end()) { + PatternNodePtr upstream_node = op_to_node_map[input_op]; + cur_node->upstream_.push_back(upstream_node); + } + } + + // add downstream nodes + for (int i = 0; i < content.op->num_results(); ++i) { + pir::Value related_value = content.op->result(i); + for (auto consumer_it = related_value.use_begin(); + consumer_it != related_value.use_end(); + ++consumer_it) { + ::pir::Operation* output_op = consumer_it->owner(); + if (op_to_node_map.find(output_op) != op_to_node_map.end()) { + PatternNodePtr downstream_node = op_to_node_map[output_op]; + cur_node->downstream_.push_back(downstream_node); + } + } + } + } + + VLOG(4) << "PatternGraph Created, pattern node size: " + << all_pattern_nodes_.size(); +} + +template +void PatternGraph::RemoveNode(const PatternNodePtr& node) { + VLOG(4) << "Start Remove: " << node; + if (all_pattern_nodes_.find(node) != all_pattern_nodes_.end()) { + VLOG(4) << "Removed! "; + all_pattern_nodes_.erase(node); + } + + for (PatternNodePtr& upstream : node->upstream_) { + RemoveFromVector(&upstream->downstream_, node); + } + + for (PatternNodePtr& downstream : node->downstream_) { + RemoveFromVector(&downstream->upstream_, node); + } +} + +template +void PatternGraph::AppendNode(const PatternNodePtr& node) { + all_pattern_nodes_.emplace(node); +} + +template +std::string PatternGraph::GraphInfo() const { + std::stringstream ss; + ss << "\n========= GraphInfo ==========="; + for (const auto& v : all_pattern_nodes_) { + ss << "\n" << v->DebugStr(); + ss << "\n IsOutput: " << IsOutputNodeMatcher()(*this, v); + } + ss << "\n==============================="; + return ss.str(); +} + +template +PatternNodePtr PatternGraph::MergeNode( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) { + PatternNodePtr merged_node = + std::make_shared>(upstream, downstream); + + // deal with the reference. + ExtendVector(&merged_node->upstream_, upstream->upstream_); + ExtendVector(&merged_node->upstream_, downstream->upstream_); + RemoveFromVector(&merged_node->upstream_, upstream); + + ExtendVector(&merged_node->downstream_, upstream->downstream_); + ExtendVector(&merged_node->downstream_, downstream->downstream_); + RemoveFromVector(&merged_node->downstream_, downstream); + + for (const auto& upstream_node : merged_node->upstream_) { + upstream_node->downstream_.push_back(merged_node); + RemoveFromVector(&upstream_node->downstream_, upstream); + RemoveFromVector(&upstream_node->downstream_, downstream); + } + for (const auto& downstream_node : merged_node->downstream_) { + downstream_node->upstream_.push_back(merged_node); + RemoveFromVector(&downstream_node->downstream_, upstream); + RemoveFromVector(&downstream_node->downstream_, downstream); + } + + const auto vec_unique = [](const std::vector>& vec) { + auto set = std::unordered_set(vec.begin(), vec.end()); + return set.size() == vec.size(); + }; + + CHECK(vec_unique(merged_node->upstream_)); + CHECK(vec_unique(merged_node->downstream_)); + + // deal with the graph storage. 
+ AppendNode(merged_node); + return merged_node; +} + +template class PatternGraph; +template class PatternGraph; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/pattern_graph.h b/paddle/cinn/operator_fusion/pattern_graph.h new file mode 100644 index 0000000000000..589235d8d76a8 --- /dev/null +++ b/paddle/cinn/operator_fusion/pattern_graph.h @@ -0,0 +1,373 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "paddle/cinn/operator_fusion/pattern_node.h" +#include "paddle/cinn/operator_fusion/policy/policy_manager.h" +#include "paddle/cinn/operator_fusion/policy/relative_judge_policy.h" +#include "paddle/cinn/operator_fusion/utils.h" + +namespace cinn::fusion { + +template +using PatternNodePtrSet = std::unordered_set>; + +template +class PatternGraph { + public: + PatternGraph(const std::vector>& nodes, + const std::vector& outputs, + const PolicyManager policy_manager, + const PolicyManager topo_manager); + + std::vector> ClusterOps(); + + void SinkTrivialPattern(); + void HorizontalFusion(); + void ReduceLiftReduceTree(); + void ReduceTreeGrown(); + void ReduceTree_Trivial_Fusion(); + + void RemoveNode(const PatternNodePtr& node); + void AppendNode(const PatternNodePtr& node); + std::string GraphInfo() const; + PatternNodePtr MergeNode(const PatternNodePtr& upstream, + const PatternNodePtr& downstream); + std::vector> SortByTopoOrder(); + + public: + PatternNodePtrSet all_pattern_nodes_; + std::vector outputs_; + PolicyManager policy_manager_; + PolicyManager topo_manager_; +}; + +// PatternGraphFusionOperation := (GraphMatcher, GraphOperation) +// SearchAlgorithm := NodePattern | EdgePattern | GraphMatcher +// GraphOperation := Merge2Node | SplitNode | SplitAllAndMergeDownstream + +struct NodePattern {}; +struct EdgePattern {}; +struct GraphPattern {}; // not implemented. +struct NodePairPattern {}; // not implemented. 
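// Illustrative sketch (hypothetical pass built from the pieces below): a
// rewrite is GraphTransformer<SearchKind, Stage, Matcher, Operation>; e.g.
// lifting every ReducePattern node, without the extra guards used by
// ReduceLiftReduceTree, would be:
//
//   template <typename T>
//   void LiftAllReduces(cinn::fusion::PatternGraph<T>* graph) {
//     cinn::fusion::GraphTransformer<
//         cinn::fusion::NodePattern,
//         T,
//         cinn::fusion::StmtPatternGraphMatcher<
//             cinn::fusion::ReducePattern<T>>,
//         cinn::fusion::LiftReduceToReduceTreeOperation>(graph);
//   }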
+ +template +struct SearchAlgorithm {}; + +template +struct SearchAlgorithm { + PatternGraph* graph_; + PatternNodePtrSet visited_nodes; + + explicit SearchAlgorithm(PatternGraph* graph) { + VLOG(4) << "Create NodePattern algorithm."; + graph_ = graph; + } + + PatternNodePtr FindMatchedNode() { + for (PatternNodePtr iter_node : graph_->all_pattern_nodes_) { + if (GraphMatcher()(*graph_, iter_node) && + !visited_nodes.count(iter_node)) { + visited_nodes.insert(iter_node); + VLOG(4) << "Find Matched Node: " << iter_node; + return iter_node; + } + } + VLOG(4) << "Can't find matched node any more."; + return nullptr; + } + + void operator()() { + while (true) { + PatternNodePtr node = FindMatchedNode(); + if (node == nullptr) { + break; + } + GraphOperation()(graph_, node); + } + } +}; + +template +struct SearchAlgorithm { + PatternGraph* graph_; + std::set, PatternNodePtr>> + visited_node_pair; + explicit SearchAlgorithm(PatternGraph* graph) { + VLOG(4) << "Create NodePairPattern algorithm."; + graph_ = graph; + } + std::optional, PatternNodePtr>> + FindMatchedPair() { + for (PatternNodePtr i : graph_->all_pattern_nodes_) { + for (PatternNodePtr j : graph_->all_pattern_nodes_) { + if (i == j) continue; + const auto& pair = std::make_pair(i, j); + if (GraphMatcher()(*graph_, i, j) && !visited_node_pair.count(pair)) { + visited_node_pair.insert(pair); + VLOG(4) << "Find Matched Node Pair: (" << i << ", " << j << ")"; + return pair; + } + } + } + VLOG(4) << "Can't find matched node any more."; + return {}; + } + void operator()() { + while (true) { + const auto& node = FindMatchedPair(); + if (!node.has_value()) break; + const auto& [i, j] = node.value(); + GraphOperation()(graph_, i, j); + } + } +}; + +// Operation + +struct MergeReduceTreeOperation { + template + void operator()(PatternGraph* graph, PatternNodePtr node) { + CHECK_EQ(node->downstream_.size(), 1); + auto downstream = node->downstream_.at(0); + auto merged_node = graph->MergeNode(node, downstream); + graph->RemoveNode(downstream); + graph->RemoveNode(node); + VLOG(4) << "MergeReduceTreeOperation: \nupstream " << node->DebugStr() + << "\ndownstream " << downstream->DebugStr() << "\nmerged " + << merged_node->DebugStr(); + } +}; + +struct MergeReduceTreeAndTrivialOperation { + template + void operator()(PatternGraph* graph, PatternNodePtr node) { + CHECK_EQ(node->downstream_.size(), 1); + auto downstream = node->downstream_.at(0); + auto fake_reduce_iter_idx = + graph->policy_manager_.GetFakeReduceIterIdx(node, downstream); + PatternNodePtr merged_node = graph->MergeNode(node, downstream); + std::get>(merged_node->stmt_pattern_) + .fake_reduce_iter_idx = fake_reduce_iter_idx; + graph->RemoveNode(downstream); + graph->RemoveNode(node); + VLOG(4) << "MergeReduceTreeAndTrivialOperation: \nupstream " + << node->DebugStr() << "\ndownstream " << downstream->DebugStr() + << "\nmerged " << merged_node->DebugStr(); + } +}; + +struct LiftReduceToReduceTreeOperation { + template + void operator()(PatternGraph* graph, PatternNodePtr node) { + const auto& reduce_pattern = ToReducePattern(node->stmt_pattern_); + node->stmt_pattern_ = ReduceTreePattern({}, reduce_pattern); + VLOG(4) << "LiftReduceToReduceTreeOperation: \nnode " << node->DebugStr(); + } +}; + +struct MergeTrivialPatternOperation { + template + void operator()(PatternGraph* graph, + PatternNodePtr upstream) { + std::vector> fusion_candidate = + upstream->downstream_; + upstream->downstream_.clear(); + for (const auto& downstream : fusion_candidate) { + if (std::holds_alternative>( 
+ downstream->stmt_pattern_) || + std::holds_alternative>( + downstream->stmt_pattern_)) { + auto merged_node = graph->MergeNode(upstream, downstream); + graph->RemoveNode(downstream); + VLOG(4) << "MergeTrivialPatternOperation: \nupstream " + << upstream->DebugStr() << "\ndownstream " + << downstream->DebugStr() << "\nmerged " + << merged_node->DebugStr(); + } else { + upstream->downstream_.push_back(downstream); + } + } + if (upstream->downstream_.empty()) { + graph->RemoveNode(upstream); + } + } +}; + +struct LiftToHorizontalFusionPatternOperation { + template + void operator()(PatternGraph* graph, PatternNodePtr i) { + i->stmt_pattern_ = HorizontalFusionPattern({i->stmt_pattern_}); + } +}; + +// Matcher + +template +struct AlwaysTrue { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return true; + } +}; + +template +struct StmtPatternGraphMatcher { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return GetPatternName(node->stmt_pattern_) == StmtPattern::name(); + } +}; + +struct CanFuseRxTMatcher { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return (std::holds_alternative>(node->stmt_pattern_) && + !node->downstream_.empty() && + std::holds_alternative>( + node->downstream_.at(0)->stmt_pattern_)); + } +}; + +struct CanFuseReduceTreeMatcher { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return StmtPatternGraphMatcher>()(graph, node) && + !node->downstream_.empty() && + std::holds_alternative>( + node->downstream_.at(0)->stmt_pattern_) && + graph.policy_manager_.CanFuse(node, node->downstream_.at(0)); + } +}; + +struct CanFuseReduceTreeAndTrivialMatcher { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return StmtPatternGraphMatcher>()(graph, node) && + !node->downstream_.empty() && + std::holds_alternative>( + node->downstream_.at(0)->stmt_pattern_) && + graph.policy_manager_.CanFuse(node, node->downstream_.at(0)); + } +}; + +struct HorizontalFusionConstrain { + template + bool operator()(const PatternGraph& graph, + const PatternNodePtr& first, + const PatternNodePtr& second) { + if (!StmtPatternGraphMatcher>()(graph, first)) { + return false; + } + if (!StmtPatternGraphMatcher>()(graph, second)) { + return false; + } + const auto& first_dim = first->sink_op_->result(0) + .type() + .template dyn_cast() + .dims(); + const auto& second_dim = second->sink_op_->result(0) + .type() + .template dyn_cast() + .dims(); + return graph.topo_manager_.CanFuse(first, second) && + first_dim == second_dim; + } +}; + +struct HorizontalFusionOperation { + template + void operator()(PatternGraph* graph, + const PatternNodePtr& i, + const PatternNodePtr& j) { + CHECK(GetPatternName(i->stmt_pattern_) == + HorizontalFusionPattern::name()); + CHECK(GetPatternName(j->stmt_pattern_) == + HorizontalFusionPattern::name()); + graph->MergeNode(i, j); + graph->RemoveNode(i); + graph->RemoveNode(j); + } +}; + +struct NonSinkNodeMatcher { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return !node->downstream_.empty(); + } +}; + +struct IsOutputNodeMatcher { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + bool res = IsAnyFirstInSecond(node->sink_op_->results(), graph.outputs_); + return res; + } +}; + +struct IsNotOutputNodeMatcher { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + bool res = 
!IsOutputNodeMatcher()(graph, node); + return res; + } +}; + +template +struct DownstreamSmallerThan { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return node->downstream_.size() < N; + } +}; + +template +struct And { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return A()(graph, node) && B()(graph, node); + } +}; + +template +struct Or { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return A()(graph, node) || B()(graph, node); + } +}; + +template +struct Not { + template + bool operator()(const PatternGraph& graph, const PatternNodePtr& node) { + return !A()(graph, node); + } +}; + +template +void GraphTransformer(PatternGraph* graph) { + VLOG(4) << "Start GraphTransformer..."; + auto alog = + SearchAlgorithm(graph); + alog(); +} + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/pattern_node.h b/paddle/cinn/operator_fusion/pattern_node.h new file mode 100644 index 0000000000000..d6c9f8202669e --- /dev/null +++ b/paddle/cinn/operator_fusion/pattern_node.h @@ -0,0 +1,59 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/cinn/operator_fusion/pattern.h" +#include "paddle/cinn/operator_fusion/pattern_fuser.h" +#include "paddle/cinn/operator_fusion/utils.h" + +namespace cinn::fusion { + +template +struct PatternNode { + using PatternNodePtr = std::shared_ptr>; + + explicit PatternNode(const PatternContent& content) + : sink_op_(content.op), stmt_pattern_(ConvertToStmtPattern(content)) {} + + explicit PatternNode(PatternNodePtr fused_up_node, + PatternNodePtr fused_down_node) + : sink_op_(fused_down_node->sink_op_), + stmt_pattern_(MergePattern(fused_up_node->stmt_pattern_, + fused_down_node->stmt_pattern_)) {} + + std::string DebugStr() const { + std::stringstream ss; + ss << "Node: " << this << ", Pattern: " << GetPatternName(stmt_pattern_) + << "\n -u>: "; + for (const auto& u : upstream_) { + ss << u << ", "; + } + ss << "\n stmt_pattern_; + pir::Operation* sink_op_; + + std::vector upstream_; + std::vector downstream_; +}; + +template +using PatternNodePtr = std::shared_ptr>; +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/policy/CMakeLists.txt b/paddle/cinn/operator_fusion/policy/CMakeLists.txt new file mode 100644 index 0000000000000..94a33e9599640 --- /dev/null +++ b/paddle/cinn/operator_fusion/policy/CMakeLists.txt @@ -0,0 +1,8 @@ +gather_srcs( + policy_fusion_src + SRCS + shardable_axes_base.cc + policy_manager.cc + relative_judge_policy.cc + general_topo_policy.cc + shardable_axes_policy.cc) diff --git a/paddle/cinn/operator_fusion/policy/general_topo_policy.cc b/paddle/cinn/operator_fusion/policy/general_topo_policy.cc new file mode 100644 index 0000000000000..53d54b8fa0f65 --- /dev/null +++ b/paddle/cinn/operator_fusion/policy/general_topo_policy.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2024 PaddlePaddle 
Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/cinn/operator_fusion/policy/general_topo_policy.h" +#include "paddle/cinn/operator_fusion/backend/pattern.h" +#include "paddle/cinn/operator_fusion/backend/pattern_fuser.h" +#include "paddle/cinn/operator_fusion/frontend/pattern.h" +#include "paddle/cinn/operator_fusion/frontend/pattern_fuser.h" + +namespace cinn::fusion { + +template +bool IsDownstreamNode(const PatternNodePtr start, + const PatternNodePtr target) { + if (start == target) return true; + for (const auto& down_node : start->downstream_) { + if (IsDownstreamNode(down_node, target)) return true; + } + return false; +} + +template +bool IsIndirectDownstreamNode(const PatternNodePtr start, + const PatternNodePtr target) { + for (const auto& node : start->downstream_) { + if (node == target) continue; + if (IsDownstreamNode(node, target)) return true; + } + return false; +} + +template +bool GeneralTopoPolicy::CanFuse(const PatternNodePtr& first, + const PatternNodePtr& second) { + VLOG(4) << "Start GeneralTopoPolicy"; + return !(IsIndirectDownstreamNode(first, second) || + IsIndirectDownstreamNode(second, first)); +} + +template class GeneralTopoPolicy; +template class GeneralTopoPolicy; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/policy/general_topo_policy.h b/paddle/cinn/operator_fusion/policy/general_topo_policy.h new file mode 100644 index 0000000000000..8fc8e360dd01d --- /dev/null +++ b/paddle/cinn/operator_fusion/policy/general_topo_policy.h @@ -0,0 +1,28 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/cinn/operator_fusion/policy/policy_manager.h" + +namespace cinn::fusion { + +template +class GeneralTopoPolicy final : virtual public Policy { + public: + bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) override; + std::string Name() { return "GeneralTopoPolicy"; } +}; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/policy/policy_manager.cc b/paddle/cinn/operator_fusion/policy/policy_manager.cc new file mode 100644 index 0000000000000..5d21dfed23b90 --- /dev/null +++ b/paddle/cinn/operator_fusion/policy/policy_manager.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/cinn/operator_fusion/policy/policy_manager.h" +#include "paddle/cinn/operator_fusion/backend/pattern.h" +#include "paddle/cinn/operator_fusion/frontend/pattern.h" +#include "paddle/common/enforce.h" + +namespace cinn::fusion { + +template +bool PolicyManager::CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) const { + for (const auto& policy : policies_) { + if (!policy->CanFuse(upstream, downstream)) return false; + } + return true; +} + +template +std::vector PolicyManager::GetFakeReduceIterIdx( + const PatternNodePtr& upstream, + const PatternNodePtr& downstream) const { + for (const auto& policy : policies_) { + if (policy->Name() == "RelativeJudgePolicy") { + return policy->GetFakeReduceIterIdx(upstream, downstream); + } + } + return {}; +} + +template class PolicyManager; +template class PolicyManager; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/policy/policy_manager.h b/paddle/cinn/operator_fusion/policy/policy_manager.h new file mode 100644 index 0000000000000..33c4227f7b1c7 --- /dev/null +++ b/paddle/cinn/operator_fusion/policy/policy_manager.h @@ -0,0 +1,51 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/cinn/operator_fusion/pattern_node.h" + +namespace cinn::fusion { + +template +class Policy { + public: + virtual std::string Name() = 0; + virtual bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) = 0; + virtual std::vector GetFakeReduceIterIdx( + const PatternNodePtr& upstream, const PatternNodePtr& downstream) { + return {}; + } +}; + +template +using PolicyPtr = std::shared_ptr>; + +template +class PolicyManager { + public: + explicit PolicyManager(const std::vector>& policies) + : policies_(policies) {} + bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) const; + std::vector GetFakeReduceIterIdx( + const PatternNodePtr& upstream, + const PatternNodePtr& downstream) const; + + private: + std::vector> policies_; +}; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/policy/relative_judge_policy.cc b/paddle/cinn/operator_fusion/policy/relative_judge_policy.cc new file mode 100644 index 0000000000000..630403776b49d --- /dev/null +++ b/paddle/cinn/operator_fusion/policy/relative_judge_policy.cc @@ -0,0 +1,342 @@ +// Copyright (c) 2024 PaddlePaddle Authors. 
All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/cinn/operator_fusion/policy/relative_judge_policy.h"
+#include "paddle/cinn/operator_fusion/backend/pattern.h"
+#include "paddle/cinn/operator_fusion/frontend/pattern.h"
+
+namespace cinn::fusion {
+
+template <typename T>
+bool RelativeJudgePolicy<T>::IsDownstreamStmtDependReduceOp(
+    pir::Operation* reduce, const StmtPattern<T>& downstream) {
+  const auto& values = GetPatternInputValues(downstream);
+  for (const auto& value : reduce->results()) {
+    if (std::find(values.begin(), values.end(), value) != values.end()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+template <typename T>
+std::optional<ReducePattern<T>>
+RelativeJudgePolicy<T>::GetDownstreamFromCandidate(
+    const ReducePattern<T>& upstream,
+    const std::vector<ReducePattern<T>>& candidates) {
+  pir::Operation* reduce = upstream.GetReduceOp();
+  for (const auto& candidate : candidates) {
+    if (IsDownstreamStmtDependReduceOp(reduce, candidate)) {
+      return candidate;
+    }
+  }
+  return {};
+}
+
+SplitDims SplitReduceInputDimsIfRelatedWithNonReduceAxis(
+    const ShardableAxesSignature& signature, pir::Operation* op) {
+  const auto& v = op->operand_source(0);
+  const auto& input_names = signature.inputs[0].axis_names;
+  const auto& output_names = signature.outputs[0].axis_names;
+  std::set<std::string> output_names_set(output_names.begin(),
+                                         output_names.end());
+  auto result = SplitDims();
+  int idx = 0;
+  for (const auto& in : input_names) {
+    if (output_names_set.count(in) == 0) {
+      result.non_related.emplace_back(v, idx);
+    } else {
+      result.related.emplace_back(v, idx);
+    }
+    idx += 1;
+  }
+  return result;
+}
+
+SplitDims SplitReduceOutputDimsIfRelatedWithNonReduceAxis(
+    const ShardableAxesSignature& signature, const pir::Operation* op) {
+  const auto& v = op->result(0);
+  const auto& input_names = signature.inputs[0].axis_names;
+  const auto& output_names = signature.outputs[0].axis_names;
+  std::set<std::string> input_names_set(input_names.begin(),
+                                        input_names.end());
+  auto result = SplitDims();
+  int idx = 0;
+  for (const auto& name : output_names) {
+    if (input_names_set.count(name) == 0) {
+      result.non_related.emplace_back(v, idx);
+    } else {
+      result.related.emplace_back(v, idx);
+    }
+    idx += 1;
+  }
+  return result;
+}
+
+template <typename T>
+bool RelativeJudgePolicy<T>::IsBroadcastEdge(
+    const std::vector<ValueDim>& upstream_out_dims,
+    const std::vector<ValueDim>& downstream_reduce_dims) {
+  VLOG(4) << "IsBroadcastEdge: upstream_out_dims.size() = "
+          << upstream_out_dims.size();
+  VLOG(4) << "IsBroadcastEdge: downstream_reduce_dims.size() = "
+          << downstream_reduce_dims.size();
+
+  for (const auto& downstream_reduce_dim : downstream_reduce_dims) {
+    for (const auto& upstream_out_dim : upstream_out_dims) {
+      VLOG(4) << "upstream_out_dim: " << upstream_out_dim.DebugStr()
+              << " downstream_reduce_dim: "
+              << downstream_reduce_dim.DebugStr();
+      if (IsRelated(upstream_out_dim, downstream_reduce_dim)) {
+        return false;
+      }
+    }
+  }
+
+  VLOG(4) << "IsBroadcastEdge: true";
+  return true;
+}
+
+template <typename T>
+bool RelativeJudgePolicy<T>::ReduceTreeGrownCanMerge(
+    const PatternNodePtr<T>& upstream, const PatternNodePtr<T>& downstream) {
+  const auto& upstream_tree =
+      std::get<ReduceTreePattern<T>>(upstream->stmt_pattern_);
+  VLOG(4) << "upstream->stmt_pattern_: "
+          << OpsDebugStr(GetOpsInPattern(upstream_tree));
+  const auto& downstream_tree =
+      std::get<ReduceTreePattern<T>>(downstream->stmt_pattern_);
+  VLOG(4) << "downstream->stmt_pattern_: "
+          << OpsDebugStr(GetOpsInPattern(downstream_tree));
+  const auto& maybe_downstream_op = GetDownstreamFromCandidate(
+      upstream_tree.GetRootPattern(), downstream_tree.FlattenReducePattern());
+  int idx = 0;
+  for (const auto& r_pattern : downstream_tree.childs()) {
+    idx += 1;
+    VLOG(4) << "downstream_tree.reduce_patterns_"
+            << "[" << idx << "]: " << OpsDebugStr(GetOpsInPattern(r_pattern));
+  }
+  if (!maybe_downstream_op.has_value()) {
+    VLOG(4) << "Cannot find a dependent reduce candidate; fusion is rejected.";
+    return false;
+  }
+  const pir::Value& reduce_out_value =
+      upstream_tree.GetRootPattern().GetReduceOp()->result(0);
+  pir::Operation* downstream_reduce_op =
+      maybe_downstream_op.value().GetReduceOp();
+  const auto& split_reduce_dim_result =
+      SplitReduceInputDimsIfRelatedWithNonReduceAxis(
+          axes_info_.GetSignature(downstream_reduce_op), downstream_reduce_op);
+  VLOG(4) << split_reduce_dim_result.DebugStr();
+  const auto& upstream_output_dims = GetAllValueDimFromValue(reduce_out_value);
+  auto res = IsBroadcastEdge(upstream_output_dims,
+                             split_reduce_dim_result.non_related);
+  VLOG(4) << "ReduceTreeGrownCanMerge: " << res;
+  return res;
+}
+
+template <typename T>
+SplitDims RelativeJudgePolicy<T>::SplitDimsWithRelationship(
+    const std::vector<ValueDim>& targets,
+    const std::vector<ValueDim>& related_with) {
+  VLOG(4) << "SplitDimsWithRelationship";
+  auto result = SplitDims();
+  bool is_related = false;
+  for (auto& target_dim : targets) {
+    is_related = false;
+    for (auto& related_dim : related_with) {
+      if (IsRelated(related_dim, target_dim)) is_related = true;
+    }
+    if (is_related) {
+      result.related.push_back(target_dim);
+    } else {
+      result.non_related.push_back(target_dim);
+    }
+  }
+
+  return result;
+}
+
+bool DimsEqual(const std::vector<ValueDim>& first,
+               const std::vector<ValueDim>& second) {
+  const auto GetDimInfo = [](const std::vector<ValueDim>& dims)
+      -> std::unordered_map<size_t, int> {
+    std::unordered_map<size_t, int> result;
+    for (const auto& dim : dims) {
+      VLOG(4) << "dim: " << dim.DebugStr();
+      size_t value = dim.GetNumericValue();
+      VLOG(4) << "value: " << value;
+      if (result.find(value) == result.end()) {
+        result[value] = 1;
+      } else {
+        result[value] += 1;
+      }
+    }
+    return result;
+  };
+  VLOG(4) << "GetDimInfo(first)";
+  const std::unordered_map<size_t, int>& first_dims = GetDimInfo(first);
+  VLOG(4) << "GetDimInfo(second)";
+  const std::unordered_map<size_t, int>& second_dims = GetDimInfo(second);
+  if (first_dims.size() != second_dims.size()) return false;
+  for (const auto& [dim_value, count] : first_dims) {
+    if (second_dims.find(dim_value) == second_dims.end() ||
+        second_dims.at(dim_value) != count)
+      return false;
+  }
+  return true;
+}
+
+template <typename T>
+bool RelativeJudgePolicy<T>::ReducePlusTrivialCanMerge(
+    const PatternNodePtr<T>& upstream, const PatternNodePtr<T>& downstream) {
+  VLOG(4) << "RT can fuse";
+
+  // const auto& split_reduce_dims_result =
+  //     SplitReduceInputDimsIfRelatedWithNonReduceAxis(
+  //         axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_);
+
+  // VLOG(4) << split_reduce_dims_result.DebugStr();
+
+  // const auto& upstream_reduce_dims = split_reduce_dims_result.non_related;
+  // const auto& upstream_non_reduce_dims = split_reduce_dims_result.related;
+
+  // TODO(wuzhanfei) fix the bug in the relation analysis when the graph
+  // contains multiple paths; test_rms_norm can cover this case.
+
+  const auto& split_reduce_input_dims_result =
+      SplitReduceInputDimsIfRelatedWithNonReduceAxis(
+          axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_);
+  VLOG(4) << split_reduce_input_dims_result.DebugStr();
+  const auto& upstream_reduce_dims = split_reduce_input_dims_result.non_related;
+
+  const auto& split_reduce_output_dims_result =
+      SplitReduceOutputDimsIfRelatedWithNonReduceAxis(
+          axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_);
+  VLOG(4) << split_reduce_output_dims_result.DebugStr();
+  const auto& upstream_non_reduce_dims =
+      split_reduce_output_dims_result.related;
+  // Replace the code above with the original (commented-out) design once the
+  // multi-path bug is fixed.
+
+  const auto& split_trivial_dims_result = SplitDimsWithRelationship(
+      GetAllValueDimFromValue(downstream->sink_op_->result(0)),
+      upstream_non_reduce_dims);
+
+  VLOG(4) << split_trivial_dims_result.DebugStr();
+
+  auto res =
+      DimsEqual(split_trivial_dims_result.non_related, upstream_reduce_dims);
+  res = res || IsFlattenDimSmaller(upstream, downstream);
+  VLOG(4) << "ReducePlusTrivialCanMerge: " << res;
+  return res;
+}
+
+template <typename T>
+bool RelativeJudgePolicy<T>::IsFlattenDimSmaller(
+    const PatternNodePtr<T>& upstream, const PatternNodePtr<T>& downstream) {
+  const auto& split_reduce_dims_result =
+      SplitReduceInputDimsIfRelatedWithNonReduceAxis(
+          axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_);
+  const auto& upstream_reduce_dims = split_reduce_dims_result.non_related;
+  const auto& upstream_non_reduce_dims = split_reduce_dims_result.related;
+
+  const auto& split_trivial_dims_result = SplitDimsWithRelationship(
+      GetAllValueDimFromValue(downstream->sink_op_->result(0)),
+      upstream_non_reduce_dims);
+
+  VLOG(4) << "IsFlattenDimSmaller: "
+          << axes_info_.GetSignature(downstream->sink_op_).DebugStr();
+  int rank = axes_info_.GetSignature(downstream->sink_op_)
+                 .outputs[0]
+                 .axis_names.size();
+  VLOG(4) << "IsFlattenDimSmaller: " << rank << " "
+          << split_trivial_dims_result.related.size() << " "
+          << upstream_non_reduce_dims.size();
+  bool res = (rank - split_trivial_dims_result.related.size()) <=
+             upstream_non_reduce_dims.size();
+  VLOG(4) << "IsFlattenDimSmaller: " << res;
+  return res;
+}
+
+template <typename T>
+bool RelativeJudgePolicy<T>::CanFuse(const PatternNodePtr<T>& upstream,
+                                     const PatternNodePtr<T>& downstream) {
+  if (std::holds_alternative<ReduceTreePattern<T>>(upstream->stmt_pattern_) &&
+      std::holds_alternative<TrivialPattern<T>>(downstream->stmt_pattern_)) {
+    return ReducePlusTrivialCanMerge(upstream, downstream);
+  }
+  if (std::holds_alternative<ReduceTreePattern<T>>(upstream->stmt_pattern_) &&
+      std::holds_alternative<ReduceTreePattern<T>>(
+          downstream->stmt_pattern_)) {
+    return ReduceTreeGrownCanMerge(upstream, downstream);
+  }
+  return true;  // other case.
+}
+
+template <typename T>
+std::vector<size_t> RelativeJudgePolicy<T>::GetFakeReduceIterIdx(
+    const PatternNodePtr<T>& upstream, const PatternNodePtr<T>& downstream) {
+  if (!std::holds_alternative<ReduceTreePattern<T>>(upstream->stmt_pattern_) &&
+      !std::holds_alternative<TrivialPattern<T>>(downstream->stmt_pattern_)) {
+    PADDLE_THROW(phi::errors::PreconditionNotMet(
+        "Illegal call to GetFakeReduceIterIdx"));
+  }
+
+  // TODO(xiongkun): restore the code below after fixing the relation bug for
+  // graphs with multiple paths.
+  // const auto& split_reduce_dims_result =
+  //     SplitReduceInputDimsIfRelatedWithNonReduceAxis(
+  //         axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_);
+  // const auto& upstream_reduce_dims = split_reduce_dims_result.non_related;
+  // const auto& upstream_non_reduce_dims = split_reduce_dims_result.related;
+  //
+
+  const auto& split_reduce_input_dims_result =
+      SplitReduceInputDimsIfRelatedWithNonReduceAxis(
+          axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_);
+  VLOG(4) << split_reduce_input_dims_result.DebugStr();
+  const auto& upstream_reduce_dims = split_reduce_input_dims_result.non_related;
+  const auto& split_reduce_output_dims_result =
+      SplitReduceOutputDimsIfRelatedWithNonReduceAxis(
+          axes_info_.GetSignature(upstream->sink_op_), upstream->sink_op_);
+  VLOG(4) << split_reduce_output_dims_result.DebugStr();
+  const auto& upstream_non_reduce_dims =
+      split_reduce_output_dims_result.related;
+
+  // =======================
+
+  const auto& split_trivial_dims_result = SplitDimsWithRelationship(
+      GetAllValueDimFromValue(downstream->sink_op_->result(0)),
+      upstream_non_reduce_dims);
+
+  const auto& trivial_reorder_dims = split_trivial_dims_result.non_related;
+
+  // CHECK(upstream_reduce_dims.size() == trivial_reorder_dims.size() ||
+  //       trivial_reorder_dims.size() == 0);
+  std::unordered_set<ValueDim, ValueDimHash> visited_dims;
+  std::vector<size_t> result;
+  for (auto& reduce_dim : upstream_reduce_dims) {
+    for (auto& trivial_dim : trivial_reorder_dims) {
+      if (visited_dims.find(trivial_dim) == visited_dims.end() &&
+          trivial_dim.GetNumericValue() == reduce_dim.GetNumericValue()) {
+        visited_dims.emplace(trivial_dim);
+        result.emplace_back(trivial_dim.idx_);
+        break;
+      }
+    }
+  }
+  VLOG(4) << "FakeReduceIterIdx: " << cinn::utils::Join(result, ", ");
+  return result;
+}
+
+template class RelativeJudgePolicy<FrontendStage>;
+template class RelativeJudgePolicy<BackendStage>;
+
+}  // namespace cinn::fusion
diff --git a/paddle/cinn/operator_fusion/policy/relative_judge_policy.h b/paddle/cinn/operator_fusion/policy/relative_judge_policy.h
new file mode 100644
index 0000000000000..ac7d9037d24f5
--- /dev/null
+++ b/paddle/cinn/operator_fusion/policy/relative_judge_policy.h
@@ -0,0 +1,304 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
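A note on the CanFuse dispatch above: StmtPattern<T> is a std::variant over the concrete pattern types, so the policy picks a fusibility rule by inspecting which alternative each node currently holds. A minimal standalone sketch of that idiom (toy pattern types and stub results, not the Paddle definitions):

    #include <iostream>
    #include <variant>

    // Toy stand-ins for the real pattern alternatives; illustrative only.
    struct TrivialPattern {};
    struct ReduceTreePattern {};
    using StmtPattern = std::variant<TrivialPattern, ReduceTreePattern>;

    // Mirrors the shape of RelativeJudgePolicy<T>::CanFuse: route to a rule
    // based on the (upstream, downstream) alternative pair.
    bool CanFuse(const StmtPattern& upstream, const StmtPattern& downstream) {
      if (std::holds_alternative<ReduceTreePattern>(upstream) &&
          std::holds_alternative<TrivialPattern>(downstream)) {
        return false;  // stand-in for ReducePlusTrivialCanMerge(...)
      }
      if (std::holds_alternative<ReduceTreePattern>(upstream) &&
          std::holds_alternative<ReduceTreePattern>(downstream)) {
        return false;  // stand-in for ReduceTreeGrownCanMerge(...)
      }
      return true;  // every other pairing is fusible by default
    }

    int main() {
      std::cout << CanFuse(TrivialPattern{}, TrivialPattern{}) << "\n";     // 1
      std::cout << CanFuse(ReduceTreePattern{}, TrivialPattern{}) << "\n";  // 0
    }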
+ +#pragma once +#include +#include "paddle/cinn/operator_fusion/policy/policy_manager.h" +#include "paddle/cinn/operator_fusion/policy/shardable_axes_base.h" +#include "paddle/cinn/operator_fusion/utils.h" + +namespace cinn::fusion { + +struct ValueDim { + pir::Value v_; + size_t idx_; + ValueDim(pir::Value v, size_t idx) : v_(v), idx_(idx) {} + ValueDim() = default; + ValueDim(const ValueDim& v) = default; + bool operator==(const ValueDim& v) const { + return (idx_ == v.idx_) && (v_ == v.v_); + } + + size_t GetNumericValue() const { + return v_.type().dyn_cast().dims().at(idx_); + } + + std::string DebugStr() const { + std::ostringstream oss; + oss << "ValueDim: "; + oss << "Index: " << idx_; + oss << ", "; + v_.defining_op()->Print(oss); + return oss.str(); + } +}; + +struct ValueDimHash { + std::size_t operator()(const ValueDim& p) const { + auto h1 = std::hash{}(p.idx_); + auto h2 = std::hash{}(p.v_); + // Mainly for demonstration purposes, i.e. works but is overly simple + // In the real world, use sth. like boost.hash_combine + return h1 ^ (h2 << 1); + } +}; + +using ValueDimRelation = + std::unordered_map, + ValueDimHash>; +// ValueDimRelation[in][out] = True; means f(out) = in is related. + +static std::vector GetAllValueDimFromValue(const pir::Value& v) { + std::vector value_dims; + size_t rank = GetRank(v); + for (size_t i = 0; i < rank; ++i) { + value_dims.emplace_back(v, i); + } + return value_dims; +} + +static std::vector GetAllInputValueDim(pir::Operation* op) { + std::vector value_dims; + for (const auto& v : op->operands()) { + value_dims = ConcatVector(value_dims, GetAllValueDimFromValue(v.source())); + } + return value_dims; +} + +static std::vector GetAllOutputValueDim(pir::Operation* op) { + std::vector value_dims; + for (const auto& v : op->results()) { + value_dims = ConcatVector(value_dims, GetAllValueDimFromValue(v)); + } + return value_dims; +} + +static ValueDimRelation CreateOpRelativenessForElementWise(pir::Operation* op) { + ValueDimRelation res; + for (const auto& v : op->operands()) { + const auto& value_dims = GetAllValueDimFromValue(v.source()); + const auto& out_value_dims = GetAllOutputValueDim(op); + CHECK_EQ(value_dims.size(), out_value_dims.size()); + for (size_t i = 0; i < value_dims.size(); ++i) { + res[value_dims[i]][out_value_dims[i]] = true; + } + } + return res; +} + +static std::vector> GetNonBroadCastDims( + pir::Operation* op) { + std::vector> res; + const auto* shape_analysis = + &pir::ShapeAnalysisManager::Instance().Get(op->GetParentProgram()); + + const auto& broad_cast_value = GetBroadcastOpInputOuputValue(op); + CHECK(broad_cast_value.has_value()); + + const auto& [input_value, output_value] = broad_cast_value.value(); + const int input_rank = GetRank(input_value); + const int output_rank = GetRank(output_value); + CHECK_GE(output_rank, input_rank); + + // Compare axis one by one, from back to front. 
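+  // (Axes are aligned from the right: the last input axis is compared with the last output axis first.)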
+ // The rule of broadcasting: + // https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/beginner/tensor_cn.html#id7 + for (int i = 1; i <= input_rank; ++i) { + int input_axis = input_rank - i; + int output_axis = output_rank - i; + if (input_axis < 0 || output_axis < 0) break; + if (shape_analysis->IsProductEqual( + input_value, {input_axis}, output_value, {output_axis})) { + res.emplace_back(input_axis, output_axis); + } + } + + return res; +} + +static ValueDimRelation CreateOpRelativenessForBroadcast(pir::Operation* op) { + ValueDimRelation res; + const auto& in_value = op->operand(0).source(); + const auto& out_value = op->result(0); + for (const auto& t : GetNonBroadCastDims(op)) { + res[ValueDim(in_value, t.first)][ValueDim(out_value, t.second)] = true; + } + return res; +} + +static ValueDimRelation CreateOpRelativenessForDefault(pir::Operation* op) { + ValueDimRelation res; + for (const auto& out_dim : GetAllOutputValueDim(op)) { + for (const auto& in_dim : GetAllInputValueDim(op)) { + res[in_dim][out_dim] = true; + } + } + return res; +} + +static ValueDimRelation CreateOpRelativenessForReduce(pir::Operation* op) { + const auto& reduce_axis_idx = GetReduceAxisIdx(op); + ValueDimRelation res; + const size_t input_rank = GetRank(op->operand_source(0)); + int out_idx = 0; + bool keep_dim = GetReduceOpKeepDims(op); + for (int i = 0; i < input_rank; i++) { + if (std::find(reduce_axis_idx.begin(), reduce_axis_idx.end(), i) != + reduce_axis_idx.end()) { + res[ValueDim(op->operand_source(0), i)] + [ValueDim(op->result(0), out_idx)] = true; + out_idx += 1; + } else { + out_idx += keep_dim; + } + } + return res; +} + +static std::optional CreateOpRelativenessForSpecialOps( + pir::Operation* op) { + if (op->name() == "cinn_op.reshape") { + // Special Elementwise. + return CreateOpRelativenessForDefault(op); + } + if (op->name() == "pd_op.reshape") { + // Special Elementwise. 
+ return CreateOpRelativenessForDefault(op); + } + if (op->name() == "cinn_op.generate_shape") { + return CreateOpRelativenessForDefault(op); + } + if (op->name() == "cinn_op.yield_store") { + return CreateOpRelativenessForDefault(op); + } + return {}; +} + +static ValueDimRelation GetSingleOpRelation(pir::Operation* op) { + VLOG(4) << "GetSingleOpRelation for " << op->name(); + const auto& special_result = CreateOpRelativenessForSpecialOps(op); + if (special_result != std::nullopt) { + return special_result.value(); + } + + CHECK(op->num_results() == 1) + << "Now we do not support op with multi outputs: " << op->name(); + const hlir::framework::OpPatternKind kind = GetOpPatternKind(op); + ValueDimRelation result; + if (kind == hlir::framework::kReduction) { + result = CreateOpRelativenessForReduce(op); + } else if (kind == hlir::framework::kElementWise) { + result = CreateOpRelativenessForElementWise(op); + } else if (kind == hlir::framework::kBroadcast) { + result = CreateOpRelativenessForBroadcast(op); + } else { + result = CreateOpRelativenessForDefault(op); + } + return result; +} + +static std::vector> FlattenRelation( + const ValueDimRelation& axes_relation) { + std::vector> res; + for (const auto& in_dim_pair : axes_relation) { + for (const auto& out_dim_pair : in_dim_pair.second) { + res.emplace_back(in_dim_pair.first, out_dim_pair.first); + } + } + return res; +} + +static ValueDimRelation AnalysisIndexExprRelation( + const std::vector& ops) { + ValueDimRelation res; + + for (size_t i = ops.size(); i >= 1; --i) { + pir::Operation* op = ops[i - 1]; + if (op->name() == "cf.yield") continue; + + const auto& value_dim_relation = GetSingleOpRelation(op); + for (const auto& in_out_pair : FlattenRelation(value_dim_relation)) { + for (const auto& out_relation : res[in_out_pair.second]) { + res[in_out_pair.first][out_relation.first] = true; + } + res[in_out_pair.first][in_out_pair.second] = true; + } + } + return res; +} + +struct SplitDims { + std::vector related; + std::vector non_related; + + std::string DebugStr() const { + std::stringstream ss; + ss << "SplitDims:\nrelated:\n"; + for (const auto& dim : related) { + ss << dim.DebugStr() << "\n"; + } + ss << "non_related:\n"; + for (const auto& dim : non_related) { + ss << dim.DebugStr() << "\n"; + } + return ss.str(); + } +}; + +template +class RelativeJudgePolicy final : public Policy { + public: + RelativeJudgePolicy(const std::vector& ops, + const pir::ShapeConstraintIRAnalysis* shape_analysis) + : axes_info_(ops, shape_analysis) { + VLOG(4) << "[relative_judge_policy] Start AnalysisIndexExprRelation."; + index_expr_map_ = AnalysisIndexExprRelation(ops); + VLOG(4) << "[relative_judge_policy] End AnalysisIndexExprRelation."; + } + bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) override; + + std::string Name() { return "RelativeJudgePolicy"; } + + std::vector GetFakeReduceIterIdx( + const PatternNodePtr& upstream, + const PatternNodePtr& downstream) override; + + bool IsRelated(ValueDim in, ValueDim out) { + return index_expr_map_[in].count(out) == 1; + } + + private: + ValueDimRelation index_expr_map_; + ShardableAxesInfoManager axes_info_; + bool ReduceTreeGrownCanMerge(const PatternNodePtr&, + const PatternNodePtr&); + bool IsFlattenDimSmaller(const PatternNodePtr& upstream, + const PatternNodePtr& downstream); + bool ReducePlusTrivialCanMerge(const PatternNodePtr&, + const PatternNodePtr&); + SplitDims SplitDimsWithRelationship( + const std::vector& targets, + const std::vector& related_with); + 
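// Returns the first candidate reduce pattern that consumes an output of the given reduce op (see the .cc definition above). +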
std::optional> GetDownstreamFromCandidate( + const ReducePattern& upstream, + const std::vector>& candidates); + bool IsDownstreamStmtDependReduceOp(pir::Operation* reduce, + const StmtPattern& downstream); + bool IsBroadcastEdge(const std::vector& upstream_out_dims, + const std::vector&); +}; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/policy/shardable_axes_base.cc b/paddle/cinn/operator_fusion/policy/shardable_axes_base.cc new file mode 100644 index 0000000000000..a9876ea0b8271 --- /dev/null +++ b/paddle/cinn/operator_fusion/policy/shardable_axes_base.cc @@ -0,0 +1,305 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/cinn/operator_fusion/policy/shardable_axes_base.h" + +namespace cinn::fusion { + +ShardableAxes ShardableAxesInfoManager::ReplaceShardableAxesWithRootName( + const ShardableAxes& axes) { + std::vector names; + for (auto name : axes.axis_names) { + names.push_back(name_union_[name]); + } + return ShardableAxes(names); +} + +ShardableAxesSignature ShardableAxesInfoManager::GetSignature( + pir::Operation* op) { + return op_signature_map_[op]; + // TODO(baizhou) fix broadcast signature and enable here + // auto result = ShardableAxesSignature(); + // auto origin_sig = op_signature_map_[op]; + // for (const auto& axes : origin_sig.inputs) { + // result.inputs.emplace_back(ReplaceShardableAxesWithRootName(axes)); + // } + // for (const auto& axes : origin_sig.outputs) { + // result.outputs.emplace_back(ReplaceShardableAxesWithRootName(axes)); + // } + // return result; +} + +ShardableAxes ShardableAxesInfoManager::GetAxes(pir::Value value) { + return ReplaceShardableAxesWithRootName(value_axes_map_[value]); +} + +std::string ShardableAxesInfoManager::GetUniqueName() { + static std::atomic counter = 0; + counter += 1; + return "D" + std::to_string(counter); +} + +std::vector CreateNewNamesWithRank(int64_t rank) { + auto result = std::vector(); + for (int64_t i = 0; i < rank; i++) { + result.emplace_back(ShardableAxesInfoManager::GetUniqueName()); + } + return result; +} + +ShardableAxesSignature CreateDefaultSignature(pir::Operation* op) { + ShardableAxesSignature result = ShardableAxesSignature(); + for (int i = 0; i < op->num_operands(); ++i) { + result.inputs.emplace_back( + CreateNewNamesWithRank(GetRank(op->operand_source(i)))); + } + for (int i = 0; i < op->num_results(); ++i) { + result.outputs.emplace_back(CreateNewNamesWithRank(GetRank(op->result(i)))); + } + return result; +} + +std::optional CreateSignatureForSpecialOps( + pir::Operation* op) { + if (op->isa()) { + return CreateDefaultSignature(op); + } + if (op->name() == "cinn_op.generate_shape") { + return CreateDefaultSignature(op); + } + if (op->name() == "cinn_op.yield_store") { + return CreateDefaultSignature(op); + } + if (op->name() == "cinn_op.reshape") { + return CreateDefaultSignature(op); + } + if (op->name() == "pd_op.reshape") { + return CreateDefaultSignature(op); + } + 
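// No special case matched; CreateShardableSignature falls back to the kind-based signatures below. +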
return std::nullopt; +} + +ShardableAxesSignature CreateSignatureForReduce(pir::Operation* reduce_op) { + CHECK_EQ(reduce_op->num_operands(), 1); + CHECK_EQ(reduce_op->num_results(), 1); + ShardableAxesSignature result = ShardableAxesSignature(); + const size_t input_rank = GetRank(reduce_op->operand_source(0)); + auto input_axes = CreateNewNamesWithRank(input_rank); + + const auto& reduce_axis_idx = GetReduceAxisIdx(reduce_op); + bool keep_dim = GetReduceOpKeepDims(reduce_op); + auto output_axes = std::vector(); + + for (int i = 0; i < input_rank; i++) { + if (std::find(reduce_axis_idx.begin(), reduce_axis_idx.end(), i) != + reduce_axis_idx.end()) { + if (keep_dim) { + output_axes.emplace_back(ShardableAxesInfoManager::GetUniqueName()); + } // else do nothing + } else { + output_axes.emplace_back(input_axes[i]); + } + } + + result.inputs.emplace_back(input_axes); + result.outputs.emplace_back(output_axes); + + return result; +} + +ShardableAxesSignature CreateSignatureForElementWise(pir::Operation* op) { + ShardableAxesSignature result = ShardableAxesSignature(); + + int64_t rank = GetRank(op->result(0)); + auto same_axes = CreateNewNamesWithRank(rank); + + for (int i = 0; i < op->num_operands(); ++i) { + CHECK(rank == GetRank(op->operand_source(i))); + result.inputs.emplace_back(same_axes); + } + for (int i = 0; i < op->num_results(); ++i) { + CHECK(rank == GetRank(op->result(i))); + result.outputs.emplace_back(same_axes); + } + return result; +} + +ShardableAxesSignature CreateSignatureForBroadcast( + pir::Operation* op, const pir::ShapeConstraintIRAnalysis* shape_analysis) { + ShardableAxesSignature result = ShardableAxesSignature(); + + const auto& broad_cast_value = GetBroadcastOpInputOuputValue(op); + CHECK(broad_cast_value.has_value()); + + const auto& [input_value, output_value] = broad_cast_value.value(); + const int input_rank = GetRank(input_value); + const int output_rank = GetRank(output_value); + CHECK_GE(output_rank, input_rank); + + // Create axes for operands. For expand op, the second operand is the shape of + // output. + for (int i = 0; i < op->num_operands(); ++i) { + result.inputs.emplace_back( + CreateNewNamesWithRank(GetRank(op->operand_source(i)))); + } + + // Create output axes. Compare axis one by one, from back to front. 
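+  // (An output axis that matches an input axis reuses that axis name; unmatched output axes get fresh names.)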
+ // The rule of broadcasting: + // https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/beginner/tensor_cn.html#id7 + const auto& input_axis_names = result.inputs[0].axis_names; + std::vector output_axis_names; + for (int i = 1; i <= output_rank; ++i) { + int input_axis = input_rank - i; + int output_axis = output_rank - i; + if ((input_axis >= 0) && + shape_analysis->IsProductEqual( + input_value, {input_axis}, output_value, {output_axis})) { + output_axis_names.emplace_back(input_axis_names[input_axis]); + } else { + output_axis_names.emplace_back(ShardableAxesInfoManager::GetUniqueName()); + } + } + std::reverse(output_axis_names.begin(), output_axis_names.end()); + result.outputs.emplace_back(ShardableAxes(output_axis_names)); + + return result; +} + +ShardableAxesSignature ShardableAxesInfoManager::CreateShardableSignature( + pir::Operation* op) { + auto special_result = CreateSignatureForSpecialOps(op); + if (special_result != std::nullopt) { + VLOG(4) << "[ShardableAxesInfoManager] Create Shardable Axes Signature : \n" + << op->name() << " : " << special_result.value().DebugStr(); + return special_result.value(); + } + + CHECK(op->num_results() == 1) + << "Now we do not support op with multi outputs: " << op->name(); + ShardableAxesSignature result; + const hlir::framework::OpPatternKind kind = GetOpPatternKind(op); + if (kind == hlir::framework::kReduction) { + result = CreateSignatureForReduce(op); + } else if (kind == hlir::framework::kElementWise) { + result = CreateSignatureForElementWise(op); + } else if (kind == hlir::framework::kBroadcast) { + result = CreateSignatureForBroadcast(op, shape_analysis_); + } else { + result = CreateDefaultSignature(op); + } + VLOG(4) << "[ShardableAxesInfoManager] Create Shardable Axes Signature : \n" + << op->name() << " : " << result.DebugStr(); + return result; +} + +ShardableAxesInfoManager::ShardableAxesInfoManager( + const std::vector& ops, + const pir::ShapeConstraintIRAnalysis* shape_analysis) + : ops_(ops), shape_analysis_(shape_analysis) { + for (const auto& op : ops) { + if (op->name() == "cf.yield") continue; + op_signature_map_[op] = CreateShardableSignature(op); + } + + const auto FindRoot = [&](std::string non_root) { + std::string result = non_root; + while (name_union_[result] != result) { + result = name_union_[result]; + } + return result; + }; + + const auto CombineAxes = [&](const ShardableAxes& root, + const ShardableAxes& non_root) { + CHECK_EQ(root.axis_names.size(), non_root.axis_names.size()); + for (int i = 0; i < non_root.axis_names.size(); i++) { + name_union_[non_root.axis_names[i]] = FindRoot(root.axis_names[i]); + } + }; + + for (const auto& [op, axes_signature] : op_signature_map_) { + for (int i = 0; i < op->num_operands(); ++i) { + auto value = op->operand_source(i); + auto axes = axes_signature.inputs[i]; + if (value_axes_map_.find(value) == value_axes_map_.end()) { + value_axes_map_[value] = axes; + for (auto& axis_name : axes.axis_names) { + name_union_[axis_name] = axis_name; + } + } else { + CombineAxes(value_axes_map_[value], axes); + } + } + for (int i = 0; i < op->num_results(); ++i) { + auto value = op->result(i); + auto axes = axes_signature.outputs[i]; + if (value_axes_map_.find(value) == value_axes_map_.end()) { + value_axes_map_[value] = axes; + for (auto& axis_name : axes.axis_names) { + name_union_[axis_name] = axis_name; + } + } else { + CombineAxes(value_axes_map_[value], axes); + } + } + } + + VLOG(4) << NameUnionDebugStr(); +} + +std::string ShardableAxes::DebugStr() const { + 
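// Renders the axis names as a comma-separated list, e.g. "D1, D2, D3, ". +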
std::stringstream ss; + for (const auto& name : axis_names) { + ss << name << ", "; + } + return ss.str(); +} + +std::string ShardableAxesSignature::DebugStr() const { + std::stringstream ss; + ss << "ShardableAxes Signature:\n"; + for (int i = 0; i < inputs.size(); i++) { + ss << "input " << i << ": " << inputs[i].DebugStr() << "\n"; + } + for (int i = 0; i < outputs.size(); i++) { + ss << "output " << i << ": " << outputs[i].DebugStr() << "\n"; + } + return ss.str(); +} + +std::string ShardableAxesInfoManager::NameUnionDebugStr() const { + std::stringstream ss; + ss << "[ShardableAxesInfoManager] NameUnion :\n"; + + std::unordered_map> root_to_sons; + for (const auto& [non_root, root] : name_union_) { + if (root_to_sons.find(root) == root_to_sons.end()) { + root_to_sons[root] = std::vector{non_root}; + } else { + root_to_sons[root].push_back(non_root); + } + } + for (const auto& [root, sons] : root_to_sons) { + ss << "Root " << root << ": "; + for (const auto& son : sons) { + ss << son << ", "; + } + ss << "\n"; + } + + return ss.str(); +} + +} // namespace cinn::fusion diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h b/paddle/cinn/operator_fusion/policy/shardable_axes_base.h similarity index 65% rename from paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h rename to paddle/cinn/operator_fusion/policy/shardable_axes_base.h index c9c341c0b05de..1202641bab3c4 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_base.h +++ b/paddle/cinn/operator_fusion/policy/shardable_axes_base.h @@ -14,39 +14,42 @@ #pragma once -#include "paddle/cinn/frontend/group_cluster/common_utils.h" +#include "paddle/cinn/operator_fusion/utils.h" -namespace cinn::frontend::group_cluster::policy { +namespace cinn::fusion { struct ShardableAxes { + ShardableAxes() : axis_names({}) {} explicit ShardableAxes(const std::vector& names) : axis_names(names) {} std::vector axis_names; - std::string DebugStr(); + std::string DebugStr() const; }; struct ShardableAxesSignature { std::vector inputs; std::vector outputs; - std::string DebugStr(); + std::string DebugStr() const; }; struct ShardableAxesInfoManager { ShardableAxesInfoManager( - const std::vector& ops, + const std::vector& ops, const pir::ShapeConstraintIRAnalysis* shape_analysis); - ShardableAxesSignature GetSignature(const pir::Operation* op); - ShardableAxes GetAxes(const pir::Value value); + ShardableAxesSignature GetSignature(pir::Operation* op); + ShardableAxes GetAxes(pir::Value value); + ShardableAxesSignature CreateShardableSignature(pir::Operation* op); + ShardableAxes ReplaceShardableAxesWithRootName(const ShardableAxes& axes); static std::string GetUniqueName(); + std::string NameUnionDebugStr() const; private: - const std::vector& ops_; + const std::vector& ops_; const pir::ShapeConstraintIRAnalysis* shape_analysis_; - std::unordered_map - op_signature_map_; + std::unordered_map op_signature_map_; std::unordered_map value_axes_map_; std::unordered_map name_union_; }; -} // namespace cinn::frontend::group_cluster::policy +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/policy/shardable_axes_policy.cc b/paddle/cinn/operator_fusion/policy/shardable_axes_policy.cc new file mode 100644 index 0000000000000..24ffa6d862c86 --- /dev/null +++ b/paddle/cinn/operator_fusion/policy/shardable_axes_policy.cc @@ -0,0 +1,99 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/cinn/operator_fusion/policy/shardable_axes_policy.h"
+#include "paddle/cinn/operator_fusion/backend/pattern.h"
+#include "paddle/cinn/operator_fusion/frontend/pattern.h"
+
+namespace cinn::fusion {
+
+template <typename T>
+bool ShardableAxesRRFusePolicy<T>::IsDownstreamStmtDependReduceOp(
+    pir::Operation* reduce, const StmtPattern<T>& downstream) {
+  const auto& values = GetPatternInputValues(downstream);
+  for (const auto& value : reduce->results()) {
+    if (std::find(values.begin(), values.end(), value) != values.end()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+template <typename T>
+std::optional<ReducePattern<T>>
+ShardableAxesRRFusePolicy<T>::GetDownstreamFromCandidate(
+    const ReducePattern<T>& upstream,
+    const std::vector<ReducePattern<T>>& candidates) {
+  pir::Operation* reduce = upstream.GetReduceOp();
+  for (const auto& candidate : candidates) {
+    if (IsDownstreamStmtDependReduceOp(reduce, candidate)) {
+      return candidate;
+    }
+  }
+  return {};
+}
+
+static std::set<std::string> GetReduceAxesName(
+    const ShardableAxesSignature& signature) {
+  const auto& input_names = signature.inputs[0].axis_names;
+  const auto& output_names = signature.outputs[0].axis_names;
+  std::set<std::string> res(input_names.begin(), input_names.end());
+  for (const auto& n : output_names) {
+    res.erase(n);
+  }
+  return res;
+}
+
+template <typename T>
+bool ShardableAxesRRFusePolicy<T>::ReduceTreeGrownCanMerge(
+    const PatternNodePtr<T>& upstream, const PatternNodePtr<T>& downstream) {
+  if (!upstream->IsReduceTree() || !downstream->IsReduceTree()) {
+    return false;
+  }
+  const auto& upstream_tree =
+      std::get<ReduceTreePattern<T>>(upstream->stmt_pattern_);
+  const auto& downstream_tree =
+      std::get<ReduceTreePattern<T>>(downstream->stmt_pattern_);
+  const auto& maybe_downstream_op = GetDownstreamFromCandidate(
+      upstream_tree.GetRootPattern(), downstream_tree.reduce_patterns_);
+  if (!maybe_downstream_op.has_value()) {
+    return false;
+  }
+  const pir::Value& reduce_out_value =
+      upstream_tree.GetRootPattern().GetReduceOp()->result(0);
+  pir::Operation* downstream_reduce_op =
+      maybe_downstream_op.value().GetReduceOp();
+  const auto& reduce_names =
+      GetReduceAxesName(axes_info_.GetSignature(downstream_reduce_op));
+  for (const auto& n :
+       axes_info_.GetAxes(downstream_reduce_op->result(0)).axis_names) {
+    if (reduce_names.count(n) > 0) {
+      // not meeting the BroadcastEdge condition.
+      return false;
+    }
+  }
+  return true;
+}
+
+template <typename T>
+bool ShardableAxesRRFusePolicy<T>::CanFuse(
+    const PatternNodePtr<T>& upstream, const PatternNodePtr<T>& downstream) {
+  // TODO(wuzhanfei) shardable axes policy
+  return ReduceTreeGrownCanMerge(upstream, downstream);
+}
+
+template class ShardableAxesRRFusePolicy<FrontendStage>;
+template class ShardableAxesRRFusePolicy<BackendStage>;
+
+}  // namespace cinn::fusion
diff --git a/paddle/cinn/operator_fusion/policy/shardable_axes_policy.h b/paddle/cinn/operator_fusion/policy/shardable_axes_policy.h
new file mode 100644
index 0000000000000..d4c662c6c3a09
--- /dev/null
+++ b/paddle/cinn/operator_fusion/policy/shardable_axes_policy.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2024 PaddlePaddle Authors.
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/cinn/operator_fusion/policy/policy_manager.h" +#include "paddle/cinn/operator_fusion/policy/shardable_axes_base.h" + +namespace cinn::fusion { + +template +class ShardableAxesRRFusePolicy final : public Policy { + public: + ShardableAxesRRFusePolicy( + const std::vector& ops, // NOLINT + const pir::ShapeConstraintIRAnalysis* shape_analysis) // NOLINT + : axes_info_(ops, shape_analysis) {} + bool CanFuse(const PatternNodePtr& upstream, + const PatternNodePtr& downstream) override; + std::string Name() { return "ShardableAxesRRFusePolicy"; } + + private: + bool ReduceTreeGrownCanMerge(const PatternNodePtr&, + const PatternNodePtr&); + std::optional> GetDownstreamFromCandidate( + const ReducePattern& upstream, + const std::vector>& candidates); + ShardableAxesInfoManager axes_info_; + bool IsDownstreamStmtDependReduceOp(pir::Operation* reduce, + const StmtPattern& downstream); +}; + +} // namespace cinn::fusion diff --git a/paddle/cinn/operator_fusion/utils.h b/paddle/cinn/operator_fusion/utils.h new file mode 100644 index 0000000000000..696836fe2a780 --- /dev/null +++ b/paddle/cinn/operator_fusion/utils.h @@ -0,0 +1,178 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
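The utilities header that follows is deliberately header-only (static functions and templates), since it is included from both the frontend and backend fusion stages. One contract worth calling out: ExtendVector and UniqueConcatVector deduplicate while preserving first-occurrence order, whereas UniqueVectorBySet round-trips through std::set and therefore also sorts. A small self-contained sketch of that difference (simplified local copies of the templates below, not the Paddle build):

    #include <iostream>
    #include <set>
    #include <unordered_set>
    #include <vector>

    // Simplified copy of cinn::fusion::ExtendVector: append elements of
    // `second` not yet present in `first`, keeping first-occurrence order.
    template <typename T>
    void ExtendVector(std::vector<T>* first, const std::vector<T>& second) {
      std::unordered_set<T> visited(first->begin(), first->end());
      for (const T& v : second) {
        if (visited.insert(v).second) first->push_back(v);
      }
    }

    // Simplified copy of cinn::fusion::UniqueVectorBySet: dedups but sorts.
    template <typename T>
    std::vector<T> UniqueVectorBySet(const std::vector<T>& v) {
      std::set<T> unique(v.begin(), v.end());
      return std::vector<T>(unique.begin(), unique.end());
    }

    int main() {
      std::vector<int> a{3, 1, 2};
      ExtendVector(&a, {2, 4, 1, 5});
      for (int v : a) std::cout << v << ' ';  // 3 1 2 4 5 (order preserved)
      std::cout << '\n';
      for (int v : UniqueVectorBySet(std::vector<int>{3, 1, 2, 2, 4})) {
        std::cout << v << ' ';  // 1 2 3 4 (sorted)
      }
      std::cout << '\n';
    }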
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "glog/logging.h" +#include "paddle/cinn/hlir/dialect/operator/ir/cinn_op.h" +#include "paddle/cinn/hlir/dialect/operator/ir/manual_op.h" +#include "paddle/cinn/hlir/framework/op.h" +#include "paddle/cinn/utils/string.h" +#include "paddle/pir/include/dialect/control_flow/ir/cf_op.h" + +namespace cinn::fusion { + +using OpPatternKind = cinn::hlir::framework::OpPatternKind; +static OpPatternKind GetOpPatternKind(const ::pir::Operation* op) { + return hlir::framework::pir::CompatibleInfo::OpKind(*op); +} + +static size_t GetRank(pir::Value value) { + return value.type().dyn_cast().dims().size(); +} + +static std::vector GetReduceAxisIdx(pir::Operation* reduce_op) { + const size_t input_rank = GetRank(reduce_op->operand_source(0)); + const auto& attr_val = reduce_op->attributes().at("dim"); + CHECK(attr_val.isa<::pir::ArrayAttribute>()); + const auto& axis_attr = attr_val.dyn_cast<::pir::ArrayAttribute>(); + std::vector reduce_axis_idx; + for (int i = 0; i < axis_attr.size(); ++i) { + int64_t axis = axis_attr.at(i).dyn_cast<::pir::Int64Attribute>().data(); + if (axis < 0) { + axis += input_rank; + } + CHECK_GE(axis, 0); + CHECK_LT(axis, input_rank); + reduce_axis_idx.push_back(axis); + } + VLOG(4) << "GetReduceAxisIdx: " << utils::Join(reduce_axis_idx, ","); + return reduce_axis_idx; +} + +static bool GetReduceOpKeepDims(pir::Operation* reduce_op) { + const auto& attr_val = reduce_op->attributes().at("keep_dim"); + CHECK(attr_val.isa<::pir::BoolAttribute>()); + return attr_val.dyn_cast<::pir::BoolAttribute>().data(); +} + +static std::string OpsDebugStr(std::vector ops) { + std::stringstream ss; + pir::IrPrinter printer(ss); + for (const auto* op : ops) { + printer.PrintOperation(const_cast(op)); + ss << "\n"; + } + return ss.str(); +} + +static std::optional> +GetBroadcastOpInputOuputValue(pir::Operation* op) { + auto* mut_op = const_cast(op); + if (op->isa()) { + auto expand_op = mut_op->dyn_cast(); + return std::make_pair(expand_op.x(), expand_op.out()); + } else if (op->isa()) { + auto broadcast_op = mut_op->dyn_cast(); + return std::make_pair(broadcast_op.x(), broadcast_op.out()); + } else { + CHECK(false) << "Unsupported broadcast op: " << op->name(); + } + return std::nullopt; +} + +template +void RemoveFromVector(std::vector* vec, T item) { + auto iter = std::find(vec->begin(), vec->end(), item); + if (iter != vec->end()) { + vec->erase(iter); + } +} + +template +std::vector ConcatVector(const std::vector& first, + const std::vector& second) { + std::vector result = first; + result.insert(result.end(), second.begin(), second.end()); + return result; +} + +template +std::vector FilterVector(const std::vector& first, const F& func) { + std::vector result; + for (const auto& i : first) { + if (func(i)) { + result.push_back(i); + } + } + return result; +} + +template +std::vector MapVector(const std::vector& as, + const std::function& func) { + std::vector res; + for (const auto& a : as) { + res.push_back(func(a)); + } + return res; +} + +template +std::set ToSet(const std::vector& input) { + std::set result(input.begin(), input.end()); + return result; +} + +template +bool IsAnyFirstInSecond(const std::vector& first, + const std::vector& second) { + const auto& second_set = ToSet(second); + for (const auto& ele : first) { + if (second_set.count(ele)) { + return true; + } + } + return false; +} + +template +std::vector UniqueVectorBySet(const 
std::vector& v) { + std::set unique(v.begin(), v.end()); + return std::vector(unique.begin(), unique.end()); +} + +template +void ExtendVector(std::vector* first, const std::vector& second) { + std::unordered_set visited = + std::unordered_set(first->begin(), first->end()); + for (auto iter = second.begin(); iter != second.end(); iter++) { + if (visited.find(*iter) == visited.end()) { + visited.emplace(*iter); + first->emplace_back(*iter); + } + } +} + +template +std::vector UniqueConcatVector(const std::vector& first, + const std::vector& second) { + std::vector result = std::vector(first); + ExtendVector(&result, second); + return result; +} + +} // namespace cinn::fusion diff --git a/paddle/cinn/optim/cast_bool_to_int8.cc b/paddle/cinn/optim/cast_bool_to_int8.cc index 64385623bcd21..55c8053fc6db5 100644 --- a/paddle/cinn/optim/cast_bool_to_int8.cc +++ b/paddle/cinn/optim/cast_bool_to_int8.cc @@ -38,10 +38,30 @@ struct Mutator : public ir::IRMutator<> { } // namespace +void CastBoolExprToInt8Impl(common::UnknownArch, Expr* e) { + LOG(FATAL) << "unknown architecture."; +} + +void CastBoolExprToInt8Impl(common::X86Arch, Expr* e) { + Mutator mutator; + mutator.Visit(e, e); +} + +void CastBoolExprToInt8Impl(common::ARMArch, Expr* e) { + // Do nothing. +} + +void CastBoolExprToInt8Impl(common::NVGPUArch, Expr* e) { + // Do nothing. +} + +void CastBoolExprToInt8(common::Arch arch, Expr* e) { + return std::visit( + [&](const auto& impl) { return CastBoolExprToInt8Impl(impl, e); }, + arch.variant()); +} + void CastBoolToInt8(Expr* e, Target target) { - if (target.arch == Target::Arch::X86) { - Mutator mutator; - mutator.Visit(e, e); - } + CastBoolExprToInt8(target.arch, e); } } // namespace cinn::optim diff --git a/paddle/cinn/optim/lower_intrin.cc b/paddle/cinn/optim/lower_intrin.cc index 07fe5370e7761..5c0fa6566d60c 100644 --- a/paddle/cinn/optim/lower_intrin.cc +++ b/paddle/cinn/optim/lower_intrin.cc @@ -25,12 +25,14 @@ namespace cinn { namespace optim { -void LowerIntrin(Expr *e, Target target) { - if (target.arch == Target::Arch::X86) { - codegen::RegisterCpuIntrinRule(); - } else { - return; - } +template +void LowerIntrinImpl(const T &, const Target &target, Expr *e) { + // Do nothing. +} + +void LowerIntrinImpl(common::X86Arch, const Target &target, Expr *e) { + codegen::RegisterCpuIntrinRule(); + struct Mutator : ir::IRMutator { Target target; @@ -99,5 +101,15 @@ void LowerIntrin(Expr *e, Target target) { m(e); } +void LowerIntrinByArch(Expr *e, const Target &target) { + return std::visit( + [&](const auto &impl) { return LowerIntrinImpl(impl, target, e); }, + target.arch.variant()); +} + +void LowerIntrin(Expr *e, Target target) { + return LowerIntrinByArch(e, target); +} + } // namespace optim } // namespace cinn diff --git a/paddle/cinn/optim/map_extern_call.cc b/paddle/cinn/optim/map_extern_call.cc index d260cea233dd4..1b9bbf1e57374 100644 --- a/paddle/cinn/optim/map_extern_call.cc +++ b/paddle/cinn/optim/map_extern_call.cc @@ -44,6 +44,65 @@ static const std::set kExternInt32CallsGPU{{"left_shift", static const std::set kExternFp32CallsCPU = { "erf", "acos", "acosh", "asin", "asinh", "atan", "atanh", "remainder"}; +void DealWithCpuIntrinsics(ir::Call *node, Expr *expr) { + if (kExternFp32CallsCPU.count(node->name)) { + PADDLE_ENFORCE_GE( + node->read_args.size(), + 1UL, + phi::errors::InvalidArgument( + "The size of node's read args is incorrect." 
+ "Expected size is greater than or equal to 1, but receive %d.", + node->read_args.size())); + CHECK(node->read_args.front().type().is_float()) + << "CPU extern call intrinsics only support float now! Please " + "check."; + if (node->read_args.front().type().is_float(32)) { + auto out_type = node->type(); + *expr = lang::CallExtern(node->name + "f", node->read_args); + } + } +} + +void DealWithIntrinsicsImpl(common::UnknownArch, ir::Call *node, Expr *expr) { + DealWithCpuIntrinsics(node, expr); +} + +void DealWithIntrinsicsImpl(common::X86Arch, ir::Call *node, Expr *expr) { + DealWithCpuIntrinsics(node, expr); +} + +void DealWithIntrinsicsImpl(common::ARMArch, ir::Call *node, Expr *expr) { + DealWithCpuIntrinsics(node, expr); +} + +void DealWithIntrinsicsImpl(common::NVGPUArch, ir::Call *node, Expr *expr) { + auto arg_size = node->read_args.size(); + if (arg_size == 0UL) { + // some node like __syncthreads hasn't arguments + return; + } + const auto &dtype = node->read_args.front().type(); + const auto &name = node->name; + + bool node_in_extern_fp32 = kExternFp32CallsGPU.count(name); + bool node_in_extern_int32 = kExternInt32CallsGPU.count(name); + if (!node_in_extern_fp32 && !node_in_extern_int32) { + return; + } + + std::string extern_func = + hlir::GetExternFuncName(cinn::common::DefaultNVGPUTarget(), dtype, name); + *expr = lang::CallExtern(extern_func, node->read_args, node->attrs); +} + +void DealWithIntrinsics(common::Arch arch, ir::Call *node, Expr *expr) { + return std::visit( + [&](const auto &impl) { + return DealWithIntrinsicsImpl(impl, node, expr); + }, + arch.variant()); +} + void MapExternCall(Expr *e, Target target) { struct Mutator : ir::IRMutator { Target target; @@ -56,50 +115,7 @@ void MapExternCall(Expr *e, Target target) { auto *node = expr->As(); CHECK(node); OptimizeConstantPow(node); - if (target.arch == Target::Arch::NVGPU) { - DealWithNvGpuIntrinsics(node, expr); - } else { - DealWithCpuIntrinsics(node, expr); - } - } - - void DealWithCpuIntrinsics(ir::Call *node, Expr *expr) { - if (kExternFp32CallsCPU.count(node->name)) { - PADDLE_ENFORCE_GE( - node->read_args.size(), - 1UL, - phi::errors::InvalidArgument( - "The size of node's read args is incorrect." - "Expected size is greater than or equal to 1, but receive %d.", - node->read_args.size())); - CHECK(node->read_args.front().type().is_float()) - << "CPU extern call intrinsics only support float now! 
Please " - "check."; - if (node->read_args.front().type().is_float(32)) { - auto out_type = node->type(); - *expr = lang::CallExtern(node->name + "f", node->read_args); - } - } - } - - void DealWithNvGpuIntrinsics(ir::Call *node, Expr *expr) { - auto arg_size = node->read_args.size(); - if (arg_size == 0UL) { - // some node like __syncthreads hasn't arguments - return; - } - const auto &dtype = node->read_args.front().type(); - const auto &name = node->name; - - bool node_in_extern_fp32 = kExternFp32CallsGPU.count(name); - bool node_in_extern_int32 = kExternInt32CallsGPU.count(name); - if (!node_in_extern_fp32 && !node_in_extern_int32) { - return; - } - - std::string extern_func = hlir::GetExternFuncName( - cinn::common::DefaultNVGPUTarget(), dtype, name); - *expr = lang::CallExtern(extern_func, node->read_args, node->attrs); + DealWithIntrinsics(target.arch, node, expr); } // Replace pow(x, 0.5) to sqrt(x) and pow(x, -0.5) to rsqrt(x), which diff --git a/paddle/cinn/optim/optimize.cc b/paddle/cinn/optim/optimize.cc index bd6690838c09e..3e1ac6a2030b5 100644 --- a/paddle/cinn/optim/optimize.cc +++ b/paddle/cinn/optim/optimize.cc @@ -66,7 +66,7 @@ Expr Optimize(Expr e, RemoveGpuForloopsAxis(&copied); } CudaSyncThreadsDropIfThenElse(&copied); - // TransBufferWithDynamicShape(&copied); + // CudaTransBufferWithDynamicShape(&copied); #endif SimplifyBlocks(&copied); diff --git a/paddle/cinn/optim/trans_buffer_with_dynamic_shape.cc b/paddle/cinn/optim/trans_buffer_with_dynamic_shape.cc index a0b5ec89b494c..c46efa09cc64a 100644 --- a/paddle/cinn/optim/trans_buffer_with_dynamic_shape.cc +++ b/paddle/cinn/optim/trans_buffer_with_dynamic_shape.cc @@ -103,12 +103,11 @@ struct Mutator : public ir::IRMutator<> { } // namespace -void TransBufferWithDynamicShape(ir::Expr* e) { +void CudaTransBufferWithDynamicShape(ir::Expr* e) { Mutator mutator; mutator.Visit(e, e); #ifdef CINN_WITH_CUDA - auto cur_dev_info = - common::DevInfoMgr::GetDevInfo(0); + auto cur_dev_info = common::DevInfoMgr::GetDevInfo(0); if (cur_dev_info->IsValid()) { size_t max_shm_per_block = cur_dev_info->GetMaxSharedMemPerBlock(); CHECK(mutator.shared_mem_size_used_ <= max_shm_per_block) diff --git a/paddle/cinn/optim/trans_buffer_with_dynamic_shape.h b/paddle/cinn/optim/trans_buffer_with_dynamic_shape.h index 4913347c0971c..c546770a0941f 100644 --- a/paddle/cinn/optim/trans_buffer_with_dynamic_shape.h +++ b/paddle/cinn/optim/trans_buffer_with_dynamic_shape.h @@ -24,7 +24,7 @@ namespace optim { * Given Expr AST, translate dynamic shape in buffers to * static shape, the pass is just used on Nvidia GPU temporarily. 
*/ -void TransBufferWithDynamicShape(ir::Expr* expr); +void CudaTransBufferWithDynamicShape(ir::Expr* expr); } // namespace optim } // namespace cinn diff --git a/paddle/cinn/optim/transform_polyfor_to_for_test.cc b/paddle/cinn/optim/transform_polyfor_to_for_test.cc index b6f7c073df154..652365d11722c 100644 --- a/paddle/cinn/optim/transform_polyfor_to_for_test.cc +++ b/paddle/cinn/optim/transform_polyfor_to_for_test.cc @@ -49,7 +49,7 @@ TEST(Expr, basic) { auto func = Lower("matmul", stages, {A, B, C}); Target target; - target.arch = Target::Arch ::X86; + target.arch = common::X86Arch{}; target.bits = Target::Bit ::k32; target.os = Target::OS ::Linux; diff --git a/paddle/cinn/optim/vectorize_loops_test.cc b/paddle/cinn/optim/vectorize_loops_test.cc index 7f9abe1e2c512..4e4ac9e24763c 100644 --- a/paddle/cinn/optim/vectorize_loops_test.cc +++ b/paddle/cinn/optim/vectorize_loops_test.cc @@ -55,7 +55,7 @@ TEST(Vectorize, replace_var) { Expr func = optim::Optimize(funcs, cinn::common::DefaultHostTarget()); Target target; - target.arch = Target::Arch ::X86; + target.arch = common::X86Arch{}; target.bits = Target::Bit ::k32; target.os = Target::OS ::Linux; @@ -99,7 +99,7 @@ TEST(Vectorize, TestMarkVectorize) { Expr N(500); Target target; - target.arch = Target::Arch ::X86; + target.arch = common::X86Arch{}; target.bits = Target::Bit ::k32; target.os = Target::OS ::Linux; diff --git a/paddle/cinn/pybind/CMakeLists.txt b/paddle/cinn/pybind/CMakeLists.txt index ec409578930df..970203a273389 100755 --- a/paddle/cinn/pybind/CMakeLists.txt +++ b/paddle/cinn/pybind/CMakeLists.txt @@ -15,6 +15,8 @@ set(srcs utils.cc schedule.cc) +gather_srcs(cinnapi_src SRCS ${srcs}) + if(WITH_CUDA) message(STATUS "Compile core_api with CUDA support") cinn_nv_library( diff --git a/paddle/cinn/pybind/bind.cc b/paddle/cinn/pybind/bind.cc index 4c20f22b973cf..6882a1ac87208 100644 --- a/paddle/cinn/pybind/bind.cc +++ b/paddle/cinn/pybind/bind.cc @@ -21,27 +21,28 @@ namespace py = pybind11; namespace cinn::pybind { -PYBIND11_MODULE(core_api, m) { - m.doc() = "CINN core API"; - - py::module runtime = m.def_submodule("runtime", "bind cinn_runtime"); - py::module common = m.def_submodule("common", "namespace cinn::common"); - py::module lang = m.def_submodule("lang", "namespace cinn::lang"); - py::module ir = m.def_submodule("ir", "namespace cinn::ir"); - py::module poly = m.def_submodule("poly", "namespace cinn::poly, polyhedral"); - py::module backends = m.def_submodule( +void BindCINN(py::module *m) { + py::module cinn = + m->def_submodule("cinn", "Compiler Infrastructure for Neural Networks"); + py::module runtime = cinn.def_submodule("runtime", "bind cinn_runtime"); + py::module common = cinn.def_submodule("common", "namespace cinn::common"); + py::module lang = cinn.def_submodule("lang", "namespace cinn::lang"); + py::module ir = cinn.def_submodule("ir", "namespace cinn::ir"); + py::module poly = + cinn.def_submodule("poly", "namespace cinn::poly, polyhedral"); + py::module backends = cinn.def_submodule( "backends", "namespace cinn::backends, execution backends"); - py::module optim = - m.def_submodule("optim", "namespace cinn::optim, CINN IR optimization"); - py::module pe = m.def_submodule( + py::module optim = cinn.def_submodule( + "optim", "namespace cinn::optim, CINN IR optimization"); + py::module pe = cinn.def_submodule( "pe", "namespace cinn::hlir::pe, CINN Primitive Emitters"); py::module frontend = - m.def_submodule("frontend", "namespace cinn::frontend, CINN frontend"); - py::module framework = m.def_submodule( + 
cinn.def_submodule("frontend", "namespace cinn::frontend, CINN frontend"); + py::module framework = cinn.def_submodule( "framework", "namespace cinn::hlir::framework, CINN framework"); py::module utils = - m.def_submodule("utils", "namespace cinn::utils, CINN framework"); - py::module schedule = m.def_submodule( + cinn.def_submodule("utils", "namespace cinn::utils, CINN framework"); + py::module schedule = cinn.def_submodule( "schedule", "namespace cinn::ir::schedule, CINN Schedule"); BindRuntime(&runtime); diff --git a/paddle/cinn/pybind/bind.h b/paddle/cinn/pybind/bind.h index 77566097a19aa..bd9f69ece3c7f 100644 --- a/paddle/cinn/pybind/bind.h +++ b/paddle/cinn/pybind/bind.h @@ -53,4 +53,8 @@ void BindFrontend(pybind11::module *m); void BindFramework(pybind11::module *m); void BindUtils(pybind11::module *m); void BindSchedule(pybind11::module *m); + +__attribute__((visibility("default"))) extern void BindCINN( + pybind11::module *m); + } // namespace cinn::pybind diff --git a/paddle/cinn/pybind/common.cc b/paddle/cinn/pybind/common.cc index 7d777af91204a..9f7bd3bdf0d91 100644 --- a/paddle/cinn/pybind/common.cc +++ b/paddle/cinn/pybind/common.cc @@ -27,11 +27,16 @@ namespace py = pybind11; namespace cinn::pybind { +using cinn::common::Arch; +using cinn::common::ARMArch; using cinn::common::bfloat16; using cinn::common::CINNValue; using cinn::common::float16; +using cinn::common::NVGPUArch; using cinn::common::Target; using cinn::common::Type; +using cinn::common::UnknownArch; +using cinn::common::X86Arch; using utils::GetStreamCnt; using utils::StringFormat; @@ -44,14 +49,26 @@ void BindCinnValue(py::module *); void ResetGlobalNameID() { cinn::common::Context::Global().ResetNameId(); } void BindTarget(py::module *m) { + py::class_(*m, "Arch") + .def("IsX86Arch", + [](const common::Arch &arch) { + return std::holds_alternative(arch); + }) + .def("IsNVGPUArch", [](const common::Arch &arch) { + return std::holds_alternative(arch); + }); + py::class_ target(*m, "Target"); target.def_readwrite("os", &Target::os) .def_readwrite("arch", &Target::arch) + .def_static("X86Arch", []() -> common::Arch { return common::X86Arch{}; }) + .def_static("NVGPUArch", + []() -> common::Arch { return common::NVGPUArch{}; }) .def_readwrite("bits", &Target::bits) .def_readwrite("features", &Target::features) .def(py::init<>()) .def(py::init &>()) .def("defined", &Target::defined) @@ -71,12 +88,6 @@ void BindTarget(py::module *m) { .value("Linux", Target::OS::Linux) .value("Windows", Target::OS::Windows); - py::enum_ arch(target, "Arch"); - arch.value("Unk", Target::Arch::Unk) - .value("X86", Target::Arch::X86) - .value("ARM", Target::Arch::ARM) - .value("NVGPU", Target::Arch::NVGPU); - py::enum_ bit(target, "Bit"); bit.value("Unk", Target::Bit::Unk) .value("k32", Target::Bit::k32) diff --git a/paddle/cinn/pybind/framework.cc b/paddle/cinn/pybind/framework.cc index 5122a61d9fc7b..36c9683e22d1c 100644 --- a/paddle/cinn/pybind/framework.cc +++ b/paddle/cinn/pybind/framework.cc @@ -78,7 +78,12 @@ void BindFramework(pybind11::module *m) { input_output_names, key, target); - CHECK_EQ(funcs.size(), 1U); + PADDLE_ENFORCE_EQ(funcs.size(), + 1U, + phi::errors::InvalidArgument( + "The size of funcs is incorrect." 
+ "Expected size is 1, but receive %d.", + funcs.size())); func = funcs[0]; return func; }); @@ -103,8 +108,11 @@ void BindFramework(pybind11::module *m) { }) .def("get_attr", [](NodeAttr &self, const std::string &key) { - CHECK_EQ(self.attr_store.count(key), 1) - << "Didn't find value with key [" << key << "]."; + PADDLE_ENFORCE_EQ(self.attr_store.count(key), + 1, + phi::errors::InvalidArgument( + "Didn't find value with key [%d].", + self.attr_store.count(key))); return self.attr_store[key]; }) .def("__str__", [](NodeAttr &self) { return utils::GetStreamCnt(self); }); @@ -119,24 +127,27 @@ void BindFramework(pybind11::module *m) { t->shape().data().end()); py::array array(std::move(dt), std::move(shape)); auto *mutable_data = array.mutable_data(); - if (target.arch == Target::Arch::X86) { - std::memcpy(mutable_data, - t->data(), - t->shape().numel() * t->type().bytes()); - } else if (target.arch == Target::Arch::NVGPU) { + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + std::memcpy(mutable_data, + t->data(), + t->shape().numel() * t->type().bytes()); + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - CUDA_CALL(cudaMemcpy( - mutable_data, - reinterpret_cast(t->mutable_data(target, t->type())), - t->shape().numel() * t->type().bytes(), - cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(mutable_data, + reinterpret_cast( + t->mutable_data(target, t->type())), + t->shape().numel() * t->type().bytes(), + cudaMemcpyDeviceToHost)); #else PADDLE_THROW(phi::errors::Fatal("To use CUDA backends, " "you need to set WITH_CUDA ON!")); #endif - } else { - CINN_NOT_IMPLEMENTED - } + }, + }); return array; }) .def("var_names", &Scope::var_names); @@ -152,38 +163,41 @@ void BindFramework(pybind11::module *m) { [](hlir::framework::Tensor &self, Type type) { self->set_type(type); }) - .def( - "numpy", - [](hlir::framework::Tensor &self, - const cinn::common::Target &target) { - std::string type_str = cinn::common::Type2Str(self->type()); - if (type_str == "bfloat16") { - type_str = "uint16"; - } - py::dtype dt(type_str); - py::array::ShapeContainer shape(self->shape().data().begin(), - self->shape().data().end()); - py::array array(std::move(dt), std::move(shape)); - void *array_data = array.mutable_data(); - if (target.arch == Target::Arch::X86) { - std::memcpy(array_data, - self->data(), - self->shape().numel() * self->type().bytes()); - } else if (target.arch == Target::Arch::NVGPU) { + .def("numpy", + [](hlir::framework::Tensor &self, + const cinn::common::Target &target) { + std::string type_str = cinn::common::Type2Str(self->type()); + if (type_str == "bfloat16") { + type_str = "uint16"; + } + py::dtype dt(type_str); + py::array::ShapeContainer shape(self->shape().data().begin(), + self->shape().data().end()); + py::array array(std::move(dt), std::move(shape)); + void *array_data = array.mutable_data(); + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + std::memcpy(array_data, + self->data(), + self->shape().numel() * self->type().bytes()); + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - CUDA_CALL(cudaMemcpy(array_data, - self->data(), - self->shape().numel() * self->type().bytes(), - cudaMemcpyDeviceToHost)); + CUDA_CALL( + cudaMemcpy(array_data, + self->data(), + self->shape().numel() * self->type().bytes(), + cudaMemcpyDeviceToHost)); #else 
PADDLE_THROW(phi::errors::Fatal("To use CUDA backends, " "you need to set WITH_CUDA ON!")); #endif - } else { - CINN_NOT_IMPLEMENTED - } - return array; - }) + }, + }); + return array; + }) .def( "from_numpy", [](hlir::framework::Tensor &self, @@ -194,30 +208,44 @@ void BindFramework(pybind11::module *m) { << "currently only support float32 data type as input"; hlir::framework::shape_t shape; std::copy_n(array.shape(), array.ndim(), std::back_inserter(shape)); - CHECK_EQ( + PADDLE_ENFORCE_EQ( std::accumulate(shape.begin(), shape.end(), 1, [](int32_t a, int32_t b) { return a * b; }), - self->shape().numel()); + self->shape().numel(), + phi::errors::InvalidArgument( + "The product of all elements in the shape container and " + "shape numel is not equal," + "where the product of all elements in the shape " + "container:%d but shape numel:%d.", + std::accumulate(shape.begin(), + shape.end(), + 1, + [](int32_t a, int32_t b) { return a * b; }), + self->shape().numel())); auto *data = self->mutable_data(target, self->type()); - if (target.arch == Target::Arch::X86) { - std::memcpy(data, - array.data(), - self->shape().numel() * self->type().bytes()); - } else if (target.arch == Target::Arch::NVGPU) { + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + std::memcpy(data, + array.data(), + self->shape().numel() * self->type().bytes()); + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - CUDA_CALL(cudaMemcpy(reinterpret_cast(data), - reinterpret_cast(array.data()), - self->shape().numel() * self->type().bytes(), - cudaMemcpyHostToDevice)); + CUDA_CALL( + cudaMemcpy(reinterpret_cast(data), + reinterpret_cast(array.data()), + self->shape().numel() * self->type().bytes(), + cudaMemcpyHostToDevice)); #else PADDLE_THROW(phi::errors::Fatal("To use CUDA backends, " "you need to set WITH_CUDA ON!")); #endif - } else { - CINN_NOT_IMPLEMENTED - } + }, + }); }); py::class_ instruction(*m, "Instruction"); diff --git a/paddle/cinn/pybind/frontend.cc b/paddle/cinn/pybind/frontend.cc index f7eaf01a59f07..fec7c5efb8b0a 100644 --- a/paddle/cinn/pybind/frontend.cc +++ b/paddle/cinn/pybind/frontend.cc @@ -219,27 +219,34 @@ void BindFrontend(pybind11::module *m) { auto in_tensor = scope->GetTensor(tensor_inputs[i]->id); auto dtype = tensor_inputs[i]->type; auto *data = in_tensor->mutable_data(target, dtype); - CHECK_EQ(input_data[i].size(), in_tensor->shape().numel()) - << "The size of tensor [" << tensor_inputs[i]->id - << "] is different with the input data's size! Please check."; - if (target.arch == Target::Arch::NVGPU) { + PADDLE_ENFORCE_EQ(input_data[i].size(), + in_tensor->shape().numel(), + phi::errors::InvalidArgument( + "The size of tensor [%d] is different with " + "the input data's size! 
Please check.", + tensor_inputs[i]->id)); + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + memcpy(data, + input_data[i].data(), + in_tensor->shape().numel() * + dtype.bytes()); // All random data + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - CUDA_CALL(cudaMemcpy(data, - input_data[i].data(), - in_tensor->shape().numel() * dtype.bytes(), - cudaMemcpyHostToDevice)); + CUDA_CALL( + cudaMemcpy(data, + input_data[i].data(), + in_tensor->shape().numel() * dtype.bytes(), + cudaMemcpyHostToDevice)); #else PADDLE_THROW(phi::errors::Fatal("To use CUDA backends, " "you need to set WITH_CUDA ON!")); #endif - } else if (target.arch == Target::Arch::X86) { - memcpy(data, - input_data[i].data(), - in_tensor->shape().numel() * - dtype.bytes()); // All random data - } else { - CINN_NOT_IMPLEMENTED - } + }, + }); } program->Execute(); @@ -294,104 +301,118 @@ void BindFrontend(pybind11::module *m) { * '/python/tests/test_op_benchmark.py' * */ - .def( - "test_benchmark", - [](Program &self, - const cinn::common::Target &target, - const std::vector &tensor_inputs, - const std::vector &input_data, - const Variable &tensor_out, - int repeat_, - const std::string &info) { - std::shared_ptr g( - new hlir::framework::Graph(self, target)); - hlir::framework::ApplyPass(g.get(), "InferShape"); - std::shared_ptr scope = - hlir::framework::BuildScope(target, g); - hlir::framework::CompilationContext context(g, scope, target); - hlir::framework::GraphCompiler gc(context); - auto program = gc.Build(); - for (size_t i = 0; i < tensor_inputs.size(); i++) { - auto in_tensor = scope->GetTensor(tensor_inputs[i]->id); - auto *data = in_tensor->mutable_data(target); - CHECK_EQ(input_data[i].size(), in_tensor->shape().numel()) - << "The size of tensor [" << tensor_inputs[i]->id - << "] is different with the input data's size! Please check."; - if (target.arch == Target::Arch::NVGPU) { + .def("test_benchmark", + [](Program &self, + const cinn::common::Target &target, + const std::vector &tensor_inputs, + const std::vector &input_data, + const Variable &tensor_out, + int repeat_, + const std::string &info) { + std::shared_ptr g( + new hlir::framework::Graph(self, target)); + hlir::framework::ApplyPass(g.get(), "InferShape"); + std::shared_ptr scope = + hlir::framework::BuildScope(target, g); + hlir::framework::CompilationContext context(g, scope, target); + hlir::framework::GraphCompiler gc(context); + auto program = gc.Build(); + for (size_t i = 0; i < tensor_inputs.size(); i++) { + auto in_tensor = scope->GetTensor(tensor_inputs[i]->id); + auto *data = in_tensor->mutable_data(target); + PADDLE_ENFORCE_EQ( + input_data[i].size(), + in_tensor->shape().numel(), + phi::errors::InvalidArgument( + "The size of tensor [%d] is different with " + "the input data's size! 
Please check.", + tensor_inputs[i]->id)); + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + for (size_t j = 0; j < in_tensor->shape().numel(); j++) { + data[j] = reinterpret_cast( + input_data[i].data())[j]; // All random data + } + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - CUDA_CALL(cudaMemcpy(reinterpret_cast(data), - input_data[i].data(), - in_tensor->shape().numel() * sizeof(float), - cudaMemcpyHostToDevice)); + CUDA_CALL( + cudaMemcpy(reinterpret_cast(data), + input_data[i].data(), + in_tensor->shape().numel() * sizeof(float), + cudaMemcpyHostToDevice)); #else PADDLE_THROW(phi::errors::Fatal("To use CUDA backends, " "you need to set WITH_CUDA ON!")); #endif - } else if (target.arch == Target::Arch::X86) { - for (size_t j = 0; j < in_tensor->shape().numel(); j++) { - data[j] = reinterpret_cast( - input_data[i].data())[j]; // All random data - } - } else { - CINN_NOT_IMPLEMENTED - } - } - VLOG(3) << info; - program->ExecuteTest(repeat_); - auto out = scope->GetTensor(tensor_out->id); - return out; - }) - .def( - "test_benchmark_with_code", - [](Program &self, - const cinn::common::Target &target, - const std::vector &tensor_inputs, - const std::vector &input_data, - const Variable &tensor_out, - int repeat_, - const std::string &info, - const std::string &code) { - // std::shared_ptr g(new - // hlir::framework::Graph(self, target)); - // hlir::framework::ApplyPass(g.get(), "InferShape"); - std::unordered_set fetch_ids; - auto graph = cinn::frontend::Optimize(&self, fetch_ids, target); - std::shared_ptr scope = - hlir::framework::BuildScope(target, graph); + }, + }); + } + VLOG(3) << info; + program->ExecuteTest(repeat_); + auto out = scope->GetTensor(tensor_out->id); + return out; + }) + .def("test_benchmark_with_code", + [](Program &self, + const cinn::common::Target &target, + const std::vector &tensor_inputs, + const std::vector &input_data, + const Variable &tensor_out, + int repeat_, + const std::string &info, + const std::string &code) { + // std::shared_ptr g(new + // hlir::framework::Graph(self, target)); + // hlir::framework::ApplyPass(g.get(), "InferShape"); + std::unordered_set fetch_ids; + auto graph = cinn::frontend::Optimize(&self, fetch_ids, target); + std::shared_ptr scope = + hlir::framework::BuildScope(target, graph); - hlir::framework::CompilationContext context(graph, scope, target); - hlir::framework::GraphCompiler gc(context); - auto program = gc.Build(code); - for (size_t i = 0; i < tensor_inputs.size(); i++) { - auto in_tensor = scope->GetTensor(tensor_inputs[i]->id); - auto *data = in_tensor->mutable_data(target); - CHECK_EQ(input_data[i].size(), in_tensor->shape().numel()) - << "The size of tensor [" << tensor_inputs[i]->id - << "] is different with the input data's size! Please check."; - if (target.arch == Target::Arch::NVGPU) { + hlir::framework::CompilationContext context(graph, scope, target); + hlir::framework::GraphCompiler gc(context); + auto program = gc.Build(code); + for (size_t i = 0; i < tensor_inputs.size(); i++) { + auto in_tensor = scope->GetTensor(tensor_inputs[i]->id); + auto *data = in_tensor->mutable_data(target); + PADDLE_ENFORCE_EQ( + input_data[i].size(), + in_tensor->shape().numel(), + phi::errors::InvalidArgument( + "The size of tensor [%d] is different with " + "the input data's size! 
Please check.", + tensor_inputs[i]->id)); + target.arch.Visit(adt::match{ + [&](common::UnknownArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::X86Arch) { + for (size_t j = 0; j < in_tensor->shape().numel(); j++) { + data[j] = reinterpret_cast( + input_data[i].data())[j]; // All random data + } + }, + [&](common::ARMArch) { CINN_NOT_IMPLEMENTED; }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - CUDA_CALL(cudaMemcpy(reinterpret_cast(data), - input_data[i].data(), - in_tensor->shape().numel() * sizeof(float), - cudaMemcpyHostToDevice)); + CUDA_CALL( + cudaMemcpy(reinterpret_cast(data), + input_data[i].data(), + in_tensor->shape().numel() * sizeof(float), + cudaMemcpyHostToDevice)); #else PADDLE_THROW(phi::errors::Fatal("To use CUDA backends, " "you need to set WITH_CUDA ON!")); #endif - } else if (target.arch == Target::Arch::X86) { - for (size_t j = 0; j < in_tensor->shape().numel(); j++) { - data[j] = reinterpret_cast( - input_data[i].data())[j]; // All random data - } - } else { - CINN_NOT_IMPLEMENTED - } - } - VLOG(3) << info; - program->ExecuteTest(repeat_); - auto out = scope->GetTensor(tensor_out->id); - return out; - }); + }, + }); + } + VLOG(3) << info; + program->ExecuteTest(repeat_); + auto out = scope->GetTensor(tensor_out->id); + return out; + }); py::class_(*m, "Interpreter") .def(py::init &, @@ -928,7 +949,7 @@ void BindFrontend(pybind11::module *m) { .def("get_cinn_name", [](PaddleModelConvertor &self, const std::string &paddle_name) { CHECK(self.var_model_to_program_map().count(paddle_name)) - << "Cannot find variabel " << paddle_name + << "Cannot find variable " << paddle_name << " in CINN! Please check."; return self.var_model_to_program_map().at(paddle_name); }); diff --git a/paddle/cinn/pybind/ir/ir.cc b/paddle/cinn/pybind/ir/ir.cc index d9f9bd5fcdf7f..42e8e157998cd 100644 --- a/paddle/cinn/pybind/ir/ir.cc +++ b/paddle/cinn/pybind/ir/ir.cc @@ -33,7 +33,14 @@ void TensorStore(Expr tensor, Expr value, const std::vector& indices) { std::vector AxisMap(const std::string& kinds, const std::vector& iter_expression) { std::vector rets; - CHECK_EQ(kinds.size(), iter_expression.size()); + PADDLE_ENFORCE_EQ( + kinds.size(), + iter_expression.size(), + phi::errors::InvalidArgument( + "The size of kinds and iter expression in AxisMap is not equal," + "where kinds size:%d but iter expression size:%d.", + kinds.size(), + iter_expression.size())); int n = iter_expression.size(); rets.reserve(n); for (int i = 0; i < n; i++) { diff --git a/paddle/cinn/pybind/lang.cc b/paddle/cinn/pybind/lang.cc index 5f7a80e12e2c0..ed321a66ddc18 100644 --- a/paddle/cinn/pybind/lang.cc +++ b/paddle/cinn/pybind/lang.cc @@ -153,13 +153,22 @@ void BindModule(py::module *m) { builder.def(py::init()) .def("add_function", [](ir::Module::Builder &self, ir::LoweredFunc func) { - if (self.GetTargetArch() == Target::Arch::NVGPU) { + self.GetTargetArch().Visit(adt::match{ + [&](common::UnknownArch) { LOG(FATAL) << "NotImplemented"; }, + [&](common::X86Arch) { + // Do nothing + }, + [&](common::ARMArch) { + // Do nothing + }, + [&](common::NVGPUArch) { #ifdef CINN_WITH_CUDA - auto func_expr = Expr(func); - ir::SetCudaAxisInfo(&func_expr); - optim::OptimizeExprGPU(&(func->body)); + auto func_expr = Expr(func); + ir::SetCudaAxisInfo(&func_expr); + optim::OptimizeExprGPU(&(func->body)); #endif - } + }, + }); self.AddFunction(func); }) .def("add_buffer", &ir::Module::Builder::AddBuffer) diff --git a/paddle/cinn/pybind/runtime.cc b/paddle/cinn/pybind/runtime.cc index 0ef1ee542aa35..0d38616147536 100644 --- 
a/paddle/cinn/pybind/runtime.cc +++ b/paddle/cinn/pybind/runtime.cc @@ -74,30 +74,47 @@ cinn_buffer_t *CreateBufferFromNumpy(py::array data, return buffer; } +cinn_buffer_t *CreateBufferFromNumpyImpl(common::UnknownArch, py::array data) { + LOG(FATAL) << "NotImplemented."; +} + +cinn_buffer_t *CreateBufferFromNumpyImpl(common::X86Arch, py::array data) { + return CreateBufferFromNumpy(data, cinn_x86_device); +} + +cinn_buffer_t *CreateBufferFromNumpyImpl(common::ARMArch, py::array data) { + LOG(FATAL) << "NotImplemented."; +} + +cinn_buffer_t *CreateBufferFromNumpyImpl(common::NVGPUArch, py::array data) { +#ifdef CINN_WITH_CUDA + std::vector shape; + std::copy_n(data.shape(), data.ndim(), std::back_inserter(shape)); + auto *buffer = new cinn_buffer_t(); + buffer->device = cinn_nvgpu_device; + buffer->memory_size = data.nbytes(); + CUDA_CALL(cudaMalloc(&buffer->memory, data.nbytes())); + CUDA_CALL(cudaMemcpy( + buffer->memory, data.data(), data.nbytes(), cudaMemcpyHostToDevice)); + return buffer; +#else + PADDLE_THROW(phi::errors::Fatal( + "To use CUDA backends, you need to set WITH_CUDA ON!")); +#endif +} + +cinn_buffer_t *InterfaceCreateBufferFromNumpy(common::Arch arch, + py::array data) { + return std::visit( + [&](const auto &impl) { return CreateBufferFromNumpyImpl(impl, data); }, + arch.variant()); +} + cinn_buffer_t *CreateBufferFromNumpy( py::array data, cinn::common::Target target = cinn::common::DefaultHostTarget(), int align = 0) { - if (target == cinn::common::DefaultHostTarget()) { - return CreateBufferFromNumpy(data, cinn_x86_device); - } else if (target.arch == Target::Arch::NVGPU) { -#ifdef CINN_WITH_CUDA - std::vector shape; - std::copy_n(data.shape(), data.ndim(), std::back_inserter(shape)); - auto *buffer = new cinn_buffer_t(); - buffer->device = cinn_nvgpu_device; - buffer->memory_size = data.nbytes(); - CUDA_CALL(cudaMalloc(&buffer->memory, data.nbytes())); - CUDA_CALL(cudaMemcpy( - buffer->memory, data.data(), data.nbytes(), cudaMemcpyHostToDevice)); - return buffer; -#else - PADDLE_THROW(phi::errors::Fatal( - "To use CUDA backends, you need to set WITH_CUDA ON!")); -#endif - } else { - CINN_NOT_IMPLEMENTED - } + return InterfaceCreateBufferFromNumpy(target.arch, data); } void BufferCopyTo(const cinn_buffer_t &buffer, py::array array) { diff --git a/paddle/cinn/runtime/cpu/CMakeLists.txt b/paddle/cinn/runtime/cpu/CMakeLists.txt index 72fa5f51bb0ca..804ee29ca5377 100644 --- a/paddle/cinn/runtime/cpu/CMakeLists.txt +++ b/paddle/cinn/runtime/cpu/CMakeLists.txt @@ -4,8 +4,8 @@ gather_srcs(cinnapi_src SRCS host_intrinsics.cc thread_backend.cc) if(WITH_MKL_CBLAS) gather_srcs(cinnapi_src SRCS mkl_math.cc cblas.cc) - if(WITH_MKLDNN) - gather_srcs(cinnapi_src SRCS mkldnn_math.cc) + if(WITH_ONEDNN) + gather_srcs(cinnapi_src SRCS onednn_math.cc) endif() endif() @@ -15,8 +15,8 @@ if(WITH_MKL_CBLAS) cinn_cc_test(test_mkl_math SRCS mkl_math_test.cc mkl_math.cc DEPS cinncore) endif() - if(WITH_MKLDNN) - cinn_cc_test(test_mkldnn_math SRCS mkldnn_math_test.cc mkldnn_math.cc DEPS + if(WITH_ONEDNN) + cinn_cc_test(test_onednn_math SRCS onednn_math_test.cc onednn_math.cc DEPS cinncore) endif() endif() diff --git a/paddle/cinn/runtime/cpu/mkl_math_test.cc b/paddle/cinn/runtime/cpu/mkl_math_test.cc index 50798ebb39029..f9149dab3a615 100644 --- a/paddle/cinn/runtime/cpu/mkl_math_test.cc +++ b/paddle/cinn/runtime/cpu/mkl_math_test.cc @@ -78,7 +78,7 @@ void TestCallElementwise(const std::string &fn_name, auto stages = CreateStages(lower_args); auto target = cinn::common::DefaultHostTarget(); - 
target.arch = Target::Arch::X86; + target.arch = cinn::common::X86Arch{}; ir::Module::Builder builder("module0", target); auto func = Lower("fn", stages, lower_args); builder.AddFunction(func); @@ -216,7 +216,7 @@ TEST(cinn_cpu_mkl_gemm_fp32, test) { auto stages = CreateStages({call, out}); auto target = cinn::common::DefaultHostTarget(); - target.arch = Target::Arch::X86; + target.arch = cinn::common::X86Arch{}; ir::Module::Builder builder("module0", target); auto func = Lower("fn", stages, {A, B, out, call}); diff --git a/paddle/cinn/runtime/cpu/mkldnn_math.cc b/paddle/cinn/runtime/cpu/onednn_math.cc similarity index 96% rename from paddle/cinn/runtime/cpu/mkldnn_math.cc rename to paddle/cinn/runtime/cpu/onednn_math.cc index f20e56e32f1e6..66af7029d7e58 100644 --- a/paddle/cinn/runtime/cpu/mkldnn_math.cc +++ b/paddle/cinn/runtime/cpu/onednn_math.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/cinn/runtime/cpu/mkldnn_math.h" +#include "paddle/cinn/runtime/cpu/onednn_math.h" #include @@ -25,7 +25,7 @@ using dnnl::memory; using tag = memory::format_tag; using dt = memory::data_type; -void cinn_cpu_mkldnn_softmax_fp32(int batch, +void cinn_cpu_onednn_softmax_fp32(int batch, int channel, int h, int w, @@ -75,7 +75,7 @@ void cinn_cpu_mkldnn_softmax_fp32(int batch, engine_stream.wait(); } -void cinn_cpu_mkldnn_conv2d_nchw_fp32(int batch_size, +void cinn_cpu_onednn_conv2d_nchw_fp32(int batch_size, int c_in, int input_h, int input_w, @@ -157,7 +157,7 @@ void cinn_cpu_mkldnn_conv2d_nchw_fp32(int batch_size, cpu_stream.wait(); } -CINN_REGISTER_HELPER(cinn_cpu_mkldnn) { +CINN_REGISTER_HELPER(cinn_cpu_onednn) { using namespace cinn; // NOLINT using backends::FunctionProto; auto host_target = cinn::common::DefaultHostTarget(); @@ -195,7 +195,7 @@ CINN_REGISTER_HELPER(cinn_cpu_mkldnn) { return shape; }; - REGISTER_EXTERN_FUNC_HELPER(cinn_cpu_mkldnn_conv2d_nchw_fp32, host_target) + REGISTER_EXTERN_FUNC_HELPER(cinn_cpu_onednn_conv2d_nchw_fp32, host_target) .SetRetType() .AddInputType() // batch_size .AddInputType() // c_in @@ -217,7 +217,7 @@ CINN_REGISTER_HELPER(cinn_cpu_mkldnn) { .SetShapeInference(inference_shape_conv2d_nchw) .End(); - REGISTER_EXTERN_FUNC_HELPER(cinn_cpu_mkldnn_softmax_fp32, host_target) + REGISTER_EXTERN_FUNC_HELPER(cinn_cpu_onednn_softmax_fp32, host_target) .SetRetType() .AddInputType() // batch_size .AddInputType() // c_in diff --git a/paddle/cinn/runtime/cpu/mkldnn_math.h b/paddle/cinn/runtime/cpu/onednn_math.h similarity index 95% rename from paddle/cinn/runtime/cpu/mkldnn_math.h rename to paddle/cinn/runtime/cpu/onednn_math.h index 9a37d13d57865..7d95b6461b0a9 100644 --- a/paddle/cinn/runtime/cpu/mkldnn_math.h +++ b/paddle/cinn/runtime/cpu/onednn_math.h @@ -21,7 +21,7 @@ // define some C APIs extern "C" { -void cinn_cpu_mkldnn_softmax_fp32(int batch, +void cinn_cpu_onednn_softmax_fp32(int batch, int channel, int h, int w, @@ -29,7 +29,7 @@ void cinn_cpu_mkldnn_softmax_fp32(int batch, cinn_buffer_t* inputs, cinn_buffer_t* out); -void cinn_cpu_mkldnn_conv2d_nchw_fp32(int batch_size, +void cinn_cpu_onednn_conv2d_nchw_fp32(int batch_size, int c_in, int input_h, int input_w, diff --git a/paddle/cinn/runtime/cpu/mkldnn_math_test.cc b/paddle/cinn/runtime/cpu/onednn_math_test.cc similarity index 96% rename from paddle/cinn/runtime/cpu/mkldnn_math_test.cc rename to paddle/cinn/runtime/cpu/onednn_math_test.cc index 15574a9028042..cbfa19ffb4762 100644 --- 
a/paddle/cinn/runtime/cpu/mkldnn_math_test.cc +++ b/paddle/cinn/runtime/cpu/onednn_math_test.cc @@ -42,7 +42,7 @@ cinn_buffer_t *CreateBuffer(const std::vector shape, return cinn::common::BufferBuilder(Float(32), shape).set_zero().Build(); } -TEST(cinn_cpu_mkldnn_conv2d_nchw_fp32, test) { +TEST(cinn_cpu_onednn_conv2d_nchw_fp32, test) { int n(1); int c_in(3); int i_h(224); @@ -65,7 +65,7 @@ TEST(cinn_cpu_mkldnn_conv2d_nchw_fp32, test) { auto call = Compute( {Expr(1)}, [=]() -> Expr { - return lang::CallExtern("cinn_cpu_mkldnn_conv2d_nchw_fp32", + return lang::CallExtern("cinn_cpu_onednn_conv2d_nchw_fp32", { Expr(n), // batch_size Expr(c_in), // c_in @@ -85,7 +85,7 @@ TEST(cinn_cpu_mkldnn_conv2d_nchw_fp32, test) { weights.tensor() // weights }); }, - "cinn_cpu_mkldnn_conv2d_nchw_fp32"); + "cinn_cpu_onednn_conv2d_nchw_fp32"); auto out = call->TupleGet(0); out->WithBuffer(Float(32)); @@ -93,7 +93,7 @@ TEST(cinn_cpu_mkldnn_conv2d_nchw_fp32, test) { auto stages = CreateStages({call, out}); auto target = cinn::common::DefaultHostTarget(); - target.arch = Target::Arch::X86; + target.arch = cinn::common::X86Arch{}; ir::Module::Builder builder("module0", target); auto func = Lower("fn", stages, {input, weights, out, call}); diff --git a/paddle/cinn/runtime/cpu/use_extern_funcs.h b/paddle/cinn/runtime/cpu/use_extern_funcs.h index e708864f5b36a..4c65e5ff30501 100644 --- a/paddle/cinn/runtime/cpu/use_extern_funcs.h +++ b/paddle/cinn/runtime/cpu/use_extern_funcs.h @@ -21,7 +21,7 @@ CINN_USE_REGISTER(host_intrinsics) CINN_USE_REGISTER(mkl_math) CINN_USE_REGISTER(cinn_cpu_mkl) #ifdef CINN_WITH_DNNL -CINN_USE_REGISTER(cinn_cpu_mkldnn) +CINN_USE_REGISTER(cinn_cpu_onednn) #endif #endif CINN_USE_REGISTER(cinn_backend_parallel) diff --git a/paddle/cinn/runtime/flags.cc b/paddle/cinn/runtime/flags.cc index c310a47f5f180..9427d0eda7195 100644 --- a/paddle/cinn/runtime/flags.cc +++ b/paddle/cinn/runtime/flags.cc @@ -75,7 +75,7 @@ PD_DEFINE_bool(group_schedule_tiling_first, "Whether to enable new group scheduler tiling first strategy."); PD_DEFINE_bool(cinn_new_cluster_op_method, - BoolFromEnv("FLAGS_cinn_new_cluster_op_method", false), + BoolFromEnv("FLAGS_cinn_new_cluster_op_method", true), "Whether to enable newly developed clustering method of group " "op for cinn."); @@ -343,17 +343,38 @@ bool IsCompiledWithCUDNN() { #endif } +void CheckCompileOptionImpl(cinn::common::UnknownArch) { + PADDLE_THROW(phi::errors::Fatal("unknown architecture")); +} + +void CheckCompileOptionImpl(cinn::common::X86Arch) { + // Do nothing. +} + +void CheckCompileOptionImpl(cinn::common::ARMArch) { + // Do nothing. 
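`CreateBufferFromNumpyImpl` above and `CheckCompileOptionImpl` here show a second flavor of the same dispatch: one free-function overload per architecture tag plus a generic `std::visit` forwarder over `arch.variant()`, which keeps each per-arch body a named function rather than an inline lambda. A hedged sketch with invented names and a reduced tag set:

```cpp
#include <stdexcept>
#include <variant>

struct X86Arch {};
struct NVGPUArch {};
using Arch = std::variant<X86Arch, NVGPUArch>;

// One overload per tag type; the tag parameter exists only for dispatch.
void CheckBuildFlagsImpl(X86Arch) { /* host builds always pass */ }
void CheckBuildFlagsImpl(NVGPUArch) {
#ifndef EXAMPLE_WITH_CUDA  // stand-in for the CINN_WITH_CUDNN / WITH_CUDA guards
  throw std::runtime_error("recompile with CUDA support to target NVGPU");
#endif
}

// A generic lambda forwards whichever alternative is active to overload
// resolution, mirroring the std::visit(..., arch.variant()) calls above.
void CheckBuildFlags(const Arch& arch) {
  std::visit([](const auto& impl) { CheckBuildFlagsImpl(impl); }, arch);
}
```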
+} + +void CheckCompileOptionImpl(cinn::common::NVGPUArch) { +#if defined(CINN_WITH_CUDNN) + // Do nothing; +#else + PADDLE_THROW(phi::errors::Fatal( + "Current CINN version does not support NVGPU, please try to " + "recompile with -DWITH_CUDA.")); +#endif +} + +void CheckCompileOption(cinn::common::Arch arch) { + return std::visit([](const auto& impl) { CheckCompileOptionImpl(impl); }, + arch.variant()); +} + cinn::common::Target CurrentTarget::target_ = cinn::common::DefaultTarget(); void CurrentTarget::SetCurrentTarget(const cinn::common::Target& target) { - if (!IsCompiledWithCUDA() && - target.arch == cinn::common::Target::Arch::NVGPU) { - PADDLE_THROW(phi::errors::Fatal( - "Current CINN version does not support NVGPU, please try to " - "recompile with -DWITH_CUDA.")); - } else { - target_ = target; - } + CheckCompileOption(target.arch); + target_ = target; } cinn::common::Target& CurrentTarget::GetCurrentTarget() { return target_; } diff --git a/paddle/common/enforce.cc b/paddle/common/enforce.cc index 0719035db4c49..6dd4f0372e2b3 100644 --- a/paddle/common/enforce.cc +++ b/paddle/common/enforce.cc @@ -64,10 +64,11 @@ int GetCallStackLevel() { return FLAGS_call_stack_level; } std::string SimplifyErrorTypeFormat(const std::string& str) { std::ostringstream sout; size_t type_end_pos = str.find(':', 0); - if (str.substr(type_end_pos - 5, type_end_pos) == "Error:") { + if (type_end_pos != str.npos && type_end_pos >= 5 && + str.substr(type_end_pos - 5, 6) == "Error:") { // Remove "Error:", add "()" // Examples: - // InvalidArgumentError: xxx -> (InvalidArgument): xxx + // InvalidArgumentError: xxx -> (InvalidArgument) xxx sout << "(" << str.substr(0, type_end_pos - 5) << ")" << str.substr(type_end_pos + 1); } else { diff --git a/paddle/common/enforce.h b/paddle/common/enforce.h index 6076e9089df83..b3027d55c8065 100644 --- a/paddle/common/enforce.h +++ b/paddle/common/enforce.h @@ -362,47 +362,5 @@ inline bool is_error(const T& stat) { } namespace pir { -class IrNotMetException : public std::exception { - public: - explicit IrNotMetException(const std::string& str) - : err_str_(str + ::common::enforce::GetCurrentTraceBackString()) {} - - const char* what() const noexcept override { return err_str_.c_str(); } - - private: - std::string err_str_; - ::common::enforce::details::PaddleFatalGuard paddle_fatal_guard_; -}; - -#define IR_THROW(...) \ - do { \ - try { \ - throw pir::IrNotMetException( \ - paddle::string::Sprintf("Error occurred at: %s:%d :\n%s", \ - __FILE__, \ - __LINE__, \ - paddle::string::Sprintf(__VA_ARGS__))); \ - } catch (const std::exception& e) { \ - std::cout << e.what() << std::endl; \ - throw; \ - } \ - } while (0) - -#define IR_ENFORCE(COND, ...) \ - do { \ - bool __cond__(COND); \ - if (UNLIKELY(is_error(__cond__))) { \ - try { \ - throw pir::IrNotMetException( \ - paddle::string::Sprintf("Error occurred at: %s:%d :\n%s", \ - __FILE__, \ - __LINE__, \ - paddle::string::Sprintf(__VA_ARGS__))); \ - } catch (const std::exception& e) { \ - std::cout << e.what() << std::endl; \ - throw; \ - } \ - } \ - } while (0) - +#define IR_THROW(...) 
PADDLE_THROW(phi::errors::Fatal(__VA_ARGS__)) } // namespace pir
diff --git a/paddle/common/flags.cc b/paddle/common/flags.cc index 16057b5ef598f..770b51e6fd3f1 100644 --- a/paddle/common/flags.cc +++ b/paddle/common/flags.cc
@@ -629,6 +629,10 @@ PHI_DEFINE_EXPORTED_uint64( "The real chunk size is max(request_size, " "FLAGS_auto_growth_chunk_size_in_mb)."); +PHI_DEFINE_EXPORTED_bool(custom_device_mem_record, + false, + "Enable mem record event on custom device"); + #endif /**
@@ -737,13 +741,13 @@ PHI_DEFINE_EXPORTED_bool(set_to_1d, false, "set 0D Tensor to 1D numpy"); /** * Debug related FLAG - * Name: tracer_mkldnn_ops_on + * Name: tracer_onednn_ops_on * Since Version: 2.0.0 * Value Range: string, default=empty * Example: * Note: Holds list of operation types with OneDNN kernels to be enabled. */ -PHI_DEFINE_EXPORTED_string(tracer_mkldnn_ops_on, +PHI_DEFINE_EXPORTED_string(tracer_onednn_ops_on, "", "List of OneDNN operation types to be turned on");
@@ -761,13 +765,13 @@ PHI_DEFINE_EXPORTED_string(static_runtime_data_save_path, /** * Debug related FLAG - * Name: tracer_mkldnn_ops_off + * Name: tracer_onednn_ops_off * Since Version: 2.0.0 * Value Range: string, default=empty * Example: * Note: Holds list of operation types with OneDNN kernels to be disabled. */ -PHI_DEFINE_EXPORTED_string(tracer_mkldnn_ops_off, +PHI_DEFINE_EXPORTED_string(tracer_onednn_ops_off, "", "List of OneDNN operation types to be turned off");
@@ -1021,6 +1025,19 @@ PHI_DEFINE_EXPORTED_string(deny_cinn_ops, "", "It controls the cinn op subset to be not used."); +/* + * CINN related FLAG + * Name: FLAGS_enable_cinn_compile_cache + * Since Version: 3.0 Beta + * Value Range: bool, default=true + * Example: FLAGS_enable_cinn_compile_cache=true would reuse cached Kernel + * function + */ +PHI_DEFINE_EXPORTED_bool( + enable_cinn_compile_cache, + true, + "It controls whether to enable cinn compilation cache."); + /* * CINN related FLAG * Name: FLAGS_enable_pe_launch_cinn
@@ -1244,6 +1261,17 @@ PHI_DEFINE_EXPORTED_bool(benchmark_nccl, */ PHI_DEFINE_EXPORTED_bool(use_autotune, false, "Whether enable autotune."); +/** + * CINN training related FLAG + * Name: FLAGS_disable_dyshape_in_train + * Since Version: 2.7.0 + * Value Range: bool, default=false + * Example: + */ +PHI_DEFINE_EXPORTED_bool(disable_dyshape_in_train, + false, + "Whether to disable dyshape in training."); + /** * Conv Search cache max number related FLAG * Name: FLAGS_search_cache_max_number
@@ -1345,6 +1373,19 @@ PHI_DEFINE_EXPORTED_bool(use_shm_cache, false, "Use shm cache in mmap_allocator."); +/** + * mmap_allocator related FLAG + * Name: dataloader_use_file_descriptor + * Since Version: 2.6.2 + * Value Range: bool, default=false + * Example: + * Note: If True, mmap_allocator will use file descriptors to open shared memory + * operations.
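One fix a few hunks back, in paddle/common/enforce.cc, is worth spelling out: `std::string::substr` takes a (position, count) pair, not a (begin, end) pair, so the old `str.substr(type_end_pos - 5, type_end_pos)` compared a slice of the wrong length, and `type_end_pos - 5` underflows whenever the first ':' sits before index 5. A small demonstration of the corrected check (illustrative rewrite, not the Paddle source):

```cpp
#include <iostream>
#include <string>

// Rewrites "SomethingError: msg" as "(Something) msg"; otherwise returns str.
std::string SimplifyErrorType(const std::string& str) {
  size_t type_end_pos = str.find(':');
  // substr's second argument is a COUNT: "Error:" is 6 characters long.
  // The npos and >= 5 guards prevent the unsigned underflow the old code had.
  if (type_end_pos != std::string::npos && type_end_pos >= 5 &&
      str.substr(type_end_pos - 5, 6) == "Error:") {
    return "(" + str.substr(0, type_end_pos - 5) + ")" +
           str.substr(type_end_pos + 1);
  }
  return str;
}

int main() {
  std::cout << SimplifyErrorType("InvalidArgumentError: bad shape") << "\n";
  // prints: (InvalidArgument) bad shape
  std::cout << SimplifyErrorType("ok: no error type") << "\n";  // unchanged
}
```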
+ */ +PHI_DEFINE_EXPORTED_bool(dataloader_use_file_descriptor, + false, + "Use file descriptor in mmap_allocator."); + /** * Tensor operants related FLAG * Name: tensor_operants_mode @@ -1367,7 +1408,7 @@ PHI_DEFINE_EXPORTED_string(tensor_operants_mode, * Since Version: 2.6.0 * Value Range: bool, default=false * Example: - * Note: If Ture, executor will use new IR + * Note: If True, executor will use new IR */ PHI_DEFINE_EXPORTED_bool(enable_pir_in_executor, false, @@ -1380,7 +1421,7 @@ PHI_DEFINE_EXPORTED_bool(enable_pir_in_executor, * Since Version: 2.6.0 * Value Range: bool, default=true * Example: - * Note: If Ture, program will be translated to pir program + * Note: If True, program will be translated to pir program * and then run in executor for dy2st mode. */ PHI_DEFINE_EXPORTED_bool(enable_pir_with_pt_in_dy2st, @@ -1530,7 +1571,7 @@ PHI_DEFINE_EXPORTED_int64(alloc_fill_value, * Since Version: 3.0.0 * Value Range: bool, default=false * Example: - * Note: If Ture, will apply shape_optimization pass to new IR. + * Note: If True, will apply shape_optimization pass to new IR. */ PHI_DEFINE_EXPORTED_bool(pir_apply_shape_optimization_pass, false, diff --git a/paddle/common/union_find_set.h b/paddle/common/union_find_set.h new file mode 100644 index 0000000000000..b00c8ae7de8f5 --- /dev/null +++ b/paddle/common/union_find_set.h @@ -0,0 +1,72 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
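The new paddle/common/union_find_set.h below is a small disjoint-set (union-find): the non-const `Find` performs path compression, `Union` links the two roots, and `VisitCluster` groups every recorded element by its root. A usage sketch, assuming the header compiles as shown (the extraction has eaten a few template arguments, e.g. `parent_` should read `std::unordered_map<T, T>`):

```cpp
#include <iostream>
#include <vector>

#include "paddle/common/union_find_set.h"

int main() {
  common::UnionFindSet<int> ufs;
  ufs.Union(1, 2);
  ufs.Union(2, 3);  // {1, 2, 3} now share one root
  ufs.Union(7, 8);  // {7, 8} forms a second cluster

  std::cout << std::boolalpha;
  std::cout << ufs.HasSameRoot(1, 3) << "\n";  // true
  std::cout << ufs.HasSameRoot(3, 7) << "\n";  // false

  // VisitCluster hands each equivalence class to the callback once:
  // here, one vector of size 3 and one of size 2 (order unspecified).
  ufs.VisitCluster([](const std::vector<int>& cluster) {
    std::cout << "cluster of size " << cluster.size() << "\n";
  });
}
```

Note that only elements that have passed through `Union` appear in `VisitCluster`; `Find` on an unknown element returns the element itself without inserting it.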
+ +#pragma once + +#include +#include + +namespace common { + +template +class UnionFindSet { + public: + const T& Find(const T& x) const { + if (parent_.find(x) == parent_.end()) { + return x; + } + if (parent_.at(x) != x) return Find(parent_.at(x)); + return parent_.at(x); + } + + const T& Find(const T& x) { + if (parent_.find(x) == parent_.end()) { + return x; + } + if (parent_[x] != x) { + parent_[x] = Find(parent_[x]); + } + return parent_.at(x); + } + + void Union(const T& p, const T& q) { + if (parent_.find(p) == parent_.end()) { + parent_[p] = p; + } + if (parent_.find(q) == parent_.end()) { + parent_[q] = q; + } + parent_[Find(q)] = Find(p); + } + + template + void VisitCluster(const DoEachClusterT& DoEachCluster) const { + std::unordered_map> clusters_map; + for (auto it = parent_.begin(); it != parent_.end(); it++) { + clusters_map[Find(it->first)].emplace_back(it->first); + } + for (const auto& [_, clusters] : clusters_map) { + DoEachCluster(clusters); + } + } + + bool HasSameRoot(const T& p, const T& q) const { return Find(p) == Find(q); } + + std::unordered_map* GetMap() { return &parent_; } + + private: + std::unordered_map parent_; +}; + +} // namespace common diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec deleted file mode 100644 index d10ff999f6eb2..0000000000000 --- a/paddle/fluid/API.spec +++ /dev/null @@ -1,33 +0,0 @@ -paddle.incubate.optimizer.PipelineOptimizer (paddle.incubate.optimizer.PipelineOptimizer, ('document', '2e55a29dbeb874934f7a1a1af3a22b8c')) -paddle.incubate.optimizer.PipelineOptimizer.__init__ (ArgSpec(args=['self', 'optimizer', 'num_microbatches', 'start_cpu_core_id'], varargs=None, keywords=None, defaults=(1, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.incubate.optimizer.PipelineOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.audio.features (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e')) -paddle.audio.features.layers.LogMelSpectrogram (ArgSpec(), ('document', 'c38b53606aa89215c4f00d3833e158b8')) -paddle.audio.features.layers.LogMelSpectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'x': }), ('document', '6c14f6f78dc697a6981cf90412e2f1ea')) -paddle.audio.features.layers.LogMelSpectrogram.load_dict (ArgSpec(args=[], varargs='args', varkw='kwargs', defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={}), ('document', '01221a60445ee437f439a8cbe293f759')) -paddle.audio.features.layers.LogMelSpectrogram.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers', 'structured_name_prefix', 'use_hook'], varargs=None, varkw=None, defaults=(None, True, '', True), kwonlyargs=[], kwonlydefaults=None, annotations={}), ('document', '0c01cb0c12220c9426ae49549b145b0b')) -paddle.audio.features.layers.MFCC (ArgSpec(), ('document', 'bcbe6499830d9228a4f746ddd63b6c0f')) -paddle.audio.features.layers.MFCC.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'x': }), ('document', 'd86bcaa345f26851089bfdb3efecd9e7')) -paddle.audio.features.layers.MelSpectrogram (ArgSpec(), ('document', 'adf4012310984568ae9da6170aa89f91')) -paddle.audio.features.layers.MelSpectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], 
kwonlydefaults=None, annotations={'return': , 'x': }), ('document', '458e9d454c8773091567c6b400f48cf5')) -paddle.audio.features.layers.Spectrogram (ArgSpec(), ('document', '83811af6da032099bf147e3e01a458e1')) -paddle.audio.features.layers.Spectrogram.forward (ArgSpec(args=['self', 'x'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'x': }), ('document', 'ab11e318fca1410f743b5432394dea35')) -paddle.audio.functional (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e')) -paddle.audio.functional.functional.compute_fbank_matrix (ArgSpec(args=['sr', 'n_fft', 'n_mels', 'f_min', 'f_max', 'htk', 'norm', 'dtype'], varargs=None, varkw=None, defaults=(64, 0.0, None, False, 'slaney', 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'sr': , 'n_fft': , 'n_mels': , 'f_min': , 'f_max': typing.Union[float, NoneType], 'htk': , 'norm': typing.Union[str, float], 'dtype': }), ('document', '3c5411caa6baedb68860b09c81e0147c')) -paddle.audio.functional.functional.create_dct (ArgSpec(args=['n_mfcc', 'n_mels', 'norm', 'dtype'], varargs=None, varkw=None, defaults=('ortho', 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'n_mfcc': , 'n_mels': , 'norm': typing.Union[str, NoneType], 'dtype': }), ('document', 'c9c57550671f9725b053769411d2f65a')) -paddle.audio.functional.functional.fft_frequencies (ArgSpec(args=['sr', 'n_fft', 'dtype'], varargs=None, varkw=None, defaults=('float32',), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'sr': , 'n_fft': , 'dtype': }), ('document', '057b990e79c9c780622407267c0a43c6')) -paddle.audio.functional.functional.hz_to_mel (ArgSpec(args=['freq', 'htk'], varargs=None, varkw=None, defaults=(False,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Union[paddle.Tensor, float], 'freq': typing.Union[paddle.Tensor, float], 'htk': }), ('document', '7ca01521dd0bf26cd3f72c67f7168dc4')) -paddle.audio.functional.functional.mel_frequencies (ArgSpec(args=['n_mels', 'f_min', 'f_max', 'htk', 'dtype'], varargs=None, varkw=None, defaults=(64, 0.0, 11025.0, False, 'float32'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'n_mels': , 'f_min': , 'f_max': , 'htk': , 'dtype': }), ('document', '2af3cf997ed1274214ec240b2b59a98d')) -paddle.audio.functional.functional.mel_to_hz (ArgSpec(args=['mel', 'htk'], varargs=None, varkw=None, defaults=(False,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Union[float, paddle.Tensor], 'mel': typing.Union[float, paddle.Tensor], 'htk': }), ('document', 'e93b432d382f98c60d7c7599489e7072')) -paddle.audio.functional.functional.power_to_db (ArgSpec(args=['spect', 'ref_value', 'amin', 'top_db'], varargs=None, varkw=None, defaults=(1.0, 1e-10, 80.0), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'spect': , 'ref_value': , 'amin': , 'top_db': typing.Union[float, NoneType]}), ('document', '28bbb1973e8399e856bfaea0415cecb9')) -paddle.audio.functional.window.get_window (ArgSpec(args=['window', 'win_length', 'fftbins', 'dtype'], varargs=None, varkw=None, defaults=(True, 'float64'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'window': typing.Union[str, typing.Tuple[str, float]], 'win_length': , 'fftbins': , 'dtype': }), ('document', '2418d63da10c0cd5da9ecf0a88ddf783')) -paddle.audio.backends (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e')) -paddle.audio.backends.init_backend.get_current_audio_backend (ArgSpec(args=[], varargs=None, varkw=None, defaults=None, 
kwonlyargs=[], kwonlydefaults=None, annotations={'return': }), ('document', '3ff9fd62e8be1f3dc7e34afaf50e1645')) -paddle.audio.backends.init_backend.list_available_backends (ArgSpec(args=[], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.List[str]}), ('document', '8eba49f1b69f7ec7fa139a0714a2724e')) -paddle.audio.backends.init_backend.set_backend (ArgSpec(args=['backend_name'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'backend_name': }), ('document', '9680247dd97274d345dee415e2787527')) -paddle.audio.backends.wave_backend.info (ArgSpec(args=['filepath', 'format'], varargs=None, varkw=None, defaults=(None,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': , 'filepath': , 'format': typing.Union[str, NoneType]}), ('document', 'e0ffd3accd942a9b0a4c08463a9f60f6')) -paddle.audio.backends.wave_backend.load (ArgSpec(args=['filepath', 'frame_offset', 'num_frames', 'normalize', 'channels_first', 'format'], varargs=None, varkw=None, defaults=(0, -1, True, True, None), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Tuple[paddle.Tensor, int], 'filepath': typing.Union[str, pathlib.Path], 'frame_offset': , 'num_frames': , 'normalize': , 'channels_first': , 'format': typing.Union[str, NoneType]}), ('document', '4de50575ca516b4b7c7c82c7fdec808f')) -paddle.audio.backends.wave_backend.save (ArgSpec(args=['filepath', 'src', 'sample_rate', 'channels_first', 'compression', 'format', 'encoding', 'bits_per_sample'], varargs=None, varkw=None, defaults=(True, None, None, None, None), kwonlyargs=[], kwonlydefaults=None, annotations={'filepath': , 'src': , 'sample_rate': , 'channels_first': , 'compression': typing.Union[float, NoneType], 'format': typing.Union[str, NoneType], 'encoding': typing.Union[str, NoneType], 'bits_per_sample': typing.Union[int, NoneType]}), ('document', '4c85cfcd29a0dcdfc32e74db8c0c3961')) -paddle.audio.datasets (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e')) -paddle.audio.datasets.TESS (ArgSpec(), ('document', '3605f3aa2191ede7ddbe594cd27bb067')) -paddle.audio.datasets.TESS.meta_info (ArgSpec(), ('document', '60d548a6f71629c3b69bcda3a30d4819')) diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc index 5e2be03108294..2d7326f825acc 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc @@ -39,7 +39,7 @@ void ComputeInterceptor::PrepareDeps() { for (int64_t i = 0; i < node_->max_run_times(); ++i) { ready_size_map.emplace(i, 0); } - in_readys_.emplace(up.first, std::make_pair(up.second, ready_size_map)); + in_readies_.emplace(up.first, std::make_pair(up.second, ready_size_map)); } for (auto down : downstream) { out_buffs_.emplace(down.first, std::make_pair(down.second, 0)); @@ -106,11 +106,11 @@ InterceptorMessage ComputeInterceptor::PrepareVarsMsg() { } void ComputeInterceptor::IncreaseReady(int64_t up_id, int64_t scope_id) { - auto it = in_readys_.find(up_id); + auto it = in_readies_.find(up_id); PADDLE_ENFORCE_NE(it, - in_readys_.end(), + in_readies_.end(), platform::errors::NotFound( - "Cannot find upstream=%lld in in_readys.", up_id)); + "Cannot find upstream=%lld in in_readies.", up_id)); auto max_ready_size = it->second.first; const auto& ready_scope_map = it->second.second; @@ -171,7 +171,7 @@ bool ComputeInterceptor::IsInputReady() { for (int64_t i = 
start_micro_step; i < start_micro_step + num_micro_step; ++i) { bool flag = true; - for (auto& ins : in_readys_) { + for (auto& ins : in_readies_) { auto ready_size_map = ins.second.second; flag = flag && (ready_size_map.at(i) != 0); } @@ -268,7 +268,7 @@ void ComputeInterceptor::SendDataReadyToDownStream() { } void ComputeInterceptor::ReplyCompletedToUpStream() { - for (auto& ins : in_readys_) { + for (auto& ins : in_readies_) { auto up_id = ins.first; auto ready_size = ins.second.second.at(cur_scope_id_); ready_size -= 1; diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.h b/paddle/fluid/distributed/fleet_executor/compute_interceptor.h index 26205d5ac8264..bb26c62061734 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.h +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.h @@ -41,7 +41,7 @@ class ComputeInterceptor : public Interceptor { // upstream_id-->(max_ready_size, scope-->ready_size) std::map>> - in_readys_{}; + in_readies_{}; // downstream_id-->(max_buffer_size, used_size) std::map> out_buffs_{}; diff --git a/paddle/fluid/distributed/index_dataset/CMakeLists.txt b/paddle/fluid/distributed/index_dataset/CMakeLists.txt index 0bd11cc214de4..7d6f963e48634 100644 --- a/paddle/fluid/distributed/index_dataset/CMakeLists.txt +++ b/paddle/fluid/distributed/index_dataset/CMakeLists.txt @@ -3,11 +3,11 @@ cc_library( index_wrapper SRCS index_wrapper.cc DEPS index_dataset_proto framework_io) -if(WITH_MKLDNN) +if(WITH_ONEDNN) cc_library( index_sampler SRCS index_sampler.cc - DEPS xxhash index_wrapper eigen3 mkldnn) + DEPS xxhash index_wrapper eigen3 onednn) else() cc_library( index_sampler diff --git a/paddle/fluid/distributed/ps/service/CMakeLists.txt b/paddle/fluid/distributed/ps/service/CMakeLists.txt index eac2585416d8b..42d9dbce2f4d8 100755 --- a/paddle/fluid/distributed/ps/service/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/service/CMakeLists.txt @@ -11,6 +11,10 @@ else() endif() +if(WITH_PSCORE AND NOT WITH_HETERPS) + set(BRPC_DEPS ${BRPC_DEPS} ps_service) +endif() + brpc_library( sendrecv_rpc SRCS diff --git a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt index a6bb716e6b7ad..64950443c0efc 100644 --- a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt @@ -1,6 +1,6 @@ add_subdirectory(generator) -set(EAGER_GENERETOR_DEPS +set(EAGER_GENERATOR_DEPS ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} pybind @@ -13,12 +13,12 @@ set(EAGER_GENERETOR_DEPS imperative_flag) if(WITH_CUSTOM_DEVICE) - set(EAGER_GENERETOR_DEPS ${EAGER_GENERETOR_DEPS} + set(EAGER_GENERATOR_DEPS ${EAGER_GENERATOR_DEPS} custom_device_common_op_registry) endif() add_executable(eager_generator eager_generator.cc) -target_link_libraries(eager_generator ${EAGER_GENERETOR_DEPS}) +target_link_libraries(eager_generator ${EAGER_GENERATOR_DEPS}) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(eager_generator ${os_dependency_modules}) @@ -93,13 +93,13 @@ if(WIN32) list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/openblas.dll) endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) message("Copied mkldnn.dll for Eager AutoCodeGen") add_custom_command( OUTPUT ${eager_generator_path}/mkldnn.dll COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${eager_generator_path} - DEPENDS mkldnn) + DEPENDS onednn) list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/mkldnn.dll) endif() diff --git 
a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py index f6892628f3b78..47bed1595a465 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py @@ -57,6 +57,7 @@ "conv3d_double_grad", "depthwise_conv2d_grad_grad", "concat_double_grad", + "stack_double_grad", "expand_grad", "argsort_grad", "eigh_grad", diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 128f159e1d0e1..c272e09a9579f 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -48,7 +48,7 @@ # so we should check parameter(output) with rule of inplace. # But because there is no check in old dygraph mode, in order to # keeping the code compatible, here we also skip inplace check in new dygraph temporarily, -# and this will be fixed in the futrue. +# and this will be fixed in the future. inplace_check_blacklist = {"assign_out_"} # Black Ops list that's NO NEED to apply code generation @@ -75,9 +75,12 @@ "tanh_triple_grad", "minimum_double_grad", "maximum_double_grad", + "abs_triple_grad", + "exp_double_grad", + "log_double_grad", ] -# white ops list whose kernel can automaically do type promotion. +# white ops list whose kernel can automatically do type promotion. # future will get this list from same place with static graph. type_promote_white_list = { "add": ["x", "y"], @@ -85,8 +88,8 @@ "where": ["x", "y"], } -# dict of special api that forward api's output will affect bacward api's output -# bacward api's output usually affected by backward api's input +# dict of special api that forward api's output will affect backward api's output +# backward api's output usually affected by backward api's input special_prune_dict = { "matmul_grad": {"x": "grad_y", "y": "grad_x"}, } @@ -289,7 +292,7 @@ class {} : public egr::GradNodeBase {{ // Forward API Call {} - // Log memory infomation + // Log memory information {} // Check NaN and Inf if needed {} @@ -343,7 +346,7 @@ class {} : public egr::GradNodeBase {{ {} // Forward API Call {} - // Log memory infomation + // Log memory information {} // Check NaN and Inf if needed {} @@ -535,8 +538,8 @@ class {} : public egr::GradNodeBase {{ """ TYPE_PROMOTION_LOGIC_TEMPLATE = """ if (phi::NeedTypePromotion({x}.dtype(), {y}.dtype())) {{ - VLOG(5) << "got different data type, run type protmotion automatically."; - LOG_FIRST_N(WARNING, 1) << "got different data type, run type protmotion automatically, this may cause data type been changed."; + VLOG(5) << "got different data type, run type promotion automatically."; + LOG_FIRST_N(WARNING, 1) << "got different data type, run type promotion automatically, this may cause data type been changed."; {op_name} auto promotion_type = phi::GetPromoteDtype(op_name, {x}.dtype(), {y}.dtype()); @@ -1128,7 +1131,7 @@ def GenerateNodeCreationCodes(self, for_backward=False, is_inplaced=False): need_pre_contiguous_set.add(name) set_tensor_wrappers = f"{indent}grad_node->SetTensorWrapper_{name}({name}_tmp);" set_input_tensor_wrappers_list.append(set_tensor_wrappers) - else: # Forwad's output as backward's input + else: # Forward's output as backward's input if num_fwd_outputs > 1: # Aligned with forward output position assert name in forward_outputs_position_map, AssertMessage( @@ -1830,9 +1833,7 @@ def 
GenerateForwardDefinitionAndDeclaration(self, is_inplaced): f"return {forward_ad_function_name}({amp_inputs_call_args_str});" ) if is_inplaced or (forward_api_name == "cast"): - amp_logic_str = "\n VLOG(5) << \" No AMP for {} because it is a inplace or cast api. \"; ".format( - forward_ad_function_name - ) + amp_logic_str = f"\n VLOG(5) << \" No AMP for {forward_ad_function_name} because it is a inplace or cast api. \"; " else: amp_logic_str = AMP_LOGIC_TEMPLATE.format( kernel_trans2_op_name_str, @@ -1859,11 +1860,7 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced): return_value=type_promote_call_list, ) else: - type_promotion_logic_str = ( - "\n VLOG(5) << \" No Type Promotion for {} api. \"; ".format( - forward_ad_function_name - ) - ) + type_promotion_logic_str = f"\n VLOG(5) << \" No Type Promotion for {forward_ad_function_name} api. \"; " # Forward layout autotune layout_autotune_list_str = " ".join( layout_autotune_list @@ -1897,9 +1894,7 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced): # Generate forward_definition_str and forward_declaration_str if self.is_forward_only: if len(amp_tensors_vector_list) == 0: - amp_logic_str = "\n VLOG(7) << \" No AMP for {} because it has no input. \"; ".format( - forward_ad_function_name - ) + amp_logic_str = f"\n VLOG(7) << \" No AMP for {forward_ad_function_name} because it has no input. \"; " self.forward_definition_str += ( FORWARD_ONLY_FUNCTION_TEMPLATE.format( returns_type_str, @@ -3063,7 +3058,7 @@ def GenerateForwardHFile(filepath, forward_function_declaration_str): for i in range(len(api_yaml_paths)): api_yaml_path = api_yaml_paths[i] - # string api is forwrad only + # string api is forward only if not api_yaml_path.endswith('strings_ops.yaml'): backward_yaml_path = backward_yaml_paths[i] else: diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index 2a97f5bf35e90..ce7f7caf1f44c 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -261,6 +261,106 @@ void GradNodeBase::SetGradInMeta(const std::vector& fwd_out, } } +void GradNodeBase::SetGradInMeta(const std::vector& fwd_out, + size_t slot_rank) { + VLOG(7) << "Set GradSlotMeta for Grad Inputs"; + size_t slot_size = fwd_out.size(); + PADDLE_ENFORCE_LE( + slot_rank, + (bwd_in_meta_.size() - 1), + paddle::platform::errors::InvalidArgument( + "Slot Rank should less equal than bwd_in_meta_ size, since " + "bwd_in_meta_ is designed to hold as same num as backward " + "inputs.")); + auto& metas = bwd_in_meta_.at(slot_rank); + // Init stop gradient vector before use to avoid push back + if (metas.size() < slot_size) { + VLOG(7) << "Init bwd_in_meta_ with slot rank: " << slot_rank; + metas.resize(slot_size); + } + for (size_t i = 0; i < slot_size; i++) { + auto& meta = metas[i]; + const auto& fwd_out_tensor = *fwd_out[i]; + auto* fwd_out_meta = + egr::EagerUtils::nullable_autograd_meta(fwd_out_tensor); + PADDLE_ENFORCE_NOT_NULL(fwd_out_meta, + paddle::platform::errors::PreconditionNotMet( + "Bwd_in_meta should only be called while " + "autograd_meta is not null. If you got this " + "error, it indicates bugs in framework.")); + if (fwd_out_meta && fwd_out_meta->StopGradient()) { + // Set Stop Gradient only when its true or non-initialized autograd_meta, + // since all default value is false. 
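The `SetGradInMeta` overload above leans on the `PADDLE_ENFORCE_*` family (`ENFORCE_LE`, `ENFORCE_NOT_NULL`, `ENFORCE_NE`) that this PR is also substituting for bare `CHECK_*` calls elsewhere: the macro evaluates a condition and, on failure, raises a typed error carrying a formatted message instead of aborting the process. A minimal sketch of the mechanism (not Paddle's real implementation, which additionally records file, line, and a C++ traceback):

```cpp
#include <cstdio>
#include <sstream>
#include <stdexcept>

// Illustrative stand-in for PADDLE_ENFORCE_EQ: evaluate both sides once,
// and on mismatch throw an exception carrying the values and a message.
#define SKETCH_ENFORCE_EQ(lhs, rhs, msg)                                  \
  do {                                                                    \
    auto&& lhs_v = (lhs);                                                 \
    auto&& rhs_v = (rhs);                                                 \
    if (!(lhs_v == rhs_v)) {                                              \
      std::ostringstream oss;                                             \
      oss << "Enforce failed: " #lhs " == " #rhs " (" << lhs_v << " vs "  \
          << rhs_v << "). " << (msg);                                     \
      throw std::invalid_argument(oss.str());                             \
    }                                                                     \
  } while (0)

int main() {
  SKETCH_ENFORCE_EQ(2 + 2, 4, "sanity check");  // passes silently
  try {
    SKETCH_ENFORCE_EQ(3, 4, "sizes must match");
  } catch (const std::invalid_argument& e) {
    std::puts(e.what());  // Enforce failed: 3 == 4 (3 vs 4). sizes must match
  }
}
```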
+ meta.SetStopGradient(fwd_out_meta->StopGradient()); + } + + if (!fwd_out_tensor.initialized()) { + if (fwd_out_tensor.defined() && fwd_out_tensor.is_dist_tensor() && + phi::distributed::NeedComputationClipForPP(fwd_out_tensor.impl())) { + VLOG(3) << "Tensor " << fwd_out_tensor.name() << " is DistTensor," + << " and needs computation clip for pipeline parallel." + << " Still SetGradInMeta for it."; + } else { + VLOG(7) << "Skip Configuring GradSlotMeta for uninitialized GradInput " + "Tensor"; + return; + } + } + + // Record TensorMeta + if (phi::DenseTensor::classof(fwd_out_tensor.impl().get())) { + // Only Copy Meta + phi::DenseTensor* dense_tensor = + static_cast(fwd_out_tensor.impl().get()); + + PADDLE_ENFORCE_NE( + dense_tensor->meta().dtype, + phi::DataType::UNDEFINED, + paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta " + "with phi::DataType::UNDEFINED," + "which is illegal.")); + meta.SetTensorMeta(dense_tensor->meta()); + meta.SetPlace(fwd_out_tensor.place()); + + if (dense_tensor->type() == phi::DataType::COMPLEX64 || + dense_tensor->type() == phi::DataType::COMPLEX128) { + need_complex_to_real_ = true; + } + } else if (phi::distributed::DistTensor::classof( + fwd_out_tensor.impl().get())) { + // Only Copy Meta + meta.SetDistAttr(static_cast( + fwd_out_tensor.impl().get()) + ->dist_attr()); + meta.SetDistTensorGlobalDims(static_cast( + fwd_out_tensor.impl().get()) + ->dims()); + SetIsRunAutoParallel(true); + + auto dense_tensor = static_cast( + fwd_out_tensor.impl().get()) + ->value(); + + PADDLE_ENFORCE_NE( + dense_tensor.meta().dtype, + phi::DataType::UNDEFINED, + paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta " + "with phi::DataType::UNDEFINED," + "which is illegal.")); + meta.SetTensorMeta(dense_tensor.meta()); + meta.SetPlace(fwd_out_tensor.place()); + + if (dense_tensor.type() == phi::DataType::COMPLEX64 || + dense_tensor.type() == phi::DataType::COMPLEX128) { + need_complex_to_real_ = true; + } + } else { + VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta " + "with non-DenseTensor argument."; + } + } +} + void GradNodeBase::SetGradOutMeta(const paddle::Tensor& fwd_in, size_t slot_rank) { auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in); diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 7b5e36f4d5cdc..73eedaba9e4f3 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -250,7 +250,8 @@ class GradNodeBase { void SetGradInMeta(const std::vector& fwd_out, size_t slot_rank); void SetGradInMeta(const paddle::Tensor& fwd_out, size_t slot_rank); - + void SetGradInMeta(const std::vector& fwd_out, + size_t slot_rank); void SetGradOutMeta(const std::vector& fwd_in, size_t slot_rank); void SetGradOutMeta(const std::vector& fwd_in, diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index 71a72db60d8cb..18e72c4f0782a 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ b/paddle/fluid/eager/tensor_wrapper.h @@ -142,33 +142,6 @@ class TensorWrapper { } } -#ifndef PADDLE_NO_PYTHON - TensorWrapper(const TensorWrapper& other) { - no_need_buffer_ = other.no_need_buffer_; - intermidiate_tensor_ = other.intermidiate_tensor_; - weak_grad_node_ = other.weak_grad_node_; - inplace_version_snapshot_ = other.inplace_version_snapshot_; - packed_value_ = other.packed_value_; - unpack_hook_ = other.unpack_hook_; - if (packed_value_) { - packed_value_->inc_ref(); - } - } - - TensorWrapper& operator=(const 
TensorWrapper& other) { - no_need_buffer_ = other.no_need_buffer_; - intermidiate_tensor_ = other.intermidiate_tensor_; - weak_grad_node_ = other.weak_grad_node_; - inplace_version_snapshot_ = other.inplace_version_snapshot_; - packed_value_ = other.packed_value_; - unpack_hook_ = other.unpack_hook_; - if (packed_value_) { - packed_value_->inc_ref(); - } - return *this; - } -#endif - paddle::Tensor recover() { VLOG(6) << "Recover tensor: " << intermidiate_tensor_.name() << " for wrapper"; diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h index 39ec0e7fe31a3..eaee4e9984f8d 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -22,7 +22,6 @@ #include "paddle/fluid/framework/tensor_ref_array.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/ir_adaptor/translator/program_translator.h" -#include "paddle/fluid/operators/run_program_op.h" #include "paddle/fluid/pir/transforms/pd_op_to_kernel_pass.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler/event_tracing.h" @@ -33,9 +32,14 @@ #include "paddle/pir/include/core/program.h" #include "paddle/pir/include/core/value.h" +#ifdef PADDLE_WITH_DNNL +#include "paddle/fluid/platform/onednn_helper.h" +#endif + COMMON_DECLARE_bool(enable_pir_with_pt_in_dy2st); COMMON_DECLARE_bool(enable_pir_in_executor); COMMON_DECLARE_bool(print_ir); +COMMON_DECLARE_bool(use_mkldnn); namespace details { using Tensor = paddle::Tensor; @@ -91,39 +95,45 @@ static bool IsVariableRefArray(const Tensor &tensor) { static auto GetNameFromValue(const ::pir::Block *block, const std::vector<::pir::Value> &values, - bool is_input) { + bool allow_input, + bool allow_output) { + PADDLE_ENFORCE_EQ( + allow_input || allow_output, + true, + paddle::platform::errors::InvalidArgument( + "GetNameFromValue should allow input or output at least one.")); // we use name here, later value is used directly. 
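`GetNameFromValue` here trades its single `bool is_input` parameter for independent `allow_input`/`allow_output` flags, guarded up front so that at least one must be set; a single walk over the block can then harvest input names, output names, or both, which the inplace-chain case further down relies on. The guard pattern in isolation, with a hypothetical reduced signature:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Reduced model: each flag enables one family of ops to be scanned;
// requesting neither is a caller bug, caught eagerly.
std::vector<std::string> CollectNames(bool allow_input, bool allow_output) {
  assert((allow_input || allow_output) &&
         "should allow input or output at least one");
  std::vector<std::string> names;
  if (allow_input) names.emplace_back("pd_op.data");              // input sources
  if (allow_output) names.emplace_back("builtin.shadow_output");  // output sinks
  return names;
}
```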
std::unordered_map<::pir::Value, std::string> value2name; - if (is_input) { + if (allow_input) { for (auto &kwarg : block->kwargs()) { value2name[kwarg.second] = kwarg.first; } } for (auto &op : *block) { std::string name; - if (is_input && op.name() == "pd_op.data") { + if (allow_input && op.name() == "pd_op.data") { name = op.attributes().at("name").dyn_cast().AsString(); value2name[op.results()[0].Value::impl()] = name; - } else if (!is_input && op.name() == "builtin.set_parameter") { + } else if (allow_output && op.name() == "builtin.set_parameter") { name = op.attributes() .at("parameter_name") .dyn_cast() .AsString(); value2name[op.operand(0).source()] = name; - } else if (!is_input && op.name() == "builtin.shadow_output") { + } else if (allow_output && op.name() == "builtin.shadow_output") { name = op.attributes() .at("output_name") .dyn_cast() .AsString(); value2name[op.operand(0).source()] = name; - } else if (is_input && op.name() == "builtin.parameter") { + } else if (allow_input && op.name() == "builtin.parameter") { name = op.attributes() .at("parameter_name") .dyn_cast() .AsString(); value2name[op.result(0).Value::impl()] = name; - } else if (is_input && op.name() == "builtin.constant") { + } else if (allow_input && op.name() == "builtin.constant") { if (op.isa()) { name = op.dyn_cast().tensor_name(); value2name[op.result(0).Value::impl()] = name; @@ -248,12 +258,7 @@ static void ShareTensorsIntoScopeByValue( const std::vector &tensors, const std::vector<::pir::Value> &values, paddle::framework::Scope *scope) { - auto names = GetNameFromValue(block, values, true); - if (VLOG_IS_ON(4)) { - for (auto &s : names) { - VLOG(4) << "ShareTensorIntoScopeByValue name: " << s; - } - } + auto names = GetNameFromValue(block, values, true, false); ShareTensorsIntoScopeWithName(tensors, names, scope); } @@ -262,11 +267,16 @@ static void ShareTensorsFromScopeByValue( const std::vector &tensors, const std::vector<::pir::Value> &values, paddle::framework::Scope *scope) { - auto names = GetNameFromValue(block, values, false); + // NOTE(SigureMo): If the program has an inplace chain connecting + // an input value to an output value, the output value will be + // replaced with the input value, so we set the `allow_input` to + // `true` in `GetNameFromValue` + auto names = GetNameFromValue(block, values, true, true); for (size_t i = 0; i < tensors.size(); ++i) { auto &name = names[i]; auto &value = values[i]; - VLOG(2) << "share " << name << " from scope"; + VLOG(4) << "Share Tensor From Scope: " << name; + if (value.impl() == nullptr) { // skip stop_gradient. 
continue; @@ -524,20 +534,20 @@ inline void PirRunProgramAPI( // *backward_program); // update interpretercore skip_gc_var - auto skip_names = - details::GetNameFromValue(forward_global_block, middle_values, false); + auto skip_names = details::GetNameFromValue( + forward_global_block, middle_values, false, true); auto skip_names_set = std::set(skip_names.begin(), skip_names.end()); auto no_need_buffer_values = PADDLE_GET_CONST(std::vector<::pir::Value>, attrs.at("no_need_buffers")); auto no_need_buffer_names = details::GetNameFromValue( - forward_global_block, no_need_buffer_values, false); + forward_global_block, no_need_buffer_values, false, true); for (auto &name : no_need_buffer_names) { VLOG(4) << "Find no need buffer vars with name:" << name; skip_names_set.erase(name); } - skip_names = - details::GetNameFromValue(forward_global_block, output_values, false); + skip_names = details::GetNameFromValue( + forward_global_block, output_values, false, true); skip_names_set.insert(skip_names.begin(), skip_names.end()); details::print_collection(skip_names_set); interpreter_core->SetSkipGcVars(skip_names_set); @@ -1092,6 +1102,11 @@ inline void PirRunProgramGradAPI( // Step 1. share input_vars & parameters into scope auto passed_kernel_program = paddle::framework::ApplyIrPass(backward_program, place); + + const auto &new_block = passed_kernel_program->block(); + passed_kernel_program = paddle::framework::ApplyRemoveShadowFeedPass( + std::move(passed_kernel_program), new_block, place, global_inner_scope); + if (FLAGS_print_ir) { std::ostringstream print_stream; print_stream << "LoweredProgram( AfterPass | Backward ) is :\n"; @@ -1127,11 +1142,11 @@ inline void PirRunProgramGradAPI( // get all eager gc vars std::set skip_eager_delete_vars; - auto skip_names = - details::GetNameFromValue(backward_global_block, x_grad_values, false); + auto skip_names = details::GetNameFromValue( + backward_global_block, x_grad_values, false, true); skip_eager_delete_vars.insert(skip_names.begin(), skip_names.end()); - skip_names = - details::GetNameFromValue(backward_global_block, p_grad_values, false); + skip_names = details::GetNameFromValue( + backward_global_block, p_grad_values, false, true); skip_eager_delete_vars.insert(skip_names.begin(), skip_names.end()); interpreter_core->SetSkipGcVars(skip_eager_delete_vars); cache.UpdateSkipEagerDeleteVars(program_id, diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 4dc0db770727a..1659430d6216f 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -717,8 +717,8 @@ std::string EagerUtils::GradNodeStr(const egr::GradNodeBase& node) { in_slot_str += paddle::string::Sprintf(SLOT_INFO_TEMPLATE, i, sg_str, edges_str); } - std::string in_meta_str = - paddle::string::Sprintf(GRAD_SLOT_META_TEMPLATE, in_slot_str); + std::string in_meta_str = paddle::string::Sprintf( + GRAD_SLOT_META_TEMPLATE, in_metas.size(), in_slot_str); return paddle::string::Sprintf( GRAD_NODE_TEMPLATE, out_meta_str, in_meta_str); } else if (VLOG_IS_ON(5)) { diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 4dfd8312f6153..62459827d3c39 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -153,10 +153,10 @@ if(WITH_XPU) target_link_libraries(var_type_traits dynload_xpti) endif() -# every source file that includes "dnnl.h" must depends on mkldnn -# or, the first one should depends on mkldnn -if(WITH_MKLDNN) - add_dependencies(var_type_traits mkldnn) +# every 
source file that includes "dnnl.h" must depend on onednn +# or, the first one should depend on onednn +if(WITH_ONEDNN) + add_dependencies(var_type_traits onednn) endif() set(BRPC_DEPS "") @@ -273,10 +273,10 @@ cc_library( SRCS shape_inference.cc DEPS phi common attribute selected_rows_utils) -# every source file that includes "dnnl.h" must depends on mkldnn -# or, the first one should depends on mkldnn -if(WITH_MKLDNN) - add_dependencies(shape_inference mkldnn) +# every source file that includes "dnnl.h" must depend on onednn +# or, the first one should depend on onednn +if(WITH_ONEDNN) + add_dependencies(shape_inference onednn) endif() cc_library( @@ -954,8 +954,8 @@ cc_library( DEPS common) target_link_libraries(type_info pir op_dialect) add_dependencies(type_info framework_proto auto_parallel_proto xxhash) -if(WITH_MKLDNN) - add_dependencies(type_info mkldnn) +if(WITH_ONEDNN) + add_dependencies(type_info onednn) endif() set(FLUID_FRAMEWORK_MODULES diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 039ed3ffc2441..1a70dca1ff4f1 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -26,7 +26,7 @@ class Variable; } // namespace paddle #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index d771a12411adb..20c1444f238eb 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -54,8 +54,8 @@ set(op_handle_deps selected_rows_utils reference_count_pass_helper) -if(WITH_MKLDNN) - set(op_handle_deps ${op_handle_deps} mkldnn) +if(WITH_ONEDNN) + set(op_handle_deps ${op_handle_deps} onednn) endif() if(WITH_DGC) @@ -161,6 +161,6 @@ cc_library( SRCS build_strategy.cc DEPS pass_builder ${IR_PASS_DEPS}) -if(WITH_MKLDNN) - target_link_libraries(build_strategy mkldnn_placement_pass) +if(WITH_ONEDNN) + target_link_libraries(build_strategy onednn_placement_pass) endif() diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index f49936bf44739..79578e5653a22 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -61,8 +61,6 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { // Note: This is a trick to support 0D-Tensor for CINN. This pass will be // removed in the near future. AppendPass("cinn_zero_tensor_trick_pass"); - // Note: This pass is used to enable cinn. - AppendPass("build_cinn_pass"); AppendPrintGraphPass("graph_viz_pass", "_build_cinn_graph"); } #endif @@ -78,7 +76,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { AppendMultiDevPass(); AppendMultiGraphOptPasses(); - AppendPassToSetMkldnnAttr("mkldnn_placement_pass"); + AppendPassToSetMkldnnAttr("onednn_placement_pass"); // runtime_context_cache pass should be the last pass to enable the attr of // all original and fused operators. But no operator can have this attr // enabled if the pass is placed after MultiDevPass. @@ -179,7 +177,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { "delete_dropout_op_x_pass"); AppendPassWithCheck( strategy_.enable_inference_pass_ && strategy_.use_mkldnn_, - "mkldnn_placement_pass"); + "onednn_placement_pass"); // 2.
training pass #ifdef PADDLE_WITH_CUDNN_FRONTEND @@ -480,7 +478,7 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph, "GPU, skipped."; continue; } - } else if (pass->Type() == "mkldnn_placement_pass") { + } else if (pass->Type() == "onednn_placement_pass") { pass->Set("mkldnn_enabled_op_types", new std::unordered_set(mkldnn_enabled_op_types_)); } else if (pass->Type() == "backward_optimizer_op_deps_pass") { @@ -548,7 +546,7 @@ USE_PASS(build_cinn_pass); USE_PASS(fused_feedforward_pass); #endif #ifdef PADDLE_WITH_DNNL -USE_PASS(mkldnn_placement_pass); +USE_PASS(onednn_placement_pass); #endif #if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && \ !defined(_WIN32) && !defined(__APPLE__) diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index e954fd6a7a348..c0c7e6765b4dc 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -141,8 +141,8 @@ struct BuildStrategy { // Fuse ResUnit bool fuse_resunit_{false}; // mkldnn_enabled_op_types specify the operator type list to - // use MKLDNN acceleration. It is null in default, means - // that all the operators supported by MKLDNN will be + // use OneDNN acceleration. It is empty by default, which means + // that all the operators supported by OneDNN will be // accelerated. And it should not be set when // FLAGS_use_mkldnn=false std::unordered_set<std::string> mkldnn_enabled_op_types_; diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index fbc2565e755fa..9d6ac59018856 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -26,7 +26,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler/event_tracing.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/common/flags.h" #include "paddle/fluid/framework/executor_gc_helper.h" @@ -609,7 +609,7 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) { } #else LOG(WARNING) - << "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option"; + << "'MKLDNN' is not supported. Please re-compile with the WITH_ONEDNN option"; #endif } } // namespace framework diff --git a/paddle/fluid/framework/executor_cache.cc b/paddle/fluid/framework/executor_cache.cc index 0be2a603502cb..9045ca0f6a17d 100644 --- a/paddle/fluid/framework/executor_cache.cc +++ b/paddle/fluid/framework/executor_cache.cc @@ -20,6 +20,7 @@ #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/ir_adaptor/translator/translate.h" #include "paddle/fluid/pir/transforms/general/inplace_pass.h" +#include "paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.h" #include "paddle/fluid/pir/transforms/pd_op_to_kernel_pass.h" #include "paddle/pir/include/core/program.h" #include "paddle/pir/include/core/value.h" @@ -384,6 +385,28 @@ std::unique_ptr<::pir::Program> ApplyIrPass(::pir::Program *program, return ir_res; } +std::unique_ptr<::pir::Program> ApplyRemoveShadowFeedPass( + std::unique_ptr<::pir::Program> program, + const pir::Block *block, + const phi::Place &place, + const paddle::framework::Scope *scope) { + ::pir::PassManager pm(::pir::IrContext::Instance(), 3); + auto pass = ::pir::CreateRemoveShadowFeedPass(); + pass->SetNotOwned("top_block", block); + pass->SetNotOwned(pir::Pass::kPlaceAttr, &place); + pass->SetNotOwned(pir::Pass::kParamScopeAttr, scope); +
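// Clarifying note, not part of the original patch: SetNotOwned stores raw, non-owning pointers, so `block`, `place`, and `scope` must stay alive until pm.Run() below has finished; only the pass object itself is owned by the PassManager, via the std::move that follows.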
pm.AddPass(std::move(pass)); + pm.Run(program.get()); + + if (FLAGS_print_ir) { + std::cout << "IR After RemoveShadowFeedPass -------------------" + << std::endl; + std::cout << *program << std::endl; + } + + return program; +} + std::unique_ptr<::pir::Program> ConstructForwardIrProgram( const paddle::framework::BlockDesc *forward_global_block, const paddle::framework::BlockDesc *backward_global_block, diff --git a/paddle/fluid/framework/executor_cache.h b/paddle/fluid/framework/executor_cache.h index f9afaabec79dc..1e5136892d13f 100644 --- a/paddle/fluid/framework/executor_cache.h +++ b/paddle/fluid/framework/executor_cache.h @@ -273,6 +273,12 @@ std::shared_ptr<InterpreterCore> CreatePirInterpreterCoreInfoToCache( std::unique_ptr<::pir::Program> ApplyIrPass(::pir::Program* program, phi::Place place); +std::unique_ptr<::pir::Program> ApplyRemoveShadowFeedPass( + std::unique_ptr<::pir::Program> program, + const pir::Block* block, + const phi::Place& place, + const paddle::framework::Scope* scope); + std::unique_ptr<::pir::Program> ConstructForwardIrProgram( const paddle::framework::BlockDesc* forward_global_block, const paddle::framework::BlockDesc* backward_global_block, diff --git a/paddle/fluid/framework/fleet/heter_ps/feature_value.cu b/paddle/fluid/framework/fleet/heter_ps/feature_value.cu index 7ad502c89af92..58ab45db3e940 100644 --- a/paddle/fluid/framework/fleet/heter_ps/feature_value.cu +++ b/paddle/fluid/framework/fleet/heter_ps/feature_value.cu @@ -355,22 +355,26 @@ void AccessorWrapper::CopyForPushImpl( int64_t* gpu_len = reinterpret_cast<int64_t*>(buf_length->ptr()); int* d_slot_vector = reinterpret_cast<int*>(buf_slot_vector->ptr()); int* d_mf_dim_vector = reinterpret_cast<int*>(buf_mf_dim_vector->ptr()); - cudaMemcpy(gpu_values, - grad_values.data(), - grad_values.size() * sizeof(float*), - cudaMemcpyHostToDevice); - cudaMemcpy(gpu_len, - slot_lengths_lod.data(), - slot_lengths.size() * sizeof(int64_t), - cudaMemcpyHostToDevice); - cudaMemcpy(d_slot_vector, - slot_vector.data(), - slot_lengths_lod.size() * sizeof(int), - cudaMemcpyHostToDevice); - cudaMemcpy(d_mf_dim_vector, - slot_mf_dim_vector.data(), - slot_lengths_lod.size() * sizeof(int), - cudaMemcpyHostToDevice); + // The async copies below are enqueued on the same `stream` as the + // PushCopyWithPool kernel, so they are ordered before it; the host-side + // vectors must remain valid until the stream is synchronized. + cudaMemcpyAsync(gpu_values, + grad_values.data(), + grad_values.size() * sizeof(float*), + cudaMemcpyHostToDevice, + stream); + cudaMemcpyAsync(gpu_len, + slot_lengths_lod.data(), + slot_lengths.size() * sizeof(int64_t), + cudaMemcpyHostToDevice, + stream); + cudaMemcpyAsync(d_slot_vector, + slot_vector.data(), + slot_lengths_lod.size() * sizeof(int), + cudaMemcpyHostToDevice, + stream); + cudaMemcpyAsync(d_mf_dim_vector, + slot_mf_dim_vector.data(), + slot_lengths_lod.size() * sizeof(int), + cudaMemcpyHostToDevice, + stream); PushCopyWithPool<<<(total_length + 1024 - 1) / 1024, 1024, 0, stream>>>( total_grad_values_gpu, gpu_values, diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h b/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h index 069dfeeec157b..49a1592348895 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h +++ b/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h @@ -1631,6 +1631,7 @@ void HeterComm::pull_merge_sparse( val_type_size); } + AnyDeviceGuard guard2(dev_id); auto d_merged_vals = MemoryAlloc(place, uniq_len * val_type_size); auto d_merged_vals_ptr = reinterpret_cast<float*>(d_merged_vals->ptr()); heter_comm_kernel_->dy_mf_fill_dvals(d_shard_vals_ptr, diff --git a/paddle/fluid/framework/io/CMakeLists.txt b/paddle/fluid/framework/io/CMakeLists.txt index 82f879bce353b..8d55a10ee3310
100644 --- a/paddle/fluid/framework/io/CMakeLists.txt +++ b/paddle/fluid/framework/io/CMakeLists.txt @@ -14,6 +14,6 @@ cc_library( SRCS ${framework_io_srcs} DEPS ${framework_io_deps}) -if(WITH_MKLDNN) - add_dependencies(framework_io mkldnn) +if(WITH_ONEDNN) + add_dependencies(framework_io onednn) endif() diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index cb8093298d9bb..95c5d1ec796cc 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -174,42 +174,42 @@ if(WITH_GPU OR WITH_ROCM) pass_library(embedding_eltwise_layernorm_fuse_pass inference) endif() -if(WITH_MKLDNN) - pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn) - pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn) - pass_library(conv_affine_channel_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_bias_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(int8_scale_calculation_mkldnn_pass inference DIR mkldnn) - pass_library(params_quantization_mkldnn_pass inference DIR mkldnn) - pass_library(scale_matmul_fuse_pass inference DIR mkldnn) - pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn) - pass_library(cpu_bfloat16_pass inference DIR mkldnn) - pass_library(fc_mkldnn_pass inference DIR mkldnn) - pass_library(interpolate_mkldnn_pass inference DIR mkldnn) - pass_library(softplus_activation_onednn_fuse_pass inference DIR mkldnn) - pass_library(shuffle_channel_mkldnn_detect_pass inference DIR mkldnn) - pass_library(fc_act_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(elementwise_act_onednn_fuse_pass inference DIR mkldnn) - pass_library(matmul_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(matmul_activation_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(operator_scale_onednn_fuse_pass inference DIR mkldnn) - pass_library(quant_transpose2_dequant_onednn_fuse_pass inference DIR mkldnn) - pass_library(squeeze2_transpose2_onednn_fuse_pass inference DIR mkldnn) - pass_library(operator_unsqueeze2_onednn_fuse_pass inference DIR mkldnn) - pass_library(operator_reshape2_onednn_fuse_pass inference DIR mkldnn) - pass_library(cpu_quantize_placement_pass base DIR mkldnn) - pass_library(cpu_quantize_pass inference DIR mkldnn) - pass_library(cpu_quantize_squash_pass inference DIR mkldnn) - pass_library(reshape_transpose_matmul_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(matmul_transpose_reshape_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(batch_norm_act_fuse_pass inference DIR mkldnn) - pass_library(multi_gru_fuse_pass inference DIR mkldnn) - pass_library(multi_gru_seq_fuse_pass inference DIR mkldnn) - pass_library(quant_dequant_mkldnn_pass inference DIR mkldnn) - pass_library(compute_propagate_scales_mkldnn_pass inference DIR mkldnn) - pass_library(self_attention_fuse_pass inference DIR mkldnn) +if(WITH_ONEDNN) + pass_library(onednn_placement_pass base DEPS placement_pass_base DIR onednn) + pass_library(depthwise_conv_onednn_pass base DIR onednn) + pass_library(conv_affine_channel_onednn_fuse_pass inference DIR onednn) + pass_library(conv_bias_onednn_fuse_pass inference DIR onednn) + pass_library(conv_activation_onednn_fuse_pass inference DIR onednn) + pass_library(conv_elementwise_add_onednn_fuse_pass inference DIR onednn) + pass_library(int8_scale_calculation_onednn_pass inference DIR onednn) + 
pass_library(params_quantization_onednn_pass inference DIR onednn) + pass_library(scale_matmul_fuse_pass inference DIR onednn) + pass_library(cpu_bfloat16_placement_pass inference DIR onednn) + pass_library(cpu_bfloat16_pass inference DIR onednn) + pass_library(fc_onednn_pass inference DIR onednn) + pass_library(interpolate_onednn_pass inference DIR onednn) + pass_library(softplus_activation_onednn_fuse_pass inference DIR onednn) + pass_library(shuffle_channel_onednn_detect_pass inference DIR onednn) + pass_library(fc_act_onednn_fuse_pass inference DIR onednn) + pass_library(elementwise_act_onednn_fuse_pass inference DIR onednn) + pass_library(matmul_elementwise_add_onednn_fuse_pass inference DIR onednn) + pass_library(matmul_activation_onednn_fuse_pass inference DIR onednn) + pass_library(operator_scale_onednn_fuse_pass inference DIR onednn) + pass_library(quant_transpose2_dequant_onednn_fuse_pass inference DIR onednn) + pass_library(squeeze2_transpose2_onednn_fuse_pass inference DIR onednn) + pass_library(operator_unsqueeze2_onednn_fuse_pass inference DIR onednn) + pass_library(operator_reshape2_onednn_fuse_pass inference DIR onednn) + pass_library(cpu_quantize_placement_pass base DIR onednn) + pass_library(cpu_quantize_pass inference DIR onednn) + pass_library(cpu_quantize_squash_pass inference DIR onednn) + pass_library(reshape_transpose_matmul_onednn_fuse_pass inference DIR onednn) + pass_library(matmul_transpose_reshape_onednn_fuse_pass inference DIR onednn) + pass_library(batch_norm_act_fuse_pass inference DIR onednn) + pass_library(multi_gru_fuse_pass inference DIR onednn) + pass_library(multi_gru_seq_fuse_pass inference DIR onednn) + pass_library(quant_dequant_onednn_pass inference DIR onednn) + pass_library(compute_propagate_scales_onednn_pass inference DIR onednn) + pass_library(self_attention_fuse_pass inference DIR onednn) if(WITH_AVX AND AVX512F_FOUND AND AVX512F_FLAG) @@ -274,6 +274,8 @@ if(WITH_XPU) ${XPU_PASS_DEPS}) pass_library(decoder_attention_xpu_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) + pass_library(cross_attention_xpu_fuse_pass inference DIR xpu DEPS + ${XPU_PASS_DEPS}) pass_library(multi_encoder_xpu_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) pass_library(multi_encoder_xpu_adaptive_seqlen_fuse_pass inference DIR xpu @@ -301,6 +303,8 @@ if(WITH_XPU) ${XPU_PASS_DEPS}) pass_library(add_layernorm_xpu_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) + pass_library(group_norm_silu_xpu_fuse_pass inference DIR xpu DEPS + ${XPU_PASS_DEPS}) pass_library(xpu_delete_cast_op_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) pass_library(fold_interp_outsize_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) @@ -324,6 +328,8 @@ if(WITH_XPU) pass_library(quant_dequant_xpu_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) pass_library(roformer_relative_pos_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) + pass_library(spatial_transformer_resblock_xpu_fuse_pass inference DIR xpu + DEPS ${XPU_PASS_DEPS}) endif() cc_library( @@ -536,19 +542,19 @@ if(NOT WIN32) SRCS dense_multihead_matmul_to_sparse_pass_tester.cc DEPS multihead_matmul_fuse_pass dense_multihead_matmul_to_sparse_pass) endif() -if(WITH_MKLDNN) +if(WITH_ONEDNN) cc_test( - test_depthwise_conv_mkldnn_pass - SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc - DEPS depthwise_conv_mkldnn_pass) + test_depthwise_conv_onednn_pass + SRCS onednn/depthwise_conv_onednn_pass_tester.cc + DEPS depthwise_conv_onednn_pass) cc_test( - test_int8_scale_calculation_mkldnn_pass - SRCS 
mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc - DEPS int8_scale_calculation_mkldnn_pass pass_test_util) + test_int8_scale_calculation_onednn_pass + SRCS onednn/int8_scale_calculation_onednn_pass_tester.cc + DEPS int8_scale_calculation_onednn_pass pass_test_util) cc_test( - test_params_quantization_mkldnn_pass - SRCS mkldnn/params_quantization_mkldnn_pass_tester.cc - DEPS params_quantization_mkldnn_pass) + test_params_quantization_onednn_pass + SRCS onednn/params_quantization_onednn_pass_tester.cc + DEPS params_quantization_onednn_pass) set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass @@ -566,43 +572,43 @@ if(WITH_MKLDNN) set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv) endif() cc_test( - test_mkldnn_placement_pass - SRCS mkldnn/mkldnn_placement_pass_tester.cc - DEPS mkldnn_placement_pass) + test_onednn_placement_pass + SRCS onednn/onednn_placement_pass_tester.cc + DEPS onednn_placement_pass) cc_test( - test_compute_propagate_scales_mkldnn_pass - SRCS mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc - DEPS compute_propagate_scales_mkldnn_pass naive_executor) + test_compute_propagate_scales_onednn_pass + SRCS onednn/compute_propagate_scales_onednn_pass_tester.cc + DEPS compute_propagate_scales_onednn_pass naive_executor) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will # be build only in CI, so suppose the generator in Windows is Ninja. - copy_onnx(test_compute_propagate_scales_mkldnn_pass) + copy_onnx(test_compute_propagate_scales_onednn_pass) endif() cc_test( test_cpu_quantize_placement_pass - SRCS mkldnn/cpu_quantize_placement_pass_tester.cc + SRCS onednn/cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass) cc_test( test_cpu_quantize_pass - SRCS mkldnn/cpu_quantize_pass_tester.cc + SRCS onednn/cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor) cc_test( test_cpu_quantize_squash_pass - SRCS mkldnn/cpu_quantize_squash_pass_tester.cc + SRCS onednn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor) cc_test( - test_shuffle_channel_mkldnn_detect_pass - SRCS mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc - DEPS shuffle_channel_mkldnn_detect_pass) + test_shuffle_channel_onednn_detect_pass + SRCS onednn/shuffle_channel_onednn_detect_pass_tester.cc + DEPS shuffle_channel_onednn_detect_pass) cc_test( test_cpu_bfloat16_placement_pass - SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc + SRCS onednn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass) cc_test( test_cpu_bfloat16_pass - SRCS mkldnn/cpu_bfloat16_pass_tester.cc + SRCS onednn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass) endif() diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc index eda982bf77866..b91132784b95f 100644 --- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc +++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc @@ -773,15 +773,6 @@ bool AutoMixedPrecisionPass::OutputVarsNotConvert( if (std::find(vecs.begin(), vecs.end(), var_name) != vecs.end()) { return true; } - } else if (GetOpOriginalType(op_desc->Type()) == "instance_norm") { - auto vecs = op_desc->Output("SavedMean"); - if (std::find(vecs.begin(), vecs.end(), var_name) != vecs.end()) { - return true; - } - vecs = op_desc->Output("SavedVariance"); - if (std::find(vecs.begin(), vecs.end(), var_name) != vecs.end()) { - return true; - } } return false; diff --git 
a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 4faebacb5f55c..947dc73333e0c 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -18,7 +18,7 @@ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc index 52ba852a730a5..cd823afa96dd4 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc @@ -16,6 +16,9 @@ #include #include "paddle/fluid/framework/ir/cutlass_teller.h" #include "paddle/fluid/framework/op_version_registry.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#endif namespace paddle { namespace framework { @@ -194,7 +197,11 @@ void ConvElementwiseAdd2ActFusePass::ApplyImpl(ir::Graph* graph) const { auto new_op_proto = PrepareOpDesc( base_op_desc, bias_name, bias1_name, act_op_type, act_op_out); framework::OpDesc new_op_desc(new_op_proto, nullptr); - if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) { + int sm = 0; +#ifdef PADDLE_WITH_CUDA + sm = platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()); +#endif + if (cutlass_can_fuse && cutlass_enable && (is_fp16_precision || sm >= 80)) { new_op_desc.SetAttr("use_cudnn", false); } diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc index a560c0ab52e5a..0f5f2f7cc78b6 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc @@ -15,6 +15,9 @@ #include "paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h" #include "paddle/fluid/framework/ir/cutlass_teller.h" #include "paddle/fluid/framework/op_version_registry.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#endif namespace paddle { namespace framework { @@ -215,7 +218,11 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const { auto new_op_proto = PrepareOpDesc(base_op_desc, bias_name, act_op_type, act_op_out, alpha); framework::OpDesc new_op_desc(new_op_proto, nullptr); - if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) { + int sm = 0; +#ifdef PADDLE_WITH_CUDA + sm = platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()); +#endif + if (cutlass_can_fuse && cutlass_enable && (is_fp16_precision || sm >= 80)) { new_op_desc.SetAttr("use_cudnn", false); new_op_desc.Flush(); } diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc index a3defa9f3ed06..4a0dd02db0f24 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc @@ -15,6 +15,9 @@ #include "paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h" #include "paddle/fluid/framework/ir/cutlass_teller.h" #include "paddle/fluid/framework/op_version_registry.h" +#ifdef PADDLE_WITH_CUDA +#include 
"paddle/fluid/platform/device/gpu/gpu_info.h" +#endif namespace paddle { namespace framework { @@ -121,14 +124,18 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const { static_cast(Get("model_precision")) == phi::DataType::FLOAT16 || Get("enable_gpu_mixed"); + bool cutlass_enable = Get("use_cutlass"); auto* scope = param_scope(); bool cutlass_can_fuse = CutlassTeller::Instance()->CbaCanSupport( conv_op->Op(), scope, act_type, Get("gpu_device_id")); - if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) { + int sm = 0; +#ifdef PADDLE_WITH_CUDA + sm = platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()); +#endif + if (cutlass_can_fuse && cutlass_enable && (is_fp16_precision || sm >= 80)) { new_op_desc.SetAttr("use_cudnn", false); } - auto* elementwise_add_op_desc = elementwise_add_op->Op(); auto out_threshold_attr = elementwise_add_op_desc->GetNullableAttr("out_threshold"); diff --git a/paddle/fluid/framework/ir/generate_pass_tester.cc b/paddle/fluid/framework/ir/generate_pass_tester.cc index 58a3741a924aa..f0f9330259fff 100644 --- a/paddle/fluid/framework/ir/generate_pass_tester.cc +++ b/paddle/fluid/framework/ir/generate_pass_tester.cc @@ -32,8 +32,7 @@ REGISTER_GENERATE_PASS(generate_fc_fuse) { } }; // replace - SUBGRAPH_(replace) = [subgraph = &replace, with_relu]( - VAR_(x), VAR_(y), VAR_(z)) { + SUBGRAPH_(replace) = [subgraph = &replace](VAR_(x), VAR_(y), VAR_(z)) { auto& fc = OP_(fc)({{"Input", x}, {"W", y}, {"Bias", z}}); return fc.Out("Out"); }; diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc index 0398117e08b8f..796e9a5e7f0a9 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc @@ -305,13 +305,6 @@ void EagerDeletionPass::ApplyImpl(ir::Graph *graph) const { auto recurrent_op_eager_deletion_pass = ir::PassRegistry::Instance().Get("recurrent_op_eager_deletion_pass"); recurrent_op_eager_deletion_pass->Apply(graph); - -#ifdef PADDLE_WITH_CINN - auto share_varinfo_into_cinn_pass = - ir::PassRegistry::Instance().Get("share_varinfo_into_cinn_pass"); - share_varinfo_into_cinn_pass->SetNotOwned(kMemOptVarInfoMapList, &var_infos); - share_varinfo_into_cinn_pass->Apply(graph); -#endif } } // namespace ir diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass.cc index f48897674143a..4dde29316d23e 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass.cc @@ -20,7 +20,7 @@ #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" #include "paddle/fluid/framework/paddle2cinn/build_cinn_pass.h" #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" -#include "paddle/fluid/operators/cinn/cinn_launch_op.h" +#include "paddle/fluid/operators/cinn/cinn_op_helper.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/utils/string/string_helper.h" diff --git a/paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h index 
44631c54ef556..138d66731b54c 100644 --- a/paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_desc.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.cc index 230971a2dd286..788644dc85876 100644 --- a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.h b/paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/batch_norm_act_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc index b94c99c92cdbb..1c733636ca7b0 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
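Because the capability registration moves together with the pass name (REGISTER_PASS_CAPABILITY is re-pointed further down in this hunk), version-compatibility checks must query the new string as well. A small sketch modeled on the depthwise tester later in this diff:

  ASSERT_TRUE(
      paddle::framework::compatible::PassVersionCheckerRegistrar::GetInstance()
          .IsPassCompatible("compute_propagate_scales_onednn_pass"));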
-#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h" #include @@ -487,8 +487,8 @@ void ComputePropagateScalesMkldnnPass::PropagateScales( } void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const { - VLOG(3) << "Convert paddle model to mkldnn quantized model."; - const std::string pattern_name = "compute_propagate_scales_mkldnn_pass"; + VLOG(3) << "Convert paddle model to onednn quantized model."; + const std::string pattern_name = "compute_propagate_scales_onednn_pass"; FusePassBase::Init(pattern_name, graph); const std::unordered_set scale_immutable_ops = { @@ -520,10 +520,10 @@ void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(compute_propagate_scales_mkldnn_pass, +REGISTER_PASS(compute_propagate_scales_onednn_pass, paddle::framework::ir::ComputePropagateScalesMkldnnPass); -REGISTER_PASS_CAPABILITY(compute_propagate_scales_mkldnn_pass) +REGISTER_PASS_CAPABILITY(compute_propagate_scales_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h index 2c2474438bedf..b63c74a884118 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h @@ -17,7 +17,7 @@ #include #include "paddle/fluid/framework/ir/fuse_pass_base.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc index c09a2d1ffbb8d..9664647fd4214 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass_tester.cc @@ -15,7 +15,7 @@ #include #include -#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/compute_propagate_scales_onednn_pass.h" #include "paddle/fluid/framework/naive_executor.h" #include "paddle/phi/common/place.h" diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc index 077a29d113bb7..61c0457f7c740 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the 
License. -#include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -42,18 +42,18 @@ void ConvActivationMkldnnFusePass::FuseConvAct(Graph* graph, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init(conv_type + "_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init(conv_type + "_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::OperatorActivation conv_act_pattern(gpd.mutable_pattern(), - "conv_activation_mkldnn_fuse"); + "conv_activation_onednn_fuse"); conv_act_pattern(conv_type, act_type); int found_conv_activation_count = 0; auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { if (!IsCompat(subgraph, g)) { - LOG(WARNING) << "conv_activation_mkldnn_fuse_pass op compat failed."; + LOG(WARNING) << "conv_activation_onednn_fuse_pass op compat failed."; return; } @@ -92,12 +92,12 @@ void ConvActivationMkldnnFusePass::FuseConvConcatAct( Graph* graph, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init("conv2d_concat_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init("conv2d_concat_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; auto pattern = gpd.mutable_pattern(); patterns::OperatorActivation conv_concat_act( - pattern, "conv2d_concat_" + act_type + "_mkldnn_fuse_pass"); + pattern, "conv2d_concat_" + act_type + "_onednn_fuse_pass"); conv_concat_act("concat", act_type); int found_conv_concat_activation_count = 0; @@ -105,7 +105,7 @@ void ConvActivationMkldnnFusePass::FuseConvConcatAct( Graph* g) { if (!IsCompat(subgraph, g)) { LOG(WARNING) - << "conv_concat_activation_mkldnn_fuse_pass op compat failed."; + << "conv_concat_activation_onednn_fuse_pass op compat failed."; return; } @@ -377,10 +377,10 @@ ConvActivationMkldnnFusePass::ConvActivationMkldnnFusePass() { } // namespace framework } // namespace paddle -REGISTER_PASS(conv_activation_mkldnn_fuse_pass, +REGISTER_PASS(conv_activation_onednn_fuse_pass, paddle::framework::ir::ConvActivationMkldnnFusePass); -REGISTER_PASS_CAPABILITY(conv_activation_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_activation_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.h similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.h index b50fa8997fdf8..9821421254c66 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/conv_activation_onednn_fuse_pass.h @@ -33,9 +33,9 @@ class ConvActivationMkldnnFusePass : public FusePassBase { void FuseConvAct(Graph *graph, const std::string &conv_type, - std::string &act_type) const; + std::string &act_type) const; // NOLINT - void FuseConvConcatAct(Graph *graph, std::string &act_type) const; + void 
FuseConvConcatAct(Graph *graph, std::string &act_type) const; // NOLINT }; } // namespace ir diff --git a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc index eedb5b3b60bd5..5ee6e361bcc92 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.h" #include @@ -313,10 +313,10 @@ void ConvAffineChannelFusePass::FuseConvAffineChannel( } // namespace framework } // namespace paddle -REGISTER_PASS(conv_affine_channel_mkldnn_fuse_pass, +REGISTER_PASS(conv_affine_channel_onednn_fuse_pass, paddle::framework::ir::ConvAffineChannelFusePass); -REGISTER_PASS_CAPABILITY(conv_affine_channel_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_affine_channel_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.h similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.h index cc0a761c31ed2..49545ad565e52 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/conv_affine_channel_onednn_fuse_pass.h @@ -38,7 +38,7 @@ class ConvAffineChannelFusePass : public FusePassBase { void ApplyImpl(ir::Graph*) const override; void FuseConvAffineChannel(ir::Graph* graph, const std::string& conv_type) const; - const std::string name_scope_{"conv_affine_channel_mkldnn_fuse"}; + const std::string name_scope_{"conv_affine_channel_onednn_fuse"}; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc index 0aa71c3df5fb5..1cf663d13deef 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.h" #include #include @@ -448,21 +448,21 @@ void ConvBiasFusePass::FuseConvBias(ir::Graph* graph, } // namespace ir } // namespace framework } // namespace paddle -REGISTER_PASS(conv_bias_mkldnn_fuse_pass, +REGISTER_PASS(conv_bias_onednn_fuse_pass, paddle::framework::ir::ConvBiasFusePass); -REGISTER_PASS_CAPABILITY(conv_bias_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_bias_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) .LE("elementwise_add", 1)); -REGISTER_PASS(conv_transpose_bias_mkldnn_fuse_pass, +REGISTER_PASS(conv_transpose_bias_onednn_fuse_pass, paddle::framework::ir::Conv2DTransposeBiasFusePass); -REGISTER_PASS_CAPABILITY(conv_transpose_bias_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_transpose_bias_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d_transpose", 2) .LE("elementwise_add", 1)); -REGISTER_PASS(conv3d_bias_mkldnn_fuse_pass, +REGISTER_PASS(conv3d_bias_onednn_fuse_pass, paddle::framework::ir::Conv3DBiasFusePass); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.h similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.h index 4fb8418686299..f53cdf19d29f2 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/conv_bias_onednn_fuse_pass.h @@ -40,7 +40,7 @@ class ConvBiasFusePass : public FusePassBase { const std::string& conv_type, const std::string& fused_conv) const; - const std::string name_scope_{"conv_bias_mkldnn_fuse"}; + const std::string name_scope_{"conv_bias_onednn_fuse"}; }; /* diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc index fecf4a4eaf5f8..7733730f7d605 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
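A practical consequence of the conv_bias renames above for deployers: inference configurations that edit the pass list by name must switch to the new strings. A hypothetical sketch (assuming a paddle_infer::Config named config; AppendPass and DeletePass are the stock PaddlePassBuilder methods):

  auto* builder = config.pass_builder();
  builder->DeletePass("conv_bias_mkldnn_fuse_pass");   // old name, no longer registered
  builder->AppendPass("conv_bias_onednn_fuse_pass");   // new name registered above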
-#include "paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_traits.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -156,12 +156,12 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConv( if (!IsCompat(subgraph, g)) { LOG(WARNING) - << "conv_elementwise_add_mkldnn_fuse_pass in op compat failed."; + << "conv_elementwise_add_onednn_fuse_pass in op compat failed."; return; } if (residual_data->Var()->GetShape() != conv_output->Var()->GetShape()) { - LOG(WARNING) << "conv_elementwise_add_mkldnn_fuse_pass doesn't support " - + LOG(WARNING) << "conv_elementwise_add_onednn_fuse_pass doesn't support " - "broadcasting"; return; } @@ -235,7 +235,7 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseProjectionConv( if (!IsCompat(subgraph, g)) { LOG(WARNING) - << "op compat for conv_elementwise_add_mkldnn_fuse_pass failed."; + << "op compat for conv_elementwise_add_onednn_fuse_pass failed."; return; } @@ -309,9 +309,9 @@ void ResidualConnectionMKLDNNFusePass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(conv_elementwise_add_mkldnn_fuse_pass, +REGISTER_PASS(conv_elementwise_add_onednn_fuse_pass, paddle::framework::ir::ResidualConnectionMKLDNNFusePass); -REGISTER_PASS_CAPABILITY(conv_elementwise_add_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(conv_elementwise_add_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/conv_elementwise_add_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.cc index 528ba5747218a..1cebbfc1617a0 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.cc @@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h" #include #include diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc index 951d064364ce3..c31e59b39216a 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_pass.h" #include "paddle/fluid/imperative/type_defs.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.cc index 8741b00f689f5..a07887dafb276 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.cc @@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.h" #include #include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass_tester.cc index c420c616a9ca6..e2de24cc398e0 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass_tester.cc @@ -14,8 +14,8 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/framework/ir/onednn/cpu_bfloat16_placement_pass.h" +#include "paddle/fluid/platform/onednn_helper.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc index 0e9c452455de3..a512f4b8021f4 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
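For orientation, the cpu_quantize passes being moved in this stretch of the diff form Paddle's oneDNN INT8 pipeline: the placement pass marks quantizable operators, cpu_quantize_pass inserts quantize/dequantize ops around them, and the squash pass removes the redundant pairs that result. A minimal hypothetical driver, ignoring the scale attributes the passes normally require and assuming a ProgramDesc prog as in the testers:

  std::unique_ptr<paddle::framework::ir::Graph> graph(
      new paddle::framework::ir::Graph(prog));
  for (const std::string& name : {"cpu_quantize_placement_pass",
                                  "cpu_quantize_pass",
                                  "cpu_quantize_squash_pass"}) {
    paddle::framework::ir::PassRegistry::Instance().Get(name)->Apply(graph.get());
  }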
-#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_pass.h" #include #include #include -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_quantize_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass_tester.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_pass_tester.cc index c7e15e24216aa..3c1f4d8d60925 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_pass_tester.cc @@ -16,7 +16,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" // NOLINT +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_pass.h" // NOLINT #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/phi/common/place.h" diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.cc index 2071f284126b7..56ba19a5cc22b 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.h" #include -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass_tester.cc index 5cbd64c49d200..bd5db7c0e3df2 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass_tester.cc @@ -14,8 +14,8 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_placement_pass.h" +#include "paddle/fluid/platform/onednn_helper.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.cc index 578ab67f2a3b7..91f878a16abd0 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.cc @@ -13,13 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.h" #include #include -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h rename to paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass_tester.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass_tester.cc index 89e57108b17ef..fc57bdb6b52ef 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h" +#include "paddle/fluid/framework/ir/onednn/cpu_quantize_squash_pass.h" #include "paddle/fluid/framework/naive_executor.h" #include "paddle/phi/common/place.h" @@ -120,8 +120,9 @@ ProgramDesc BuildConvRequantProgramDesc(bool use_mkldnn, float scale_out, float scale_in) { ProgramDesc prog; - for (auto& v : std::initializer_list<std::string>( {"a", "w1", "b1", "d", "e", "f", "w2", "b2", "i"})) { + const std::vector<std::string> values = { + "a", "w1", "b1", "d", "e", "f", "w2", "b2", "i"}; + for (auto& v : values) { auto* var = prog.MutableBlock(0)->Var(v); if (v.find("w") == 0 || v.find("b") == 0) { var->SetPersistable(true); @@ -240,7 +241,7 @@ ProgramDesc BuildOpRequantProgramDesc(bool use_mkldnn, {"h"}, use_mkldnn, {matmul_scale, requant_scale3}); - SetOp(&prog, "concat", "Concat", {"c", "f", "h"}, {"g"}, {use_mkldnn}); + SetOp(&prog, "concat", "Concat", {"c", "f", "h"}, {"g"}, use_mkldnn); return prog; } @@ -683,7 +684,7 @@ ProgramDesc BuildRequantOpProgramDesc(bool use_mkldnn, {"h"}, use_mkldnn, {op_scale_in, op_scale_out}); - SetOp(&prog, "concat", "Concat", {"b", "e", "h"}, {"i"}, {use_mkldnn}); + SetOp(&prog, "concat", "Concat", {"b", "e", "h"}, {"i"}, use_mkldnn); return prog; } diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.cc similarity index 90% rename from paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.cc index fca71d0bd6900..703a2c685e770 100644 --- a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/ -#include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -76,7 +76,7 @@ DepthwiseConvMKLDNNPass::DepthwiseConvMKLDNNPass() { // NOLINT void DepthwiseConvMKLDNNPass::ApplyImpl(ir::Graph* graph) const { PADDLE_ENFORCE_NOT_NULL( graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init("depthwise_conv_mkldnn_pass", graph); + FusePassBase::Init("depthwise_conv_onednn_pass", graph); GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); @@ -84,7 +84,7 @@ void DepthwiseConvMKLDNNPass::ApplyImpl(ir::Graph* graph) const { ->assert_is_op("depthwise_conv2d") ->assert_op_attr("use_mkldnn", true); - int found_depthwise_conv_mkldnn_count = 0; + int found_depthwise_conv_onednn_count = 0; auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { if (!IsCompat(subgraph, g)) { @@ -94,20 +94,20 @@ void DepthwiseConvMKLDNNPass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "handle DepthwiseConvMKLDNN fuse"; GET_NODE(depthwise_conv, (*pattern)); depthwise_conv->Op()->SetType("conv2d"); - found_depthwise_conv_mkldnn_count++; + found_depthwise_conv_onednn_count++; }; gpd(graph, handler); - AddStatis(found_depthwise_conv_mkldnn_count); + AddStatis(found_depthwise_conv_onednn_count); } } // namespace ir } // namespace framework } // namespace paddle -REGISTER_PASS(depthwise_conv_mkldnn_pass, +REGISTER_PASS(depthwise_conv_onednn_pass, paddle::framework::ir::DepthwiseConvMKLDNNPass); -REGISTER_PASS_CAPABILITY(depthwise_conv_mkldnn_pass) +REGISTER_PASS_CAPABILITY(depthwise_conv_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination().LE( "depthwise_conv2d", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass_tester.cc similarity index 89% rename from paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass_tester.cc index f74e95fff10d8..5fdb7ad959921 100644 --- a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/depthwise_conv_onednn_pass.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { @@ -105,7 +105,7 @@ ProgramDesc BuildProgramDesc() { TEST(DepthwiseConvMKLDNNPass, pass_op_version_check) { ASSERT_TRUE( paddle::framework::compatible::PassVersionCheckerRegistrar::GetInstance() - .IsPassCompatible("depthwise_conv_mkldnn_pass")); + .IsPassCompatible("depthwise_conv_onednn_pass")); } TEST(DepthwiseConvMKLDNNPass, basic) { @@ -113,12 +113,12 @@ TEST(DepthwiseConvMKLDNNPass, basic) { std::unique_ptr graph(new ir::Graph(prog)); - auto pass = PassRegistry::Instance().Get("depthwise_conv_mkldnn_pass"); + auto pass = 
PassRegistry::Instance().Get("depthwise_conv_onednn_pass"); struct counters { - int mkldnn_depthwise_conv_nodes; + int onednn_depthwise_conv_nodes; int other_depthwise_conv_nodes; - int mkldnn_conv_nodes; + int onednn_conv_nodes; int other_conv_nodes; }; @@ -134,12 +134,12 @@ TEST(DepthwiseConvMKLDNNPass, basic) { auto* op = node->Op(); if (op->Type() == "conv2d") { if (PADDLE_GET_CONST(bool, op->GetAttr("use_mkldnn"))) - after.mkldnn_conv_nodes++; + after.onednn_conv_nodes++; else after.other_conv_nodes++; } else if (op->Type() == "depthwise_conv2d") { if (PADDLE_GET_CONST(bool, op->GetAttr("use_mkldnn"))) - after.mkldnn_depthwise_conv_nodes++; + after.onednn_depthwise_conv_nodes++; else after.other_depthwise_conv_nodes++; } @@ -149,13 +149,13 @@ TEST(DepthwiseConvMKLDNNPass, basic) { EXPECT_EQ(after.other_depthwise_conv_nodes, before.other_depthwise_conv_nodes); EXPECT_EQ(after.other_conv_nodes, before.other_conv_nodes); - EXPECT_EQ(after.mkldnn_depthwise_conv_nodes, - before.mkldnn_depthwise_conv_nodes - 1); - EXPECT_EQ(after.mkldnn_conv_nodes, before.mkldnn_conv_nodes + 1); + EXPECT_EQ(after.onednn_depthwise_conv_nodes, + before.onednn_depthwise_conv_nodes - 1); + EXPECT_EQ(after.onednn_conv_nodes, before.onednn_conv_nodes + 1); } } // namespace ir } // namespace framework } // namespace paddle -USE_PASS(depthwise_conv_mkldnn_pass); +USE_PASS(depthwise_conv_onednn_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc index b6e84145aebff..3f0423870d366 100644 --- a/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -43,7 +43,7 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( const std::string &act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init(elt_type + "_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init(elt_type + "_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::OperatorActivation elementwise_act_pattern(gpd.mutable_pattern(), diff --git a/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/elementwise_act_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.cc similarity index 89% rename from paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.cc index 47c76289d187c..aa4ee8cb5e767 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -34,11 +34,11 @@ void FuseFCActOneDNNPass::FuseFCAct(Graph *graph, const std::string &act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init("fc_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init("fc_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::OperatorActivation fc_act_pattern( - gpd.mutable_pattern(), "fc_" + act_type + "_mkldnn_fuse_pass"); + gpd.mutable_pattern(), "fc_" + act_type + "_onednn_fuse_pass"); fc_act_pattern("fc", act_type); int found_fc_act_count = 0; @@ -70,9 +70,9 @@ void FuseFCActOneDNNPass::FuseFCAct(Graph *graph, } // namespace framework } // namespace paddle -REGISTER_PASS(fc_act_mkldnn_fuse_pass, +REGISTER_PASS(fc_act_onednn_fuse_pass, paddle::framework::ir::FuseFCActOneDNNPass); -REGISTER_PASS_CAPABILITY(fc_act_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(fc_act_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("fc", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/fc_act_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/fc_onednn_pass.cc similarity index 93% rename from paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/fc_onednn_pass.cc index f4396d6d8175a..082579428a01a 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/fc_onednn_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/fc_onednn_pass.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -33,10 +33,10 @@ void FCMKLDNNPass::ApplyImpl(ir::Graph* graph) const { PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); - Init("fc_mkldnn_pass", graph); + Init("fc_onednn_pass", graph); GraphPatternDetector gpd; - patterns::FCMKLDNN fc_pattern(gpd.mutable_pattern(), "fc_mkldnn_pass"); + patterns::FCMKLDNN fc_pattern(gpd.mutable_pattern(), "fc_onednn_pass"); // searching for fc+residual doesn't make sense at this stage fc_pattern(false /*with_residual*/); @@ -89,4 +89,4 @@ void FCMKLDNNPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(fc_mkldnn_pass, paddle::framework::ir::FCMKLDNNPass); +REGISTER_PASS(fc_onednn_pass, paddle::framework::ir::FCMKLDNNPass); diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/fc_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/fc_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc index a219e47072782..499a7734d71d6 100644 --- a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/core/enforce.h" namespace paddle { @@ -110,10 +110,10 @@ void Int8ScaleCalculationMkldnnPass::Int8ScaleImpl( PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); - FusePassBase::Init("int8_scale_calculation_mkldnn_pass", graph); + FusePassBase::Init("int8_scale_calculation_onednn_pass", graph); GraphPatternDetector gpd; patterns::Conv conv_pattern(gpd.mutable_pattern(), - "int8_scale_calculation_mkldnn_pass"); + "int8_scale_calculation_onednn_pass"); conv_pattern(conv_type); int found_int8_scales_count = 0; @@ -214,9 +214,9 @@ void Int8ScaleCalculationMkldnnPass::Int8ScaleImpl( } // namespace framework } // namespace paddle -REGISTER_PASS(int8_scale_calculation_mkldnn_pass, +REGISTER_PASS(int8_scale_calculation_onednn_pass, paddle::framework::ir::Int8ScaleCalculationMkldnnPass); -REGISTER_PASS_CAPABILITY(int8_scale_calculation_mkldnn_pass) +REGISTER_PASS_CAPABILITY(int8_scale_calculation_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination().LE( "conv2d", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass_tester.cc similarity index 96% rename from paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass_tester.cc index fde7fb07b9108..e015276ac1f67 100644 --- a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/int8_scale_calculation_onednn_pass.h" namespace paddle { namespace framework { @@ -95,7 +95,7 @@ void MainTest(bool convWithExistingBias, auto prog = BuildProgramDesc(convWithExistingBias, scale_weights); std::unique_ptr graph(new ir::Graph(prog)); auto pass = - PassRegistry::Instance().Get("int8_scale_calculation_mkldnn_pass"); + PassRegistry::Instance().Get("int8_scale_calculation_onednn_pass"); int original_nodes_num = graph->Nodes().size(); graph.reset(pass->Apply(graph.release())); int current_nodes_num = graph->Nodes().size(); @@ -153,4 +153,4 @@ TEST(Int8ScaleCalculationMkldnnPass, } // namespace framework } // namespace paddle -USE_PASS(int8_scale_calculation_mkldnn_pass); +USE_PASS(int8_scale_calculation_onednn_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc similarity index 90% rename from 
paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc index 04a6f8d6b770d..8f384931a589c 100644 --- a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.h" #include #include @@ -36,12 +36,12 @@ void InterpolateOneDNNPass::ApplyImpl(ir::Graph* graph) const { platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); if (!(graph->Has("use_mkldnn") && graph->Get<bool>("use_mkldnn"))) { - VLOG(3) << "Do not handle interpolate_mkldnn_pass"; + VLOG(3) << "Do not handle interpolate_onednn_pass"; return; } - VLOG(4) << "Handle interpolate_mkldnn_pass"; + VLOG(4) << "Handle interpolate_onednn_pass"; - Init("interpolate_mkldnn_pass", graph); + Init("interpolate_onednn_pass", graph); int found_count = 0; const std::vector<std::string> interpolate_op_types = {"bilinear_interp", @@ -69,5 +69,5 @@ void InterpolateOneDNNPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(interpolate_mkldnn_pass, +REGISTER_PASS(interpolate_onednn_pass, paddle::framework::ir::InterpolateOneDNNPass); diff --git a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/interpolate_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.cc similarity index 93% rename from paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.cc index d547f6fdd1ba2..66c96c268141d 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License.
-#include "paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -39,11 +39,11 @@ void MatmulActivationMkldnnFusePass::FuseMatmulAct( Graph* graph, const std::string& matmul_type, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); - FusePassBase::Init(matmul_type + "_" + act_type + "_mkldnn_fuse_pass", graph); + FusePassBase::Init(matmul_type + "_" + act_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::OperatorActivation matmul_act_pattern( - gpd.mutable_pattern(), "matmul_activation_mkldnn_fuse"); + gpd.mutable_pattern(), "matmul_activation_onednn_fuse"); matmul_act_pattern(matmul_type, act_type); int found_matmul_activation_count = 0; @@ -52,7 +52,7 @@ void MatmulActivationMkldnnFusePass::FuseMatmulAct( VLOG(4) << "handle " + matmul_type + "+" + act_type + " fuse"; if (!IsCompat(subgraph, g)) { - LOG(WARNING) << "matmul_activation_mkldnn_fuse_pass op compat failed."; + LOG(WARNING) << "matmul_activation_onednn_fuse_pass op compat failed."; return; } @@ -288,10 +288,10 @@ MatmulActivationMkldnnFusePass::MatmulActivationMkldnnFusePass() { } // namespace framework } // namespace paddle -REGISTER_PASS(matmul_activation_mkldnn_fuse_pass, +REGISTER_PASS(matmul_activation_onednn_fuse_pass, paddle::framework::ir::MatmulActivationMkldnnFusePass); -REGISTER_PASS_CAPABILITY(matmul_activation_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(matmul_activation_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("fused_matmul", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.h similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.h index ebef63e292438..eec62d9e066fa 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/onednn/matmul_activation_onednn_fuse_pass.h @@ -33,7 +33,7 @@ class MatmulActivationMkldnnFusePass : public FusePassBase { void FuseMatmulAct(Graph *graph, const std::string &matmul_type, - std::string &act_type) const; + std::string &act_type) const; // NOLINT }; } // namespace ir diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.cc similarity index 93% rename from paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.cc index 5bb153d3ece0b..8d80eb57e5032 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_traits.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" @@ -62,7 +62,7 @@ void MatmulElementwiseAddMKLDNNFusePass::FuseMatmulElementwiseAdd( if (FindFuseOption(*matmul, *elementwise_add) != FUSE_MKLDNN) return; if (!IsCompat(subgraph, g)) { LOG(WARNING) - << "op compat for matmul_elementwise_add_mkldnn_fuse_pass failed."; + << "op compat for matmul_elementwise_add_onednn_fuse_pass failed."; return; } @@ -167,9 +167,9 @@ MatmulElementwiseAddMKLDNNFusePass::MatmulElementwiseAddMKLDNNFusePass() { } // namespace framework } // namespace paddle -REGISTER_PASS(matmul_elementwise_add_mkldnn_fuse_pass, +REGISTER_PASS(matmul_elementwise_add_onednn_fuse_pass, paddle::framework::ir::MatmulElementwiseAddMKLDNNFusePass); -REGISTER_PASS_CAPABILITY(matmul_elementwise_add_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(matmul_elementwise_add_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("fused_matmul", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/matmul_elementwise_add_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.cc index a899744672b4b..0b742d763bebc 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -37,12 +37,12 @@ void MatmulTransposeReshapeMKLDNNPass::Fuse( PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); - FusePassBase::Init(matmul_type + "_transpose_reshape_mkldnn_fuse_pass", + FusePassBase::Init(matmul_type + "_transpose_reshape_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::MatmulTransposeReshapePattern mtrp( gpd.mutable_pattern(), - matmul_type + "_transpose_reshape_mkldnn_fuse_pass"); + matmul_type + "_transpose_reshape_onednn_fuse_pass"); mtrp(matmul_type); int found_matmul_transpose_reshape_count = 0; @@ -206,10 +206,10 @@ MatmulTransposeReshapeMKLDNNPass::MatmulTransposeReshapeMKLDNNPass() { } // namespace framework } // namespace paddle -REGISTER_PASS(matmul_transpose_reshape_mkldnn_fuse_pass, +REGISTER_PASS(matmul_transpose_reshape_onednn_fuse_pass, paddle::framework::ir::MatmulTransposeReshapeMKLDNNPass); -REGISTER_PASS_CAPABILITY(matmul_transpose_reshape_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(matmul_transpose_reshape_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("fused_matmul", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/matmul_transpose_reshape_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.cc index a1f74d3423006..3b95f27a2d302 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.h" #include diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h b/paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/multi_gru_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc index 7af7b67c4da49..214b8e12fd0b1 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.h" #include #include @@ -23,7 +23,7 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h b/paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/multi_gru_seq_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h b/paddle/fluid/framework/ir/onednn/onednn_pass_util.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h rename to paddle/fluid/framework/ir/onednn/onednn_pass_util.h diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/onednn/onednn_placement_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc rename to paddle/fluid/framework/ir/onednn/onednn_placement_pass.cc index 23e5497b12fde..7ff379f5e9120 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc +++ b/paddle/fluid/framework/ir/onednn/onednn_placement_pass.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_placement_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/operator.h" @@ -94,10 +94,10 @@ bool MKLDNNPlacementPass::IsSupport(const Node* op) const { } // namespace framework } // namespace paddle -REGISTER_PASS(mkldnn_placement_pass, paddle::framework::ir::MKLDNNPlacementPass) +REGISTER_PASS(onednn_placement_pass, paddle::framework::ir::MKLDNNPlacementPass) .RequirePassAttr("mkldnn_enabled_op_types"); -REGISTER_PASS_CAPABILITY(mkldnn_placement_pass) +REGISTER_PASS_CAPABILITY(onednn_placement_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination().LE( "fusion_gru", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h b/paddle/fluid/framework/ir/onednn/onednn_placement_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h rename to paddle/fluid/framework/ir/onednn/onednn_placement_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc b/paddle/fluid/framework/ir/onednn/onednn_placement_pass_tester.cc similarity index 96% rename from paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/onednn_placement_pass_tester.cc index b7697252a67c4..052c59ef84a99 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/onednn_placement_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_placement_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/utils/tribool.h" @@ -133,7 +133,7 @@ class PlacementPassTest { RegisterOpKernel({"conv2d", "pool2d", "concat", "relu"}); std::unique_ptr graph(new ir::Graph(prog)); - auto pass = PassRegistry::Instance().Get("mkldnn_placement_pass"); + auto pass = PassRegistry::Instance().Get("onednn_placement_pass"); pass->Set("mkldnn_enabled_op_types", new std::unordered_set(mkldnn_enabled_op_types)); @@ -156,7 +156,7 @@ class PlacementPassTest { } void PlacementNameTest() { - auto pass = PassRegistry::Instance().Get("mkldnn_placement_pass"); + auto pass = PassRegistry::Instance().Get("onednn_placement_pass"); EXPECT_EQ(static_cast(pass.get())->GetPlacementName(), "MKLDNN"); } @@ -186,4 +186,4 @@ TEST(MKLDNNPlacementPass, placement_name) { } // namespace framework } // namespace paddle -USE_PASS(mkldnn_placement_pass); +USE_PASS(onednn_placement_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc index b128159237546..a21ddd579be3c 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/operator_reshape2_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.cc index 4f6c2bfe0507b..2910849af5f8d 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/operator_scale_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.cc index 28b01bc065b37..2aea55f473fd4 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/operator_unsqueeze2_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.cc index 11eba402b55d4..2255458535071 100644 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { @@ -131,7 +131,7 @@ void ParamsQuantizationMkldnnPass::QuantizeConv(ir::Graph* graph, LOG(WARNING) << "Pass in op compat failed."; return; } - VLOG(4) << "handle convolution in params_quantization_mkldnn_pass"; + VLOG(4) << "handle convolution in params_quantization_onednn_pass"; GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern); GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern); @@ -179,9 +179,9 @@ void ParamsQuantizationMkldnnPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(params_quantization_mkldnn_pass, +REGISTER_PASS(params_quantization_onednn_pass, paddle::framework::ir::ParamsQuantizationMkldnnPass); -REGISTER_PASS_CAPABILITY(params_quantization_mkldnn_pass) +REGISTER_PASS_CAPABILITY(params_quantization_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination().LE( "conv2d", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h index e681d9701b8d8..c8bf17cb081ec 100644 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h @@ -37,7 +37,7 @@ class ParamsQuantizationMkldnnPass : public FusePassBase { bool with_residual_connection) const; private: - const std::string name_scope_ = "params_quantization_mkldnn_pass"; + const std::string name_scope_ = "params_quantization_onednn_pass"; }; } // namespace ir diff --git 
a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc index 72b07fc8934de..36ff2110e582f 100755 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass_tester.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h" // NOLINT +#include "paddle/fluid/framework/ir/onednn/params_quantization_onednn_pass.h" // NOLINT #include "paddle/fluid/imperative/type_defs.h" #include "paddle/phi/common/place.h" @@ -39,8 +39,8 @@ struct Data { const std::vector& getData() const { return data; } private: - const std::vector shape; - const std::vector data; + const std::vector shape{}; + const std::vector data{}; }; struct TestScope { @@ -245,7 +245,7 @@ struct ParamsQuantizationMkldnnPassTestFixture : public ::testing::Test { void RunPassTest(std::unique_ptr program) { auto graph = program->CreateGraph(); - auto pass = PassRegistry::Instance().Get("params_quantization_mkldnn_pass"); + auto pass = PassRegistry::Instance().Get("params_quantization_onednn_pass"); graph.reset(pass->Apply(graph.release())); program->CheckGraph(graph); @@ -384,4 +384,4 @@ TEST_F(ParamsQuantizationMkldnnPassTestFixture, conv_with_bias_2g2o2i1h1ws) { } // namespace framework } // namespace paddle -USE_PASS(params_quantization_mkldnn_pass); +USE_PASS(params_quantization_onednn_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc b/paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc rename to paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.cc index 734915b0dfe95..6ffd3963504f2 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.h" #include #include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { @@ -703,8 +703,8 @@ void QuantDequantMkldnnPass::RemoveCtrlVars(ir::Graph* graph) const { } void QuantDequantMkldnnPass::ApplyImpl(ir::Graph* graph) const { - VLOG(3) << "Convert paddle slim quantized model to mkldnn quantized model."; - const std::string pattern_name = "quant_dequant_mkldnn_pass"; + VLOG(3) << "Convert paddle slim quantized model to onednn quantized model."; + const std::string pattern_name = "quant_dequant_onednn_pass"; FusePassBase::Init(pattern_name, graph); const std::unordered_set skip_ops = {"conv2d", @@ -753,7 +753,7 @@ void QuantDequantMkldnnPass::ApplyImpl(ir::Graph* graph) const { RemoveCtrlVars(graph); // save var_quant_scales in the temporary save op's attr - // for compute_propagate_scales_mkldnn_pass + // for compute_propagate_scales_onednn_pass SaveInfoInTheTmpOp( graph, "has_quant_info", "var_quant_scales", var_quant_scales); } @@ -762,10 +762,10 @@ void QuantDequantMkldnnPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(quant_dequant_mkldnn_pass, +REGISTER_PASS(quant_dequant_onednn_pass, paddle::framework::ir::QuantDequantMkldnnPass); -REGISTER_PASS_CAPABILITY(quant_dequant_mkldnn_pass) +REGISTER_PASS_CAPABILITY(quant_dequant_onednn_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("conv2d", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h b/paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h rename to paddle/fluid/framework/ir/onednn/quant_dequant_onednn_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.cc index 5d5edb83a9134..37dfec26b36f2 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/quant_transpose2_dequant_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/quant_transpose2_dequant_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc index 07675a3f4efeb..f3250c32604c6 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -53,13 +53,13 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse( PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( "Pointer to graph argument should not be NULL.")); - FusePassBase::Init("reshape_transpose_" + matmul_type + "_mkldnn_fuse_pass", + FusePassBase::Init("reshape_transpose_" + matmul_type + "_onednn_fuse_pass", graph); GraphPatternDetector gpd; patterns::ReshapeTransposeMatmulPattern rtm_pattern( gpd.mutable_pattern(), - "reshape_transpose_" + matmul_type + "_mkldnn_fuse_pass"); + "reshape_transpose_" + matmul_type + "_onednn_fuse_pass"); rtm_pattern(matmul_type, with_reshape_xshape, with_transpose_xshape); @@ -68,7 +68,7 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse( Graph *g) { if (!IsCompat(subgraph, g)) { LOG(WARNING) << "Op compatible check in reshape_transpose_" << matmul_type - << "_mkldnn_fuse_pass failed."; + << "_onednn_fuse_pass failed."; return; } @@ -268,10 +268,10 @@ ReshapeTransposeMatmulMkldnnFusePass::ReshapeTransposeMatmulMkldnnFusePass() { } // namespace framework } // namespace paddle -REGISTER_PASS(reshape_transpose_matmul_mkldnn_fuse_pass, +REGISTER_PASS(reshape_transpose_matmul_onednn_fuse_pass, paddle::framework::ir::ReshapeTransposeMatmulMkldnnFusePass); -REGISTER_PASS_CAPABILITY(reshape_transpose_matmul_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(reshape_transpose_matmul_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("reshape2", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h 
b/paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/reshape_transpose_matmul_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc index 9f50aefc46ce5..7ae647c6d28f7 100644 --- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.cc @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.h" #include #include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h b/paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/scale_matmul_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.cc index e02b167a19e3b..4e409f764491c 100644 --- a/paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.h" #include #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.h b/paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/self_attention_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/self_attention_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc rename to paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc index 764712a2fcd8a..7bce1813fed8a 100644 --- a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc +++ b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h" +#include "paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h" #include @@ -235,9 +235,9 @@ void ShuffleChannelMKLDNNDetectPass::ApplyImpl(ir::Graph* graph) const { } // namespace framework } // namespace paddle -REGISTER_PASS(shuffle_channel_mkldnn_detect_pass, +REGISTER_PASS(shuffle_channel_onednn_detect_pass, paddle::framework::ir::ShuffleChannelMKLDNNDetectPass); -REGISTER_PASS_CAPABILITY(shuffle_channel_mkldnn_detect_pass) +REGISTER_PASS_CAPABILITY(shuffle_channel_onednn_detect_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("reshape2", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h rename to paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc rename to paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc index 4c6fc3774e840..da389d3a1353c 100644 --- a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc +++ b/paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass_tester.cc @@ -16,7 +16,7 @@ #include -#include "paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h" +#include "paddle/fluid/framework/ir/onednn/shuffle_channel_onednn_detect_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { @@ -55,7 +55,7 @@ void MainTest() { int original_nodes_num = graph->Nodes().size(); auto pass = - PassRegistry::Instance().Get("shuffle_channel_mkldnn_detect_pass"); + PassRegistry::Instance().Get("shuffle_channel_onednn_detect_pass"); graph.reset(pass->Apply(graph.release())); int current_nodes_num = graph->Nodes().size(); @@ -82,4 +82,4 @@ TEST(ShuffleChannelOneDNNDetectPass, ShuffleChannelOneDNNDetectPassTest) { } // namespace framework } // namespace paddle -USE_PASS(shuffle_channel_mkldnn_detect_pass); +USE_PASS(shuffle_channel_onednn_detect_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc index 2030a7dadc02e..d18765ff27bdd 100644 --- a/paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/activation_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/softplus_activation_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/softplus_activation_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc similarity index 96% rename from paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc index 1aafcc0614afb..4af9c6a770436 100644 --- a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.cc @@ -11,8 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h b/paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/onednn/squeeze2_transpose2_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/pass.cc b/paddle/fluid/framework/ir/pass.cc index 779d9986ef8a1..0b3ebd324dc7a 100644 --- a/paddle/fluid/framework/ir/pass.cc +++ b/paddle/fluid/framework/ir/pass.cc @@ -29,7 +29,7 @@ class Graph; } // namespace framework } // namespace paddle #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -68,6 +68,7 @@ static const std::vector xpu_support_subgraph_passes = { "constant_folding_pass", "delete_elementwise_mul_op_pass", "generate_sequence_xpu_fuse_pass", + "group_norm_silu_xpu_fuse_pass", "embedding_with_eltwise_add_xpu_fuse_pass", "multi_encoder_xpu_fuse_pass", "multi_encoder_xpu_adaptive_seqlen_fuse_pass", @@ -82,6 +83,7 @@ static const std::vector xpu_support_subgraph_passes = { "fc_xpu_fuse_pass", "link_xpu_op_max_pass", "xpu_delete_cast_op_pass", + "spatial_transformer_resblock_xpu_fuse_pass", }; static std::vector support_subgraph_generate_passes; diff 
--git a/paddle/fluid/framework/ir/transfer_layout_pass.cc b/paddle/fluid/framework/ir/transfer_layout_pass.cc
index c31737958dffb..b989f51dfe8f9 100644
--- a/paddle/fluid/framework/ir/transfer_layout_pass.cc
+++ b/paddle/fluid/framework/ir/transfer_layout_pass.cc
@@ -107,13 +107,17 @@ void TransferLayoutPass::ApplyImpl(ir::Graph *graph) const {
   FusePassBase::Init("fused_conv2d_add_act_layout_transfer", graph);
   auto *scope = param_scope();
-  // only float16 compute precision need insert transfer_layout.
+  // float16 is handled by both backends (cutlass and cudnn); float32 only by
+  // cutlass. Why? Because cudnn NHWC fp32 shows a performance degradation,
+  // so the layout transfer is inserted for fp16, or for fp32 with cutlass on.
   bool is_fp16_precision =
       static_cast<phi::DataType>(Get<int>("model_precision")) ==
           phi::DataType::FLOAT16 ||
       Get<bool>("enable_gpu_mixed");
-  if (!is_fp16_precision) return;
+  bool cutlass_enable = Get<bool>("use_cutlass");
+
+  if (!is_fp16_precision && !cutlass_enable) return;
   PADDLE_ENFORCE_EQ(graph->IsMainGraph(),
                     true,
diff --git a/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.cc
new file mode 100644
index 0000000000000..cea83dae5e8bf
--- /dev/null
+++ b/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.cc
@@ -0,0 +1,666 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
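For reference, the subgraph matched by this new pass computes standard scaled-dot-product cross attention, qkv = softmax(alpha * (q Wq)(kv Wk)^T + mask) (kv Wv). The following is a minimal single-head sketch of the computation the fused cross_attention_xpu op replaces; it assumes row-major float buffers and an additive mask, and every name in it is illustrative, not Paddle or xdnn API.

#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

// out[i, :] = sum_j softmax_j(alpha * <q_i, k_j> + mask[i, j]) * v[j, :]
static void AttentionCoreRef(const std::vector<float>& q,     // [seq_q, d]
                             const std::vector<float>& k,     // [seq_kv, d]
                             const std::vector<float>& v,     // [seq_kv, d]
                             const std::vector<float>& mask,  // [seq_q, seq_kv]
                             float alpha, int seq_q, int seq_kv, int d,
                             std::vector<float>* out) {       // [seq_q, d]
  out->assign(static_cast<size_t>(seq_q) * d, 0.0f);
  std::vector<float> logits(seq_kv);
  for (int i = 0; i < seq_q; ++i) {
    float max_logit = -std::numeric_limits<float>::infinity();
    for (int j = 0; j < seq_kv; ++j) {
      float dot = 0.0f;
      for (int c = 0; c < d; ++c) dot += q[i * d + c] * k[j * d + c];
      logits[j] = alpha * dot + mask[i * seq_kv + j];
      max_logit = std::max(max_logit, logits[j]);
    }
    float denom = 0.0f;
    for (int j = 0; j < seq_kv; ++j) {
      logits[j] = std::exp(logits[j] - max_logit);  // stable softmax
      denom += logits[j];
    }
    for (int j = 0; j < seq_kv; ++j) {
      const float w = logits[j] / denom;
      for (int c = 0; c < d; ++c) (*out)[i * d + c] += w * v[j * d + c];
    }
  }
}

In the pass below, q comes from input_q x q_w + q_bias and k/v from input_kv, each split into head_num heads of size head_dim; alpha is taken from the optional scale op when it is present.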
+ +#include "paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.h" + +#include "glog/logging.h" + +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/xpu/pass_utils.h" +#include "paddle/fluid/framework/op_version_registry.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace framework { +namespace ir { + +namespace patterns { + +struct CrossAttentionFusePattern : public PatternBase { + CrossAttentionFusePattern(PDPattern* pattern, + const std::string& name_scope, + bool with_q_scale); + + // declare operator node's name + PATTERN_DECL_NODE(q_mul); + PATTERN_DECL_NODE(k_mul); + PATTERN_DECL_NODE(v_mul); + PATTERN_DECL_NODE(q_add); + PATTERN_DECL_NODE(k_add); + PATTERN_DECL_NODE(v_add); + PATTERN_DECL_NODE(reshape_1); + PATTERN_DECL_NODE(reshape_2); + PATTERN_DECL_NODE(reshape_3); + PATTERN_DECL_NODE(transpose_1); + PATTERN_DECL_NODE(transpose_2); + PATTERN_DECL_NODE(transpose_3); + PATTERN_DECL_NODE(scale); + PATTERN_DECL_NODE(qk_matmul); + PATTERN_DECL_NODE(qk_add); + PATTERN_DECL_NODE(qk_softmax); + PATTERN_DECL_NODE(qkv_matmul); + PATTERN_DECL_NODE(transpose_4); + PATTERN_DECL_NODE(reshape_4); + + // declare variable node's name + PATTERN_DECL_NODE(input_q); + PATTERN_DECL_NODE(input_kv); + PATTERN_DECL_NODE(mask); + PATTERN_DECL_NODE(q_mul_w); + PATTERN_DECL_NODE(k_mul_w); + PATTERN_DECL_NODE(v_mul_w); + PATTERN_DECL_NODE(q_mul_out); + PATTERN_DECL_NODE(k_mul_out); + PATTERN_DECL_NODE(v_mul_out); + PATTERN_DECL_NODE(q_add_bias); + PATTERN_DECL_NODE(k_add_bias); + PATTERN_DECL_NODE(v_add_bias); + PATTERN_DECL_NODE(q_add_out); + PATTERN_DECL_NODE(k_add_out); + PATTERN_DECL_NODE(v_add_out); + PATTERN_DECL_NODE(reshape_1_out); + PATTERN_DECL_NODE(reshape_2_out); + PATTERN_DECL_NODE(reshape_3_out); + PATTERN_DECL_NODE(transpose_1_out); + PATTERN_DECL_NODE(transpose_2_out); + PATTERN_DECL_NODE(transpose_3_out); + PATTERN_DECL_NODE(scale_out); + PATTERN_DECL_NODE(qk_matmul_out); + PATTERN_DECL_NODE(qk_add_out); + PATTERN_DECL_NODE(qk_softmax_out); + PATTERN_DECL_NODE(qkv_matmul_out); + PATTERN_DECL_NODE(transpose_4_out); + PATTERN_DECL_NODE(output); + + private: + bool with_q_scale_{false}; +}; + +CrossAttentionFusePattern::CrossAttentionFusePattern( + PDPattern* pattern, const std::string& name_scope, bool with_q_scale) + : PatternBase(pattern, name_scope, name_scope), + with_q_scale_(with_q_scale) { + auto* input_q = pattern->NewNode(input_q_repr()) + ->assert_is_op_input("matmul_v2", "X") + ->AsInput(); + auto* input_kv = pattern->NewNode(input_kv_repr()) + ->assert_is_op_input("matmul_v2", "X") + ->AsInput(); + auto* mask = pattern->NewNode(mask_repr()) + ->assert_is_op_input("elementwise_add", "Y") + ->AsInput(); + auto* q_mul_w = + pattern->NewNode(q_mul_w_repr())->assert_is_op_input("matmul_v2", "Y"); + auto* q_mul = pattern->NewNode(q_mul_repr())->assert_is_op("matmul_v2"); + auto* q_mul_out = pattern->NewNode(q_mul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("elementwise_add", "X"); + auto* k_mul_w = + pattern->NewNode(k_mul_w_repr())->assert_is_op_input("matmul_v2", "Y"); + auto* k_mul = pattern->NewNode(k_mul_repr())->assert_is_op("matmul_v2"); + auto* k_mul_out = pattern->NewNode(k_mul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("elementwise_add", "X"); + auto* v_mul_w = + pattern->NewNode(v_mul_w_repr())->assert_is_op_input("matmul_v2", "Y"); + auto* v_mul = pattern->NewNode(v_mul_repr())->assert_is_op("matmul_v2"); + auto* 
v_mul_out = pattern->NewNode(v_mul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("elementwise_add", "X"); + auto* q_add = pattern->NewNode(q_add_repr())->assert_is_op("elementwise_add"); + auto* q_add_bias = pattern->NewNode(q_add_bias_repr()) + ->assert_is_op_input("elementwise_add", "Y"); + auto* q_add_out = pattern->NewNode(q_add_out_repr()) + ->assert_is_op_output("elementwise_add", "Out") + ->assert_is_op_input("reshape2", "X"); + auto* k_add = pattern->NewNode(k_add_repr())->assert_is_op("elementwise_add"); + auto* k_add_bias = pattern->NewNode(k_add_bias_repr()) + ->assert_is_op_input("elementwise_add", "Y"); + auto* k_add_out = pattern->NewNode(k_add_out_repr()) + ->assert_is_op_output("elementwise_add", "Out") + ->assert_is_op_input("reshape2", "X"); + auto* v_add = pattern->NewNode(v_add_repr())->assert_is_op("elementwise_add"); + auto* v_add_bias = pattern->NewNode(v_add_bias_repr()) + ->assert_is_op_input("elementwise_add", "Y"); + auto* v_add_out = pattern->NewNode(v_add_out_repr()) + ->assert_is_op_output("elementwise_add", "Out") + ->assert_is_op_input("reshape2", "X"); + auto* reshape_1 = + pattern->NewNode(reshape_1_repr())->assert_is_op("reshape2"); + auto* reshape_1_out = pattern->NewNode(reshape_1_out_repr()) + ->assert_is_op_output("reshape2", "Out") + ->assert_is_op_input("transpose2", "X"); + auto* reshape_2 = + pattern->NewNode(reshape_2_repr())->assert_is_op("reshape2"); + auto* reshape_2_out = pattern->NewNode(reshape_2_out_repr()) + ->assert_is_op_output("reshape2", "Out") + ->assert_is_op_input("transpose2", "X"); + auto* reshape_3 = + pattern->NewNode(reshape_3_repr())->assert_is_op("reshape2"); + auto* reshape_3_out = pattern->NewNode(reshape_3_out_repr()) + ->assert_is_op_output("reshape2", "Out") + ->assert_is_op_input("transpose2", "X"); + auto* transpose_1 = + pattern->NewNode(transpose_1_repr()) + ->assert_is_op("transpose2") + ->assert_more([](Node* node) { + auto* op_desc = node->Op(); + auto axis = op_desc->GetAttrIfExists>("axis"); + size_t axis_rank = axis.size(); + return axis_rank == 4 && axis[0] == 0 && axis[1] == 2 && + axis[2] == 1 && axis[3] == 3; + }); + + auto* transpose_2 = + pattern->NewNode(transpose_2_repr()) + ->assert_is_op("transpose2") + ->assert_more([](Node* node) { + auto* op_desc = node->Op(); + auto axis = op_desc->GetAttrIfExists>("axis"); + size_t axis_rank = axis.size(); + return axis_rank == 4 && axis[0] == 0 && axis[1] == 2 && + axis[2] == 1 && axis[3] == 3; + }); + auto* transpose_2_out = pattern->NewNode(transpose_2_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("matmul_v2", "Y"); + auto* transpose_3 = + pattern->NewNode(transpose_3_repr()) + ->assert_is_op("transpose2") + ->assert_more([](Node* node) { + auto* op_desc = node->Op(); + auto axis = op_desc->GetAttrIfExists>("axis"); + size_t axis_rank = axis.size(); + return axis_rank == 4 && axis[0] == 0 && axis[1] == 2 && + axis[2] == 1 && axis[3] == 3; + }); + auto* transpose_3_out = pattern->NewNode(transpose_3_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("matmul_v2", "Y"); + PDNode* transpose_1_out = nullptr; + PDNode* scale = nullptr; + PDNode* scale_out = nullptr; + if (with_q_scale_) { + transpose_1_out = pattern->NewNode(transpose_1_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("scale", "X"); + scale = pattern->NewNode(scale_repr())->assert_is_op("scale"); + scale_out = pattern->NewNode(scale_out_repr()) + 
->assert_is_op_output("scale", "Out") + ->assert_is_op_input("matmul_v2", "X"); + } else { + transpose_1_out = pattern->NewNode(transpose_1_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("matmul_v2", "X"); + } + auto* qk_matmul = + pattern->NewNode(qk_matmul_repr())->assert_is_op("matmul_v2"); + auto* qk_matmul_out = pattern->NewNode(qk_matmul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("elementwise_add", "X"); + auto* qk_add = + pattern->NewNode(qk_add_repr())->assert_is_op("elementwise_add"); + auto* qk_add_out = pattern->NewNode(qk_add_out_repr()) + ->assert_is_op_output("elementwise_add", "Out") + ->assert_is_op_input("softmax", "X"); + auto* qk_softmax = + pattern->NewNode(qk_softmax_repr())->assert_is_op("softmax"); + auto* qk_softmax_out = pattern->NewNode(qk_softmax_out_repr()) + ->assert_is_op_output("softmax", "Out") + ->assert_is_op_input("matmul_v2", "X"); + auto* qkv_matmul = + pattern->NewNode(qkv_matmul_repr())->assert_is_op("matmul_v2"); + auto* qkv_matmul_out = pattern->NewNode(qkv_matmul_out_repr()) + ->assert_is_op_output("matmul_v2", "Out") + ->assert_is_op_input("transpose2", "X"); + auto* transpose_4 = + pattern->NewNode(transpose_4_repr())->assert_is_op("transpose2"); + auto* transpose_4_out = pattern->NewNode(transpose_4_out_repr()) + ->assert_is_op_output("transpose2", "Out") + ->assert_is_op_input("reshape2", "X"); + auto* reshape_4 = + pattern->NewNode(reshape_4_repr())->assert_is_op("reshape2"); + auto* output = pattern->NewNode(output_repr()) + ->AsOutput() + ->assert_is_op_output("reshape2", "Out"); + + // link nodes + q_mul->LinksFrom({input_q, q_mul_w}).LinksTo({q_mul_out}); + q_add->LinksFrom({q_mul_out, q_add_bias}).LinksTo({q_add_out}); + reshape_1->LinksFrom({q_add_out}).LinksTo({reshape_1_out}); + transpose_1->LinksFrom({reshape_1_out}).LinksTo({transpose_1_out}); + k_mul->LinksFrom({input_kv, k_mul_w}).LinksTo({k_mul_out}); + k_add->LinksFrom({k_mul_out, k_add_bias}).LinksTo({k_add_out}); + reshape_2->LinksFrom({k_add_out}).LinksTo({reshape_2_out}); + transpose_2->LinksFrom({reshape_2_out}).LinksTo({transpose_2_out}); + if (with_q_scale_) { + scale->LinksFrom({transpose_1_out}).LinksTo({scale_out}); + qk_matmul->LinksFrom({scale_out, transpose_2_out}).LinksTo({qk_matmul_out}); + } else { + qk_matmul->LinksFrom({transpose_1_out, transpose_2_out}) + .LinksTo({qk_matmul_out}); + } + qk_add->LinksFrom({qk_matmul_out, mask}).LinksTo({qk_add_out}); + qk_softmax->LinksFrom({qk_add_out}).LinksTo({qk_softmax_out}); + v_mul->LinksFrom({input_kv, v_mul_w}).LinksTo({v_mul_out}); + v_add->LinksFrom({v_mul_out, v_add_bias}).LinksTo({v_add_out}); + reshape_3->LinksFrom({v_add_out}).LinksTo({reshape_3_out}); + transpose_3->LinksFrom({reshape_3_out}).LinksTo({transpose_3_out}); + qkv_matmul->LinksFrom({qk_softmax_out, transpose_3_out}) + .LinksTo({qkv_matmul_out}); + transpose_4->LinksFrom({qkv_matmul_out}).LinksTo({transpose_4_out}); + reshape_4->LinksFrom({transpose_4_out}).LinksTo({output}); +} + +} // namespace patterns + +void CrossAttentionXPUFusePass::PrepareQKVWeight(Graph* graph, + Scope* scope, + BlockDesc* block, + Node* w, + Node** real_w, + Node** w_max) const { + phi::DenseTensor w_tensor; + phi::DenseTensor w_int16_tensor; + phi::DenseTensor w_max_tensor; + + Assign(scope->Var(w->Name())->Get(), &w_tensor); + CastToFp32(&w_tensor, &w_int16_tensor); + ConvertWithQuant( + &w_int16_tensor, &w_max_tensor, nullptr, false); + + size_t real_w_hash = HashTensor(w_int16_tensor); + size_t 
w_max_hash = HashTensor(w_max_tensor); + std::string real_w_name = std::to_string(real_w_hash); + std::string w_max_name = std::to_string(w_max_hash); + + *real_w = FindNodeWithName(graph, real_w_name); + + if (*real_w == nullptr) { + // Create real_w node + // Update real_w var_desc in block + VarDesc real_w_desc(real_w_name); + real_w_desc.SetPersistable(true); + real_w_desc.SetShape(common::vectorize(w_int16_tensor.dims())); + real_w_desc.SetDataType( + framework::TransToProtoVarType(w_int16_tensor.dtype())); + *real_w = graph->CreateVarNode(&real_w_desc); + auto* block_real_w_desc = block->Var(real_w_name); + block_real_w_desc->SetPersistable(real_w_desc.Persistable()); + block_real_w_desc->SetShape(real_w_desc.GetShape()); + block_real_w_desc->SetDataType(real_w_desc.GetDataType()); + // Create w_max node + // Update w_max var_desc in block + VarDesc w_max_desc(w_max_name); + w_max_desc.SetPersistable(true); + w_max_desc.SetShape(common::vectorize(w_max_tensor.dims())); + w_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32); + *w_max = graph->CreateVarNode(&w_max_desc); + auto* block_w_max_desc = block->Var(w_max_name); + block_w_max_desc->SetPersistable(w_max_desc.Persistable()); + block_w_max_desc->SetShape(w_max_desc.GetShape()); + block_w_max_desc->SetDataType(w_max_desc.GetDataType()); + + // Find real_w/w_max variable in scope + auto* w_var = scope->FindVar(real_w_name); + if (w_var == nullptr) { + // Create qkv_w_intx/qkv_w_max variable/tensor + Assign(w_int16_tensor, + scope->Var(real_w_name)->GetMutable()); + Assign(w_max_tensor, + scope->Var(w_max_name)->GetMutable()); + } else { + // Share the same variable + PADDLE_ENFORCE_NOT_NULL( + scope->FindVar(w_max_name), + platform::errors::Fatal( + "w_max(%s) variable should not be nullptr if real_w(%s) " + "variable is exist.", + w_max_name, + real_w_name)); + } + } else { + *w_max = FindNodeWithName(graph, w_max_name); + PADDLE_ENFORCE_NOT_NULL( + *w_max, + platform::errors::Fatal( + "w_max(%s) variable should not be nullptr if real_w(%s) " + "variable is exist.", + w_max_name, + real_w_name)); + } +} + +void CrossAttentionXPUFusePass::PrepareQKVBias(Graph* graph, + Scope* scope, + BlockDesc* block, + Node* q_bias, + Node* k_bias, + Node* v_bias, + Node** real_q_bias, + Node** real_k_bias, + Node** real_v_bias) const { + phi::DenseTensor* q_bias_tensor; + phi::DenseTensor* k_bias_tensor; + phi::DenseTensor* v_bias_tensor; + phi::DenseTensor q_bias_fp32_tensor; + phi::DenseTensor k_bias_fp32_tensor; + phi::DenseTensor v_bias_fp32_tensor; + q_bias_tensor = scope->Var(q_bias->Name())->GetMutable(); + k_bias_tensor = scope->Var(k_bias->Name())->GetMutable(); + v_bias_tensor = scope->Var(v_bias->Name())->GetMutable(); + CastToFp32(q_bias_tensor, &q_bias_fp32_tensor); + CastToFp32(k_bias_tensor, &k_bias_fp32_tensor); + CastToFp32(v_bias_tensor, &v_bias_fp32_tensor); + + size_t q_bias_hash = HashTensor(q_bias_fp32_tensor); + std::string q_bias_name = std::to_string(q_bias_hash); + *real_q_bias = FindNodeWithName(graph, q_bias_name); + + size_t k_bias_hash = HashTensor(k_bias_fp32_tensor); + std::string k_bias_name = std::to_string(k_bias_hash); + *real_k_bias = FindNodeWithName(graph, k_bias_name); + + size_t v_bias_hash = HashTensor(v_bias_fp32_tensor); + std::string v_bias_name = std::to_string(v_bias_hash); + *real_v_bias = FindNodeWithName(graph, v_bias_name); + if (*real_q_bias == nullptr) { + // Create q_bias node + // Update q_bias var_desc in block + VarDesc q_bias_desc(q_bias_name); + 
q_bias_desc.SetPersistable(true); + q_bias_desc.SetShape(common::vectorize(q_bias_fp32_tensor.dims())); + q_bias_desc.SetDataType( + framework::TransToProtoVarType(q_bias_fp32_tensor.dtype())); + *real_q_bias = graph->CreateVarNode(&q_bias_desc); + auto* block_q_bias_desc = block->Var(q_bias_name); + block_q_bias_desc->SetPersistable(q_bias_desc.Persistable()); + block_q_bias_desc->SetShape(q_bias_desc.GetShape()); + block_q_bias_desc->SetDataType(q_bias_desc.GetDataType()); + Assign(q_bias_fp32_tensor, + scope->Var(q_bias_name)->GetMutable()); + } + if (*real_k_bias == nullptr) { + // Create k_bias node + // Update k_bias var_desc in block + VarDesc k_bias_desc(k_bias_name); + k_bias_desc.SetPersistable(true); + k_bias_desc.SetShape(common::vectorize(k_bias_fp32_tensor.dims())); + k_bias_desc.SetDataType( + framework::TransToProtoVarType(k_bias_fp32_tensor.dtype())); + *real_k_bias = graph->CreateVarNode(&k_bias_desc); + auto* block_k_bias_desc = block->Var(k_bias_name); + block_k_bias_desc->SetPersistable(k_bias_desc.Persistable()); + block_k_bias_desc->SetShape(k_bias_desc.GetShape()); + block_k_bias_desc->SetDataType(k_bias_desc.GetDataType()); + Assign(k_bias_fp32_tensor, + scope->Var(k_bias_name)->GetMutable()); + } + if (*real_v_bias == nullptr) { + // Create v_bias node + // Update v_bias var_desc in block + VarDesc v_bias_desc(v_bias_name); + v_bias_desc.SetPersistable(true); + v_bias_desc.SetShape(common::vectorize(v_bias_fp32_tensor.dims())); + v_bias_desc.SetDataType( + framework::TransToProtoVarType(v_bias_fp32_tensor.dtype())); + *real_v_bias = graph->CreateVarNode(&v_bias_desc); + auto* block_v_bias_desc = block->Var(v_bias_name); + block_v_bias_desc->SetPersistable(v_bias_desc.Persistable()); + block_v_bias_desc->SetShape(v_bias_desc.GetShape()); + block_v_bias_desc->SetDataType(v_bias_desc.GetDataType()); + Assign(v_bias_fp32_tensor, + scope->Var(v_bias_name)->GetMutable()); + } +} + +void CrossAttentionXPUFusePass::ApplyCrossAttentionXPUFuse( + ir::Graph* graph, bool with_q_scale) const { + GraphPatternDetector gpd; + patterns::CrossAttentionFusePattern pattern( + gpd.mutable_pattern(), name_scope_, with_q_scale); + int found_subgraph_count = 0; + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* graph) { + VLOG(4) << "handle CrossAttentionXPUFusePass"; + + // declare operator node's name + GET_IR_NODE(q_mul); + GET_IR_NODE(k_mul); + GET_IR_NODE(v_mul); + GET_IR_NODE(q_add); + GET_IR_NODE(k_add); + GET_IR_NODE(v_add); + GET_IR_NODE(reshape_1); + GET_IR_NODE(reshape_2); + GET_IR_NODE(reshape_3); + GET_IR_NODE(transpose_1); + GET_IR_NODE(transpose_2); + GET_IR_NODE(transpose_3); + GET_IR_NODE(scale); + GET_IR_NODE(qk_matmul); + GET_IR_NODE(qk_add); + GET_IR_NODE(qk_softmax); + GET_IR_NODE(qkv_matmul); + GET_IR_NODE(transpose_4); + GET_IR_NODE(reshape_4); + + // declare variable node's name + GET_IR_NODE(input_q); + GET_IR_NODE(input_kv); + GET_IR_NODE(mask); + GET_IR_NODE(q_mul_w); + GET_IR_NODE(k_mul_w); + GET_IR_NODE(v_mul_w); + GET_IR_NODE(q_mul_out); + GET_IR_NODE(k_mul_out); + GET_IR_NODE(v_mul_out); + GET_IR_NODE(q_add_bias); + GET_IR_NODE(k_add_bias); + GET_IR_NODE(v_add_bias); + GET_IR_NODE(q_add_out); + GET_IR_NODE(k_add_out); + GET_IR_NODE(v_add_out); + GET_IR_NODE(reshape_1_out); + GET_IR_NODE(reshape_2_out); + GET_IR_NODE(reshape_3_out); + GET_IR_NODE(transpose_1_out); + GET_IR_NODE(transpose_2_out); + GET_IR_NODE(transpose_3_out); + GET_IR_NODE(scale_out); + GET_IR_NODE(qk_matmul_out); + GET_IR_NODE(qk_add_out); + 
GET_IR_NODE(qk_softmax_out); + GET_IR_NODE(qkv_matmul_out); + GET_IR_NODE(transpose_4_out); + GET_IR_NODE(output); + + // generate fuse op + auto* scope = param_scope(); + auto* block = q_mul->Op()->Block(); + framework::OpDesc fused_op_desc(block); + fused_op_desc.SetType("cross_attention_xpu"); + + Node* real_q_w = nullptr; + Node* q_w_max = nullptr; + Node* real_k_w = nullptr; + Node* k_w_max = nullptr; + Node* real_v_w = nullptr; + Node* v_w_max = nullptr; + PrepareQKVWeight(graph, scope, block, q_mul_w, &real_q_w, &q_w_max); + PrepareQKVWeight(graph, scope, block, k_mul_w, &real_k_w, &k_w_max); + PrepareQKVWeight(graph, scope, block, v_mul_w, &real_v_w, &v_w_max); + + std::vector fc_weight_nodes = {real_q_w, real_k_w, real_v_w}; + std::vector fc_weight_names; + for (auto* node : fc_weight_nodes) { + if (node) { + fc_weight_names.push_back(node->Name()); + } + } + std::vector fc_weight_max_nodes = {q_w_max, k_w_max, v_w_max}; + std::vector fc_weight_max_names; + for (auto* node : fc_weight_max_nodes) { + if (node) { + fc_weight_max_names.push_back(node->Name()); + } + } + + Node* q_add_bias_fp32 = nullptr; + Node* k_add_bias_fp32 = nullptr; + Node* v_add_bias_fp32 = nullptr; + PrepareQKVBias(graph, + scope, + block, + q_add_bias, + k_add_bias, + v_add_bias, + &q_add_bias_fp32, + &k_add_bias_fp32, + &v_add_bias_fp32); + std::vector fc_bias_nodes = { + q_add_bias_fp32, k_add_bias_fp32, v_add_bias_fp32}; + std::vector fc_bias_names; + for (auto* node : fc_bias_nodes) { + if (node) { + fc_bias_names.push_back(node->Name()); + } + } + + // set input of fuse_op + fused_op_desc.SetInput("input_q", {input_q->Name()}); + fused_op_desc.SetInput("input_kv", {input_kv->Name()}); + fused_op_desc.SetInput("fc_weight", fc_weight_names); + fused_op_desc.SetInput("fc_weight_max", fc_weight_max_names); + fused_op_desc.SetInput("fc_bias", fc_bias_names); + fused_op_desc.SetInput("mask", {mask->Name()}); + + // set attributes of fuse_op + if (with_q_scale) { + float scale_val = PADDLE_GET_CONST(float, scale->Op()->GetAttr("scale")); + fused_op_desc.SetAttr("alpha", scale_val); + VLOG(4) << "while with_q_scale, scale_val = " << scale_val; + } else { + // in xdnn, 0.0f is default value of NewBaseAttnParam.alpha + fused_op_desc.SetAttr("alpha", 0.0f); + } + fused_op_desc.SetAttr( + "head_num", static_cast(transpose_1_out->Var()->GetShape()[1])); + fused_op_desc.SetAttr( + "head_dim", static_cast(transpose_1_out->Var()->GetShape()[3])); + // TODO(tianrui): support more out_dtype + fused_op_desc.SetAttr("out_dtype", input_q->Var()->GetDataType()); + + // set output of fuse_op + VarDesc fused_op_out_max_desc("qkv_max"); + Node* fused_op_out_max = graph->CreateVarNode(&fused_op_out_max_desc); + fused_op_desc.SetOutput("qkv_max", {"qkv_max"}); + fused_op_desc.SetOutput("qkv", {output->Name()}); + + auto* fused_op = graph->CreateOpNode(&fused_op_desc); + + // link input of fuse_op + IR_NODE_LINK_TO(input_q, fused_op); + IR_NODE_LINK_TO(input_kv, fused_op); + for (auto* node : fc_weight_nodes) { + if (node) { + IR_NODE_LINK_TO(node, fused_op); + } + } + for (auto* node : fc_weight_max_nodes) { + if (node) { + IR_NODE_LINK_TO(node, fused_op); + } + } + for (auto* node : fc_bias_nodes) { + if (node) { + IR_NODE_LINK_TO(node, fused_op); + } + } + // link output of fuse_op + IR_NODE_LINK_TO(fused_op, output); + IR_NODE_LINK_TO(fused_op, fused_op_out_max); + + // delete useless node + std::unordered_set del_node_set; + del_node_set.insert(q_mul); + del_node_set.insert(q_mul_out); + del_node_set.insert(k_mul); + 
del_node_set.insert(k_mul_out);
+    del_node_set.insert(v_mul);
+    del_node_set.insert(v_mul_out);
+    del_node_set.insert(q_add);
+    del_node_set.insert(q_add_out);
+    del_node_set.insert(k_add);
+    del_node_set.insert(k_add_out);
+    del_node_set.insert(v_add);
+    del_node_set.insert(v_add_out);
+    del_node_set.insert(reshape_1);
+    del_node_set.insert(reshape_1_out);
+    del_node_set.insert(reshape_2);
+    del_node_set.insert(reshape_2_out);
+    del_node_set.insert(reshape_3);
+    del_node_set.insert(reshape_3_out);
+    del_node_set.insert(transpose_1);
+    del_node_set.insert(transpose_1_out);
+    del_node_set.insert(transpose_2);
+    del_node_set.insert(transpose_2_out);
+    del_node_set.insert(transpose_3);
+    del_node_set.insert(transpose_3_out);
+    del_node_set.insert(qk_matmul);
+    del_node_set.insert(qk_matmul_out);
+    del_node_set.insert(qk_add);
+    del_node_set.insert(qk_add_out);
+    del_node_set.insert(qk_softmax);
+    del_node_set.insert(qk_softmax_out);
+    del_node_set.insert(qkv_matmul);
+    del_node_set.insert(qkv_matmul_out);
+    del_node_set.insert(transpose_4);
+    del_node_set.insert(transpose_4_out);
+    del_node_set.insert(reshape_4);
+    if (with_q_scale) {
+      del_node_set.insert(scale);
+      del_node_set.insert(scale_out);
+    }
+    GraphSafeRemoveNodes(graph, del_node_set);
+
+    found_subgraph_count++;
+  };
+
+  gpd(graph, handler);
+  AddStatis(found_subgraph_count);
+}
+
+void CrossAttentionXPUFusePass::ApplyImpl(ir::Graph* graph) const {
+  PADDLE_ENFORCE_NOT_NULL(
+      graph, platform::errors::PreconditionNotMet("graph should not be null."));
+  Init(name_scope_, graph);
+
+  for (auto with_q_scale : {true, false}) {
+    ApplyCrossAttentionXPUFuse(graph, with_q_scale);
+  }
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(cross_attention_xpu_fuse_pass,
+              paddle::framework::ir::CrossAttentionXPUFusePass);
+
+REGISTER_PASS_CAPABILITY(cross_attention_xpu_fuse_pass)
+    .AddCombination(
+        paddle::framework::compatible::OpVersionComparatorCombination().EQ(
+            "cross_attention_xpu", 0));
diff --git a/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.h b/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.h
new file mode 100644
index 0000000000000..9a04275294ea8
--- /dev/null
+++ b/paddle/fluid/framework/ir/xpu/cross_attention_xpu_fuse_pass.h
@@ -0,0 +1,126 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace phi {
+class DenseTensor;
+}  // namespace phi
+
+namespace paddle {
+namespace framework {
+class Scope;
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+/*
+This pass is used to fuse the cross attention subgraph into one op in
+decoder models.
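+
+In formula form (a summary of the pattern below):
+  qkv = softmax(alpha * (q Wq) (kv Wk)^T + mask) (kv Wv)
+where q/k/v are first reshaped to [batch, head_num, seq, head_dim]; alpha is
+the factor of the optional scale op (the attr falls back to 0.0f, xdnn's
+NewBaseAttnParam.alpha default, when scale is absent), and the result is
+reshaped back to [batch, seq_q, head_num * head_dim].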
+ +Origin subgraph: + + mask input_q input_kv + | | | | + | | |-----------| + | matmul matmul matmul + | |q |k |v + | | | | + | | | | + | add add add + | | | | + | | | | + | reshape reshape reshape + | | | | + | | | | + | transpose transpose transpose + | | | | + | | | | + | (scale) | | + | | | | + \ |(x) |(y) | + \ \ / | + \ qk_matmul | + \ | | + \ | | + add / + | / + | / + softmax / + \ / + \ / + qkv_matmul + | + | + transpose + | + | + reshape + | + | + output + +------------------------------------------------------- +Fused subgraph: + input_q input_kv + | | + | | + | | + cross_attention_xpu + | + | + | + output + +*/ + +class CrossAttentionXPUFusePass : public FusePassBase { + protected: + void ApplyImpl(ir::Graph* graph) const override; + + private: + void ApplyCrossAttentionXPUFuse(ir::Graph* graph, bool with_q_scale) const; + + // 1. Generate q/k/v_w_max tensor + // 2. Quant q/k/v_w to int16 + void PrepareQKVWeight(Graph* graph, + Scope* scope, + BlockDesc* block, + Node* w, + Node** real_w, + Node** w_max) const; + + // Cast fc_bias to fp32 + void PrepareQKVBias(Graph* graph, + Scope* scope, + BlockDesc* block, + Node* q_bias, + Node* k_bias, + Node* v_bias, + Node** real_q_bias, + Node** real_k_bias, + Node** real_v_bias) const; + + const std::string name_scope_{"cross_attention_xpu_fuse_pass"}; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/xpu/decoder_attention_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/decoder_attention_xpu_fuse_pass.cc index ad8dd1a55a868..c86180e24088a 100644 --- a/paddle/fluid/framework/ir/xpu/decoder_attention_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/decoder_attention_xpu_fuse_pass.cc @@ -17,6 +17,7 @@ #include "glog/logging.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/quantize_helper.h" #include "paddle/fluid/framework/ir/xpu/pass_utils.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -162,15 +163,15 @@ DecoderAttentionFusePattern::DecoderAttentionFusePattern( // link nodes reshape2_1->LinksFrom({input_q}).LinksTo({reshape2_1_out}); - reshape2_2->LinksFrom({input_k}).LinksTo({reshape2_2_out}); - reshape2_3->LinksFrom({input_v}).LinksTo({reshape2_3_out}); transpose2_1->LinksFrom({reshape2_1_out}).LinksTo({transpose2_1_out}); + reshape2_2->LinksFrom({input_k}).LinksTo({reshape2_2_out}); transpose2_2->LinksFrom({reshape2_2_out}).LinksTo({transpose2_2_out}); - transpose2_3->LinksFrom({reshape2_3_out}).LinksTo({transpose2_3_out}); qk_matmul->LinksFrom({transpose2_1_out, transpose2_2_out}) .LinksTo({qk_matmul_out}); scale->LinksFrom({qk_matmul_out}).LinksTo({scale_out}); qk_softmax->LinksFrom({scale_out}).LinksTo({qk_softmax_out}); + reshape2_3->LinksFrom({input_v}).LinksTo({reshape2_3_out}); + transpose2_3->LinksFrom({reshape2_3_out}).LinksTo({transpose2_3_out}); qkv_matmul->LinksFrom({qk_softmax_out, transpose2_3_out}) .LinksTo({qkv_matmul_out}); transpose2_4->LinksFrom({qkv_matmul_out}).LinksTo({transpose2_4_out}); @@ -222,6 +223,7 @@ void DecoderAttentionXPUFusePass::ApplyDecoderAttentionXPUFuse( GET_IR_NODE(output); // Generate fuse op + auto* scope = param_scope(); auto* block = reshape2_1->Op()->Block(); framework::OpDesc fused_op_desc(block); fused_op_desc.SetType("qkv_attention_xpu"); @@ -230,6 +232,54 @@ void DecoderAttentionXPUFusePass::ApplyDecoderAttentionXPUFuse( fused_op_desc.SetInput("q", {input_q->Name()}); fused_op_desc.SetInput("k", 
{input_k->Name()});
   fused_op_desc.SetInput("v", {input_v->Name()});
+  std::unordered_map<std::string, std::vector<float>> var_quant_scales =
+      GetQuantInfoFromTheGraph(graph, "has_quant_info", "var_quant_scales");
+  // record q/k/v max, qk_max, and qkv_max
+  std::vector<Node*> input_max_nodes;
+  if (var_quant_scales.find(input_q->Name()) != var_quant_scales.end() &&
+      var_quant_scales.find(input_k->Name()) != var_quant_scales.end() &&
+      var_quant_scales.find(input_v->Name()) != var_quant_scales.end() &&
+      var_quant_scales.find(qk_matmul_out->Name()) !=
+          var_quant_scales.end() &&
+      var_quant_scales.find(qkv_matmul_out->Name()) !=
+          var_quant_scales.end()) {
+    std::vector<float> input_max_vec;
+    input_max_vec.push_back(var_quant_scales.at(input_q->Name())[0]);
+    input_max_vec.push_back(var_quant_scales.at(input_k->Name())[0]);
+    input_max_vec.push_back(var_quant_scales.at(input_v->Name())[0]);
+    input_max_vec.push_back(var_quant_scales.at(qk_matmul_out->Name())[0]);
+    input_max_vec.push_back(var_quant_scales.at(qkv_matmul_out->Name())[0]);
+    std::vector<std::string> quant_max_names = {
+        "q_max", "k_max", "v_max", "qk_max", "qkv_max"};
+    for (size_t i = 0; i < input_max_vec.size(); i++) {
+      std::string input_max_name =
+          input_q->Name() + "_" + std::to_string(i) + "_max_in";
+      int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(-1);
+      VarDesc input_max_desc(input_max_name);
+      input_max_desc.SetPersistable(true);
+      input_max_desc.SetShape({static_cast<int64_t>(max_ptr_size)});
+      input_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32);
+      Node* input_max_in = graph->CreateVarNode(&input_max_desc);
+      auto* block_input_max_in_desc = block->Var(input_max_name);
+      block_input_max_in_desc->SetPersistable(input_max_desc.Persistable());
+      block_input_max_in_desc->SetShape(input_max_desc.GetShape());
+      block_input_max_in_desc->SetDataType(input_max_desc.GetDataType());
+      phi::DenseTensor input_max_in_cpu_tensor;
+      auto* cpu_ctx = static_cast<phi::CPUContext*>(
+          platform::DeviceContextPool::Instance().Get(phi::CPUPlace()));
+      input_max_in_cpu_tensor.set_type(phi::DataType::FLOAT32);
+      input_max_in_cpu_tensor.Resize({max_ptr_size});
+      std::vector<float> input_max(max_ptr_size, input_max_vec[i]);
+      memcpy(cpu_ctx->Alloc<float>(&input_max_in_cpu_tensor),
+             input_max.data(),
+             max_ptr_size * sizeof(float));
+      Assign(input_max_in_cpu_tensor,
+             scope->Var(input_max_name)->GetMutable<phi::DenseTensor>());
+      fused_op_desc.SetInput(quant_max_names[i], {input_max_name});
+
+      input_max_nodes.push_back(input_max_in);
+    }
+  }
 
   // set attributes of fuse_op
   float scale_val = PADDLE_GET_CONST(float, scale->Op()->GetAttr("scale"));
@@ -245,9 +295,6 @@ void DecoderAttentionXPUFusePass::ApplyDecoderAttentionXPUFuse(
   fused_op_desc.SetAttr("out_dtype", input_q->Var()->GetDataType());
 
   // set output of fuse_op
-  VarDesc fused_op_out_max_desc("qkv_max");
-  Node* fused_op_out_max = graph->CreateVarNode(&fused_op_out_max_desc);
-  fused_op_desc.SetOutput("qkv_max", {"qkv_max"});
   fused_op_desc.SetOutput("qkv", {output->Name()});
 
   auto* fused_op = graph->CreateOpNode(&fused_op_desc);
@@ -256,7 +303,9 @@
   IR_NODE_LINK_TO(input_k, fused_op);
   IR_NODE_LINK_TO(input_v, fused_op);
   IR_NODE_LINK_TO(fused_op, output);
-  IR_NODE_LINK_TO(fused_op, fused_op_out_max);
+  for (size_t i = 0; i < input_max_nodes.size(); i++) {
+    IR_NODE_LINK_TO(input_max_nodes[i], fused_op);
+  }
 
   // delete useless node
   std::unordered_set<const Node*> del_node_set;
diff --git a/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc
index 04b645a4d33d8..2010d4cb48de0 100644
--- a/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc
@@ -841,6 +841,35 @@ int FcXPUFusePass::ApplyImpl(ir::Graph* graph,
   } else if (filter_data_type == phi::DataType::FLOAT16) {
     op_weights_precision = "float16";
   }
+  if (op_weights_precision == "float32" &&
+      AreScalesPresentForNodes(&var_quant_scales, {mul_w})) {
+    // convert weight to int8
+    auto* var = scope->FindVar(mul_w_name);
+    PADDLE_ENFORCE_NOT_NULL(
+        var,
+        platform::errors::NotFound(
+            "The input persistable var [%s] is not found.", mul_w_name));
+    auto* weight_tensor = var->GetMutable<phi::DenseTensor>();
+    float* fp32_weight_data = weight_tensor->data<float>();
+    std::vector<int8_t> weight_data;
+    weight_data.resize(weight_tensor->numel());
+    for (int i = 0; i < weight_tensor->numel(); i++) {
+      weight_data[i] = static_cast<int8_t>(fp32_weight_data[i]);
+    }
+    const auto weight_dims = weight_tensor->dims();
+    weight_tensor->clear();  // discard the fp32 weight buffer
+    weight_tensor->set_type(phi::DataType::INT8);
+    weight_tensor->Resize(common::make_ddim(common::vectorize(weight_dims)));
+    auto* cpu_ctx = static_cast<phi::CPUContext*>(
+        platform::DeviceContextPool::Instance().Get(phi::CPUPlace()));
+    auto* new_weight_data = cpu_ctx->Alloc<int8_t>(weight_tensor);
+    memcpy(new_weight_data,
+           weight_data.data(),
+           weight_tensor->numel() * sizeof(int8_t));
+    op_weights_precision = "int8";
+  }
+
   VLOG(4) << "FC fusion fuse pass is running on " << op_weights_precision
           << " precision!";
   auto* block = mul->Op()->Block();
diff --git a/paddle/fluid/framework/ir/xpu/group_norm_silu_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/group_norm_silu_xpu_fuse_pass.cc
new file mode 100644
index 0000000000000..86fef3fd0c2ae
--- /dev/null
+++ b/paddle/fluid/framework/ir/xpu/group_norm_silu_xpu_fuse_pass.cc
@@ -0,0 +1,208 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
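For reference, the fusion introduced below is mathematically group normalization followed by silu: y = (x - mean_g) / sqrt(var_g + eps) * scale_c + bias_c, then out = y * sigmoid(y). A minimal NCHW sketch of the fused computation follows; it assumes float input with c divisible by groups, and is illustrative only, not the XPU kernel or Paddle API.

#include <cmath>
#include <vector>

static void GroupNormSiluRef(const std::vector<float>& x,      // [n, c, h, w]
                             const std::vector<float>& scale,  // [c]
                             const std::vector<float>& bias,   // [c]
                             int n, int c, int h, int w, int groups, float eps,
                             std::vector<float>* out) {
  const int spatial = h * w;
  const int cpg = c / groups;  // channels per group
  out->resize(x.size());
  for (int ni = 0; ni < n; ++ni) {
    for (int g = 0; g < groups; ++g) {
      // mean/variance over one group's channels and spatial positions
      double sum = 0.0, sq_sum = 0.0;
      const int base = (ni * c + g * cpg) * spatial;
      const int count = cpg * spatial;
      for (int i = 0; i < count; ++i) {
        const double val = x[base + i];
        sum += val;
        sq_sum += val * val;
      }
      const double mean = sum / count;
      const double var = sq_sum / count - mean * mean;
      const double inv_std = 1.0 / std::sqrt(var + eps);
      for (int ci = 0; ci < cpg; ++ci) {
        const int ch = g * cpg + ci;
        for (int s = 0; s < spatial; ++s) {
          const int idx = (ni * c + ch) * spatial + s;
          const float y =
              static_cast<float>((x[idx] - mean) * inv_std) * scale[ch] +
              bias[ch];
          (*out)[idx] = y / (1.0f + std::exp(-y));  // silu(y) = y * sigmoid(y)
        }
      }
    }
  }
}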
+
+#include
+
+#include "glog/logging.h"
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/framework/ir/xpu/pass_utils.h"
+#include "paddle/fluid/framework/ir/xpu/quant_utils.h"
+#include "paddle/fluid/framework/op_version_registry.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace phi {
+class DenseTensor;
+}  // namespace phi
+
+namespace paddle {
+namespace framework {
+class Scope;
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace framework {
+namespace ir {
+namespace patterns {
+
+/*
+Fuse a group_norm + silu activation block into one group_norm_silu_xpu op.
+For example:
+graph:
+                      X
+            Scale     |     Bias
+                 \    |    /
+                 group norm
+                /     |     \
+               /      |      \
+        variance      |      mean
+                      |
+                     silu
+                      |
+                    output
+------------------------------------------------------
+After the pass is applied:
+                      X
+            Scale     |     Bias
+                 \    |    /
+              gn_silu_fusion
+                      |
+                     Out
+*/
+struct GroupNormalizeSiluXPUPattern : public PatternBase {
+  GroupNormalizeSiluXPUPattern(PDPattern* pattern,
+                               const std::string& name_scope);
+  // declare operator node's name
+  PATTERN_DECL_NODE(gn);
+  PATTERN_DECL_NODE(silu);
+  // declare variable node's name
+  PATTERN_DECL_NODE(gn_x);
+  PATTERN_DECL_NODE(gn_bias);
+  PATTERN_DECL_NODE(gn_scale);
+  PATTERN_DECL_NODE(gn_y);
+  PATTERN_DECL_NODE(gn_mean);
+  PATTERN_DECL_NODE(gn_variance);
+  PATTERN_DECL_NODE(silu_out);
+};
+
+GroupNormalizeSiluXPUPattern::GroupNormalizeSiluXPUPattern(
+    PDPattern* pattern, const std::string& name_scope)
+    : PatternBase(pattern, name_scope, name_scope) {
+  auto gn = pattern->NewNode(gn_repr())->assert_is_op("group_norm");
+  auto gn_x = pattern->NewNode(gn_x_repr())
+                  ->assert_is_op_input("group_norm", "X")
+                  ->AsInput();
+  auto gn_bias = pattern->NewNode(gn_bias_repr())
+                     ->assert_is_op_input("group_norm", "Bias")
+                     ->assert_is_persistable_var()
+                     ->AsInput();
+  auto gn_scale = pattern->NewNode(gn_scale_repr())
+                      ->assert_is_op_input("group_norm", "Scale")
+                      ->assert_is_persistable_var()
+                      ->AsInput();
+  auto gn_y = pattern->NewNode(gn_y_repr())
+                  ->assert_is_op_output("group_norm", "Y")
+                  ->assert_is_op_input("silu", "X")
+                  ->assert_has_n_outputs(1);
+  auto gn_mean = pattern->NewNode(gn_mean_repr())
+                     ->assert_is_op_output("group_norm", "Mean")
+                     ->assert_has_n_outputs(0);
+  auto gn_variance = pattern->NewNode(gn_variance_repr())
+                         ->assert_is_op_output("group_norm", "Variance")
+                         ->assert_has_n_outputs(0);
+  gn->LinksFrom({gn_x, gn_bias, gn_scale})
+      .LinksTo({gn_y, gn_mean, gn_variance});
+
+  auto silu = pattern->NewNode(silu_repr())->assert_is_op("silu");
+  auto silu_out = pattern->NewNode(silu_out_repr())
+                      ->AsOutput()
+                      ->assert_is_op_output("silu", "Out");
+  silu->LinksFrom({gn_y}).LinksTo({silu_out});
+}
+
+}  // namespace patterns
+
+class GroupNormalizeSiluXPUFusePass : public FusePassBase {
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;
+
+ private:
+  void FuseGroupNormalizeSilu(ir::Graph* graph) const;
+
+  const std::string name_scope_{"group_norm_silu_xpu_fuse_pass"};
+};
+
+void GroupNormalizeSiluXPUFusePass::ApplyImpl(ir::Graph* graph) const {
+  PADDLE_ENFORCE_NOT_NULL(
+      graph, platform::errors::PreconditionNotMet("graph should not be null."));
+  Init(name_scope_, graph);
+
+  FuseGroupNormalizeSilu(graph);
+}
+
+void GroupNormalizeSiluXPUFusePass::FuseGroupNormalizeSilu(
+    ir::Graph* graph) const {
+  GraphPatternDetector gpd;
+  patterns::GroupNormalizeSiluXPUPattern pattern(gpd.mutable_pattern(),
+                                                 name_scope_);
+
+  int found_subgraph_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* graph) {
+    VLOG(4) << "handle GroupNormalizeSiluXPUFusePass fuse";
+    // declare operator node's name
+    GET_IR_NODE(gn);
+    GET_IR_NODE(silu);
+    // declare variable node's name
+    GET_IR_NODE(gn_x);
+    GET_IR_NODE(gn_bias);
+    GET_IR_NODE(gn_scale);
+    GET_IR_NODE(gn_y);
+    GET_IR_NODE(gn_mean);
+    GET_IR_NODE(gn_variance);
+    GET_IR_NODE(silu_out);
+
+    auto* block = gn->Op()->Block();
+    auto* scope = param_scope();
+    PADDLE_ENFORCE_NOT_NULL(
+        scope, platform::errors::InvalidArgument("Scope cannot be nullptr."));
+    // delete useless node
+    std::unordered_set<const Node*> delete_nodes;
+
+    float eps = PADDLE_GET_CONST(float, gn->Op()->GetAttr("epsilon"));
+    int groups = PADDLE_GET_CONST(int, gn->Op()->GetAttr("groups"));
+
+    std::string fused_op_out_name = silu_out->Name();
+    // Generate the group_norm_silu_xpu fused op
+    framework::OpDesc fused_op_desc(block);
+
+    fused_op_desc.SetType("group_norm_silu_xpu");
+    // set inputs and attrs for the fused op
+    fused_op_desc.SetInput("x", {gn_x->Name()});
+    fused_op_desc.SetInput("bias", {gn_bias->Name()});
+    fused_op_desc.SetInput("scale", {gn_scale->Name()});
+    fused_op_desc.SetAttr("epsilon", eps);
+    fused_op_desc.SetAttr("groups", groups);
+    fused_op_desc.SetOutput("out", {fused_op_out_name});
+    // relink fused op
+    auto* fused_op = graph->CreateOpNode(&fused_op_desc);
+    IR_NODE_LINK_TO(gn_x, fused_op);
+    IR_NODE_LINK_TO(gn_bias, fused_op);
+    IR_NODE_LINK_TO(gn_scale, fused_op);
+    IR_NODE_LINK_TO(fused_op, silu_out);
+
+    delete_nodes.insert({gn, silu, gn_y, gn_mean, gn_variance});
+    GraphSafeRemoveNodes(graph, delete_nodes);
+    found_subgraph_count++;
+  };
+
+  gpd(graph, handler);
+  AddStatis(found_subgraph_count);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(group_norm_silu_xpu_fuse_pass,
+              paddle::framework::ir::GroupNormalizeSiluXPUFusePass);
+
+REGISTER_PASS_CAPABILITY(group_norm_silu_xpu_fuse_pass)
+    .AddCombination(
+        paddle::framework::compatible::OpVersionComparatorCombination().EQ(
+            "group_norm_silu_xpu", 0));
diff --git a/paddle/fluid/framework/ir/xpu/qk_qkv_attention_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/qk_qkv_attention_xpu_fuse_pass.cc
index 2ca1d081aab89..2d56306e97faa 100644
--- a/paddle/fluid/framework/ir/xpu/qk_qkv_attention_xpu_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/xpu/qk_qkv_attention_xpu_fuse_pass.cc
@@ -17,6 +17,7 @@
 #include "glog/logging.h"
 
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/framework/ir/quantize_helper.h"
 #include "paddle/fluid/framework/ir/xpu/pass_utils.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/enforce.h"
@@ -212,6 +213,7 @@ void QkQkvAttentionXPUFusePass::ApplyQkQkvAttentionXPUFuse(
   GET_IR_NODE(output);
 
   // Generate fuse op
+  auto* scope = param_scope();
   auto* block = reshape_1->Op()->Block();
   framework::OpDesc fused_op_desc(block);
   fused_op_desc.SetType("qkv_attention_xpu");
@@ -219,6 +221,57 @@
   fused_op_desc.SetInput("q", {input->Name()});
   fused_op_desc.SetInput("k", {input->Name()});
   fused_op_desc.SetInput("v", {input->Name()});
+  std::unordered_map<std::string, std::vector<float>> var_quant_scales =
+      GetQuantInfoFromTheGraph(graph, "has_quant_info", "var_quant_scales");
+  // record q/k/v max, qk_max, and qkv_max
+  std::vector<Node*> input_max_nodes;
+  if (var_quant_scales.find(input->Name()) != var_quant_scales.end() &&
+
var_quant_scales.find(qk_matmul_out->Name()) != + var_quant_scales.end() && + var_quant_scales.find(qkv_matmul_out->Name()) != + var_quant_scales.end()) { + std::vector input_max_vec; + input_max_vec.push_back(var_quant_scales.at(input->Name())[0]); + input_max_vec.push_back(var_quant_scales.at(qk_matmul_out->Name())[0]); + input_max_vec.push_back(var_quant_scales.at(qkv_matmul_out->Name())[0]); + std::vector quant_max_names = { + "input_max", "qk_max", "qkv_max"}; + for (size_t i = 0; i < input_max_vec.size(); i++) { + std::string input_max_name = + input->Name() + "_" + std::to_string(i) + "_max_in"; + int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(-1); + VarDesc input_max_desc(input_max_name); + input_max_desc.SetPersistable(true); + input_max_desc.SetShape({static_cast(max_ptr_size)}); + input_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32); + Node* input_max_in = graph->CreateVarNode(&input_max_desc); + auto* block_input_max_in_desc = block->Var(input_max_name); + block_input_max_in_desc->SetPersistable(input_max_desc.Persistable()); + block_input_max_in_desc->SetShape(input_max_desc.GetShape()); + block_input_max_in_desc->SetDataType(input_max_desc.GetDataType()); + + phi::DenseTensor input_max_in_cpu_tensor; + auto* cpu_ctx = static_cast( + platform::DeviceContextPool::Instance().Get(phi::CPUPlace())); + input_max_in_cpu_tensor.set_type(phi::DataType::FLOAT32); + input_max_in_cpu_tensor.Resize({max_ptr_size}); + std::vector input_max(max_ptr_size, input_max_vec[i]); + memcpy(cpu_ctx->Alloc(&input_max_in_cpu_tensor), + input_max.data(), + max_ptr_size * sizeof(float)); + Assign(input_max_in_cpu_tensor, + scope->Var(input_max_name)->GetMutable()); + if (i == 0) { + fused_op_desc.SetInput("q_max", {input_max_name}); + fused_op_desc.SetInput("k_max", {input_max_name}); + fused_op_desc.SetInput("v_max", {input_max_name}); + } else { + fused_op_desc.SetInput(quant_max_names[i], {input_max_name}); + } + input_max_nodes.push_back(input_max_in); + } + } + // set attributes of fuse_op if (with_q_scale) { float scale_val = PADDLE_GET_CONST(float, scale->Op()->GetAttr("scale")); @@ -239,16 +292,15 @@ void QkQkvAttentionXPUFusePass::ApplyQkQkvAttentionXPUFuse( fused_op_desc.SetAttr("out_dtype", input->Var()->GetDataType()); // set output of fuse_op - VarDesc fused_op_out_max_desc("qkv_max"); - Node* fused_op_out_max = graph->CreateVarNode(&fused_op_out_max_desc); - fused_op_desc.SetOutput("qkv_max", {"qkv_max"}); fused_op_desc.SetOutput("qkv", {output->Name()}); auto* fused_op = graph->CreateOpNode(&fused_op_desc); IR_NODE_LINK_TO(input, fused_op); IR_NODE_LINK_TO(fused_op, output); - IR_NODE_LINK_TO(fused_op, fused_op_out_max); + for (size_t i = 0; i < input_max_nodes.size(); i++) { + IR_NODE_LINK_TO(input_max_nodes[i], fused_op); + } // delete useless node std::unordered_set del_node_set; diff --git a/paddle/fluid/framework/ir/xpu/quant_dequant_xpu_pass.cc b/paddle/fluid/framework/ir/xpu/quant_dequant_xpu_pass.cc index 29a222281b217..eecefa6330d69 100644 --- a/paddle/fluid/framework/ir/xpu/quant_dequant_xpu_pass.cc +++ b/paddle/fluid/framework/ir/xpu/quant_dequant_xpu_pass.cc @@ -17,7 +17,7 @@ #include #include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" +#include "paddle/fluid/framework/ir/onednn/onednn_pass_util.h" #include "paddle/fluid/framework/ir/quantize_helper.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git 
a/paddle/fluid/framework/ir/xpu/spatial_transformer_resblock_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/spatial_transformer_resblock_xpu_fuse_pass.cc
new file mode 100644
index 0000000000000..a80d3763c366d
--- /dev/null
+++ b/paddle/fluid/framework/ir/xpu/spatial_transformer_resblock_xpu_fuse_pass.cc
@@ -0,0 +1,594 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+#include "glog/logging.h"
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/framework/ir/xpu/pass_utils.h"
+#include "paddle/fluid/framework/ir/xpu/quant_utils.h"
+#include "paddle/fluid/framework/op_version_registry.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace phi {
+class DenseTensor;
+}  // namespace phi
+
+namespace paddle {
+namespace framework {
+class Scope;
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace framework {
+namespace ir {
+namespace patterns {
+
+/*
+Fuse the original subgraph into one spatial_transformer_resblock_xpu op.
+Currently there are 3 different original patterns to match.
+
+Original subgraph (situation 1) (TODO):
+
+          ------------Input1    Input2
+          |            |          |
+          |       group_norm     silu
+          |            |          |
+          |          silu      _xpu_fc
+          |            |          |
+          |      _xpu_conv2d  unsqueeze
+          |             \        /
+          |              \      /
+          |               \    /
+          |                \  /
+          |          elementwise_add
+          |                 |
+          |            group_norm
+          |                 |
+          |               silu
+          |                 |
+          |           _xpu_conv2d
+          |                 |
+          |____________________elementwise_add
+                            |
+                          output
+
+Original subgraph (situation 2):
+
+          -------------- in
+          |              |
+          |     group_norm_silu_xpu
+          |              |
+          |         conv2d_xpu
+          |              |
+          |     group_norm_silu_xpu
+          |              |
+          -----------conv2d_xpu
+                         |
+                        out
+
+Original subgraph (situation 3):
+
+          -------------- in
+          |              |
+          |     group_norm_silu_xpu
+          |              |
+          |         conv2d_xpu
+          |              |
+     conv2d_xpu  group_norm_silu_xpu
+          |              |
+          -----------conv2d_xpu
+                         |
+                        out
+
+Fuse to:
+(Situation 1, TODO):
+          Input1    Input2
+              \      /
+    spatial_transformer_resblock_xpu
+                 |
+              output
+or:
+(Situations 2 and 3):
+                in
+                 |
+    spatial_transformer_resblock_xpu
+                 |
+                out
+*/
+struct SpatialTransformerResBlockXPUPattern : public PatternBase {
+  SpatialTransformerResBlockXPUPattern(PDPattern* pattern,
+                                       const std::string& name_scope,
+                                       bool conv_fix = false,
+                                       bool input_max = false,
+                                       bool has_silu_fc_input = false,
+                                       bool include_silu = false);
+  // declare operator node's name
+  PATTERN_DECL_NODE(gn_silu_0);
+  PATTERN_DECL_NODE(conv2d_0);
+  PATTERN_DECL_NODE(gn_silu_1);
+  PATTERN_DECL_NODE(conv2d_1);
+  PATTERN_DECL_NODE(conv2d_2);
+  // declare variable node's name
+  PATTERN_DECL_NODE(gn_silu_0_x);
+  PATTERN_DECL_NODE(gn_silu_0_bias);
+  PATTERN_DECL_NODE(gn_silu_0_scale);
+  PATTERN_DECL_NODE(gn_silu_0_out);
+  PATTERN_DECL_NODE(conv2d_0_bias);
+  PATTERN_DECL_NODE(conv2d_0_filter);
+  PATTERN_DECL_NODE(conv2d_0_filter_max);
+  PATTERN_DECL_NODE(conv2d_0_out);
+  PATTERN_DECL_NODE(conv2d_0_out_max);
+  PATTERN_DECL_NODE(gn_silu_1_bias);
+
PATTERN_DECL_NODE(gn_silu_1_scale); + PATTERN_DECL_NODE(gn_silu_1_out); + PATTERN_DECL_NODE(conv2d_1_bias); + PATTERN_DECL_NODE(conv2d_1_filter); + PATTERN_DECL_NODE(conv2d_1_filter_max); + PATTERN_DECL_NODE(conv2d_1_out); + PATTERN_DECL_NODE(conv2d_1_out_max); + PATTERN_DECL_NODE(conv2d_2_x_max); + PATTERN_DECL_NODE(conv2d_2_bias); + PATTERN_DECL_NODE(conv2d_2_filter); + PATTERN_DECL_NODE(conv2d_2_filter_max); + PATTERN_DECL_NODE(conv2d_2_out); + PATTERN_DECL_NODE(conv2d_2_out_max); + + private: + bool conv_fix_{false}; + bool input_max_{false}; + bool has_silu_fc_input_{false}; + bool include_silu_{false}; +}; + +SpatialTransformerResBlockXPUPattern::SpatialTransformerResBlockXPUPattern( + PDPattern* pattern, + const std::string& name_scope, + bool conv_fix, + bool input_max, + bool has_silu_fc_input, + bool include_silu) + : PatternBase(pattern, name_scope, name_scope), + conv_fix_(conv_fix), + input_max_(input_max), + has_silu_fc_input_(has_silu_fc_input), + include_silu_(include_silu) { + // gn_silu_0 + auto gn_silu_0 = + pattern->NewNode(gn_silu_0_repr())->assert_is_op("group_norm_silu_xpu"); + auto gn_silu_0_x = pattern->NewNode(gn_silu_0_x_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "x") + ->AsInput(); + auto gn_silu_0_bias = pattern->NewNode(gn_silu_0_bias_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "bias") + ->AsInput(); + auto gn_silu_0_scale = + pattern->NewNode(gn_silu_0_scale_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "scale") + ->AsInput(); + auto gn_silu_0_out = pattern->NewNode(gn_silu_0_out_repr()) + ->assert_is_op_output("group_norm_silu_xpu", "out") + ->assert_is_op_input("conv2d_xpu", "x") + ->assert_has_n_outputs(1); + gn_silu_0->LinksFrom({gn_silu_0_x, gn_silu_0_bias, gn_silu_0_scale}) + .LinksTo({gn_silu_0_out}); + + PDNode* conv2d_2_x_max = nullptr; + PDNode* conv2d_2_bias = nullptr; + PDNode* conv2d_2_filter = nullptr; + PDNode* conv2d_2_filter_max = nullptr; + PDNode* conv2d_2_out = nullptr; + PDNode* conv2d_2_out_max = nullptr; + if (conv_fix_) { + gn_silu_0_x->assert_is_op_input("conv2d_xpu", "x"); // conv2d_2 x + if (input_max_) { + conv2d_2_x_max = pattern->NewNode(conv2d_2_x_max_repr()) + ->assert_is_op_input("conv2d_xpu", "x_max") + ->AsInput(); + } + // conv2d_2 + auto conv2d_2 = + pattern->NewNode(conv2d_2_repr())->assert_is_op("conv2d_xpu"); + conv2d_2_bias = pattern->NewNode(conv2d_2_bias_repr()) + ->assert_is_op_input("conv2d_xpu", "bias") + ->AsInput(); + conv2d_2_filter = pattern->NewNode(conv2d_2_filter_repr()) + ->assert_is_op_input("conv2d_xpu", "filter") + ->AsInput(); + conv2d_2_filter_max = pattern->NewNode(conv2d_2_filter_max_repr()) + ->assert_is_op_input("conv2d_xpu", "filter_max") + ->AsInput(); + conv2d_2_out = pattern->NewNode(conv2d_2_out_repr()) + ->assert_is_op_output("conv2d_xpu", "out") + ->assert_is_op_input("conv2d_xpu", "branch") + ->assert_has_n_outputs(1); + conv2d_2_out_max = pattern->NewNode(conv2d_2_out_max_repr()) + ->assert_is_op_output("conv2d_xpu", "out_max"); + std::vector conv2d_2_input{ + gn_silu_0_x, conv2d_2_bias, conv2d_2_filter, conv2d_2_filter_max}; + if (input_max_) { + conv2d_2_input.push_back(conv2d_2_x_max); + } + conv2d_2->LinksFrom(conv2d_2_input) + .LinksTo({conv2d_2_out, conv2d_2_out_max}); + } else { + gn_silu_0_x->assert_is_op_input("conv2d_xpu", "branch"); // conv2d_1 branch + conv2d_2_out = gn_silu_0_x; + } + + // conv2d_0 + auto conv2d_0 = pattern->NewNode(conv2d_0_repr())->assert_is_op("conv2d_xpu"); + auto conv2d_0_bias = pattern->NewNode(conv2d_0_bias_repr()) + 
->assert_is_op_input("conv2d_xpu", "bias") + ->AsInput(); + auto conv2d_0_filter = pattern->NewNode(conv2d_0_filter_repr()) + ->assert_is_op_input("conv2d_xpu", "filter") + ->AsInput(); + auto conv2d_0_filter_max = + pattern->NewNode(conv2d_0_filter_max_repr()) + ->assert_is_op_input("conv2d_xpu", "filter_max") + ->AsInput(); + auto conv2d_0_out = pattern->NewNode(conv2d_0_out_repr()) + ->assert_is_op_output("conv2d_xpu", "out") + ->assert_is_op_input("group_norm_silu_xpu", "x") + ->assert_has_n_outputs(1); + auto conv2d_0_out_max = pattern->NewNode(conv2d_0_out_max_repr()) + ->assert_is_op_output("conv2d_xpu", "out_max"); + conv2d_0 + ->LinksFrom( + {gn_silu_0_out, conv2d_0_bias, conv2d_0_filter, conv2d_0_filter_max}) + .LinksTo({conv2d_0_out, conv2d_0_out_max}); + + // gn_silu_1 + auto gn_silu_1 = + pattern->NewNode(gn_silu_1_repr())->assert_is_op("group_norm_silu_xpu"); + auto gn_silu_1_bias = pattern->NewNode(gn_silu_1_bias_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "bias") + ->assert_is_persistable_var() + ->AsInput(); + auto gn_silu_1_scale = + pattern->NewNode(gn_silu_1_scale_repr()) + ->assert_is_op_input("group_norm_silu_xpu", "scale") + ->assert_is_persistable_var() + ->AsInput(); + auto gn_silu_1_out = pattern->NewNode(gn_silu_1_out_repr()) + ->assert_is_op_output("group_norm_silu_xpu", "out") + ->assert_is_op_input("conv2d_xpu", "x") + ->assert_has_n_outputs(1); + gn_silu_1->LinksFrom({conv2d_0_out, gn_silu_1_bias, gn_silu_1_scale}) + .LinksTo({gn_silu_1_out}); + + // conv2d_1 + auto conv2d_1 = pattern->NewNode(conv2d_1_repr())->assert_is_op("conv2d_xpu"); + auto conv2d_1_bias = pattern->NewNode(conv2d_1_bias_repr()) + ->assert_is_op_input("conv2d_xpu", "bias") + ->AsInput(); + auto conv2d_1_filter = pattern->NewNode(conv2d_1_filter_repr()) + ->assert_is_op_input("conv2d_xpu", "filter") + ->AsInput(); + auto conv2d_1_filter_max = + pattern->NewNode(conv2d_1_filter_max_repr()) + ->assert_is_op_input("conv2d_xpu", "filter_max") + ->AsInput(); + auto conv2d_1_out = pattern->NewNode(conv2d_1_out_repr()) + ->assert_is_op_output("conv2d_xpu", "out"); + auto conv2d_1_out_max = pattern->NewNode(conv2d_1_out_max_repr()) + ->assert_is_op_output("conv2d_xpu", "out_max"); + conv2d_1 + ->LinksFrom({gn_silu_1_out, + conv2d_2_out, + conv2d_1_bias, + conv2d_1_filter, + conv2d_1_filter_max}) + .LinksTo({conv2d_1_out, conv2d_1_out_max}); +} + +} // namespace patterns + +namespace { +static std::vector IntVec2DTo1D(const std::vector>& vec) { + std::vector res; + for (const auto& v : vec) { + for (const auto& ele : v) { + res.emplace_back(ele); + } + } + return res; +} + +} // namespace + +class SpatialTransformerResBlockXPUFusePass : public FusePassBase { + protected: + void ApplyImpl(ir::Graph* graph) const override; + + private: + int FuseSpatialTransformerResBlock(ir::Graph* graph, + bool conv_fix = false, + bool input_max = false, + bool has_silu_fc_input = false, + bool include_silu = false) const; + + const std::string name_scope_{"spatial_transformer_resblock_xpu_fuse_pass"}; +}; + +void SpatialTransformerResBlockXPUFusePass::ApplyImpl(ir::Graph* graph) const { + PADDLE_ENFORCE_NOT_NULL( + graph, platform::errors::PreconditionNotMet("graph should not be null.")); + Init(name_scope_, graph); + int found_subgraph_count = 0; + for (auto conv_fix : {false, true}) { + for (auto has_silu_fc_input : {false}) { + for (auto include_silu : {false}) { + if (conv_fix == true) { + for (auto input_max : {true, false}) { + found_subgraph_count += + FuseSpatialTransformerResBlock(graph, + 
+
+}  // namespace
+
+class SpatialTransformerResBlockXPUFusePass : public FusePassBase {
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;
+
+ private:
+  int FuseSpatialTransformerResBlock(ir::Graph* graph,
+                                     bool conv_fix = false,
+                                     bool input_max = false,
+                                     bool has_silu_fc_input = false,
+                                     bool include_silu = false) const;
+
+  const std::string name_scope_{"spatial_transformer_resblock_xpu_fuse_pass"};
+};
+
+void SpatialTransformerResBlockXPUFusePass::ApplyImpl(ir::Graph* graph) const {
+  PADDLE_ENFORCE_NOT_NULL(
+      graph, platform::errors::PreconditionNotMet("graph should not be null."));
+  Init(name_scope_, graph);
+  int found_subgraph_count = 0;
+  for (auto conv_fix : {false, true}) {
+    for (auto has_silu_fc_input : {false}) {
+      for (auto include_silu : {false}) {
+        if (conv_fix == true) {
+          for (auto input_max : {true, false}) {
+            found_subgraph_count +=
+                FuseSpatialTransformerResBlock(graph,
+                                               conv_fix /*true*/,
+                                               input_max,
+                                               has_silu_fc_input,
+                                               include_silu);
+          }
+        } else {
+          found_subgraph_count +=
+              FuseSpatialTransformerResBlock(graph,
+                                             conv_fix /*false*/,
+                                             false,
+                                             has_silu_fc_input,
+                                             include_silu);
+        }
+      }
+    }
+  }
+
+  AddStatis(found_subgraph_count);
+}
+
+int SpatialTransformerResBlockXPUFusePass::FuseSpatialTransformerResBlock(
+    ir::Graph* graph,
+    bool conv_fix,
+    bool input_max,
+    bool has_silu_fc_input,
+    bool include_silu) const {
+  GraphPatternDetector gpd;
+  patterns::SpatialTransformerResBlockXPUPattern pattern(gpd.mutable_pattern(),
+                                                         name_scope_,
+                                                         conv_fix,
+                                                         input_max,
+                                                         has_silu_fc_input,
+                                                         include_silu);
+
+  int found_subgraph_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* graph) {
+    VLOG(4) << "handle SpatialTransformerResBlockXPUFusePass fuse";
+    // declare operator node's name
+    GET_IR_NODE(gn_silu_0);
+    GET_IR_NODE(conv2d_0);
+    GET_IR_NODE(gn_silu_1);
+    GET_IR_NODE(conv2d_1);
+    GET_IR_NODE(conv2d_2);
+    // declare variable node's name
+    GET_IR_NODE(gn_silu_0_x);
+    GET_IR_NODE(gn_silu_0_bias);
+    GET_IR_NODE(gn_silu_0_scale);
+    GET_IR_NODE(gn_silu_0_out);
+    GET_IR_NODE(conv2d_0_bias);
+    GET_IR_NODE(conv2d_0_filter);
+    GET_IR_NODE(conv2d_0_filter_max);
+    GET_IR_NODE(conv2d_0_out);
+    GET_IR_NODE(conv2d_0_out_max);
+    GET_IR_NODE(gn_silu_1_bias);
+    GET_IR_NODE(gn_silu_1_scale);
+    GET_IR_NODE(gn_silu_1_out);
+    GET_IR_NODE(conv2d_1_bias);
+    GET_IR_NODE(conv2d_1_filter);
+    GET_IR_NODE(conv2d_1_filter_max);
+    GET_IR_NODE(conv2d_1_out);
+    GET_IR_NODE(conv2d_1_out_max);
+    GET_IR_NODE(conv2d_2_x_max);
+    GET_IR_NODE(conv2d_2_bias);
+    GET_IR_NODE(conv2d_2_filter);
+    GET_IR_NODE(conv2d_2_filter_max);
+    GET_IR_NODE(conv2d_2_out);
+    GET_IR_NODE(conv2d_2_out_max);
+
+    auto* block = gn_silu_1->Op()->Block();
+    auto* scope = param_scope();
+    PADDLE_ENFORCE_NOT_NULL(
+        scope, platform::errors::InvalidArgument("Scope cannot be nullptr."));
+    // delete useless node
+    std::unordered_set<const Node*> delete_nodes;
+
+    std::vector<std::vector<int>> strides;
+    std::vector<std::vector<int>> paddings;
+    std::vector<std::vector<int>> dilations;
+    std::vector<int> groups;
+    std::vector<float> gn_eps;
+    std::vector<int> gn_groups;
+
+    // get attr
+    float gn_silu_0_eps =
+        PADDLE_GET_CONST(float, gn_silu_0->Op()->GetAttr("epsilon"));
+    gn_eps.emplace_back(gn_silu_0_eps);
+    int gn_silu_0_groups =
+        PADDLE_GET_CONST(int, gn_silu_0->Op()->GetAttr("groups"));
+    gn_groups.emplace_back(gn_silu_0_groups);
+    float gn_silu_1_eps =
+        PADDLE_GET_CONST(float, gn_silu_1->Op()->GetAttr("epsilon"));
+    gn_eps.emplace_back(gn_silu_1_eps);
+    int gn_silu_1_groups =
+        PADDLE_GET_CONST(int, gn_silu_1->Op()->GetAttr("groups"));
+    gn_groups.emplace_back(gn_silu_1_groups);
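+
+    // PADDLE_GET_CONST is the typed accessor over the stored Attribute
+    // variant; the requested type must match the type the op registered.
+    // Sketch of the shapes used throughout this handler:
+    //   float eps = PADDLE_GET_CONST(float, op->GetAttr("epsilon"));
+    //   auto s = PADDLE_GET_CONST(std::vector<int>, op->GetAttr("strides"));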
conv2d_1->Op()->GetAttr("dilations")); + dilations.emplace_back(std::move(conv2d_1_dilations)); + int conv2d_1_groups = + PADDLE_GET_CONST(int, conv2d_1->Op()->GetAttr("groups")); + groups.emplace_back(std::move(conv2d_1_groups)); + auto conv2d_1_paddings = + PADDLE_GET_CONST(std::vector, conv2d_1->Op()->GetAttr("paddings")); + paddings.emplace_back(std::move(conv2d_1_paddings)); + std::string conv2d_1_padding_algorithm = PADDLE_GET_CONST( + std::string, conv2d_1->Op()->GetAttr("padding_algorithm")); + auto conv2d_1_strides = + PADDLE_GET_CONST(std::vector, conv2d_1->Op()->GetAttr("strides")); + strides.emplace_back(std::move(conv2d_1_strides)); + + std::vector conv_bias_names{conv2d_0_bias->Name(), + conv2d_1_bias->Name()}; + std::vector conv_filter_names{conv2d_0_filter->Name(), + conv2d_1_filter->Name()}; + std::vector conv_filter_max_names{conv2d_0_filter_max->Name(), + conv2d_1_filter_max->Name()}; + + // conv2d_2 + std::string conv2d_2_padding_algorithm; + if (conv_fix) { + auto conv2d_2_dilations = PADDLE_GET_CONST( + std::vector, conv2d_2->Op()->GetAttr("dilations")); + dilations.emplace_back(std::move(conv2d_2_dilations)); + int conv2d_2_groups = + PADDLE_GET_CONST(int, conv2d_2->Op()->GetAttr("groups")); + groups.emplace_back(std::move(conv2d_2_groups)); + auto conv2d_2_paddings = PADDLE_GET_CONST( + std::vector, conv2d_2->Op()->GetAttr("paddings")); + paddings.emplace_back(std::move(conv2d_2_paddings)); + conv2d_2_padding_algorithm = PADDLE_GET_CONST( + std::string, conv2d_2->Op()->GetAttr("padding_algorithm")); + auto conv2d_2_strides = PADDLE_GET_CONST( + std::vector, conv2d_2->Op()->GetAttr("strides")); + strides.emplace_back(std::move(conv2d_2_strides)); + + conv_bias_names.emplace_back(std::move(conv2d_2_bias->Name())); + conv_filter_names.emplace_back(std::move(conv2d_2_filter->Name())); + conv_filter_max_names.emplace_back( + std::move(conv2d_2_filter_max->Name())); + } + + std::string fused_op_out_name; + fused_op_out_name = conv2d_1_out->Name(); + // Generate add_layernorm fused op + framework::OpDesc fused_op_desc(block); + + fused_op_desc.SetType("spatial_transformer_resblock_xpu"); + // set attrs for fused op + fused_op_desc.SetInput("x", {gn_silu_0_x->Name()}); + + if (input_max) { + fused_op_desc.SetInput("x_max", {conv2d_2_x_max->Name()}); + } else { + fused_op_desc.SetInput("x_max", {}); + } + + fused_op_desc.SetInput("conv_bias", conv_bias_names); + fused_op_desc.SetInput("conv_filter", conv_filter_names); + fused_op_desc.SetInput("conv_filter_max", conv_filter_max_names); + fused_op_desc.SetInput("gn_bias", + {gn_silu_0_bias->Name(), gn_silu_1_bias->Name()}); + fused_op_desc.SetInput("gn_scale", + {gn_silu_0_scale->Name(), gn_silu_1_scale->Name()}); + fused_op_desc.SetOutput("out", {fused_op_out_name}); + fused_op_desc.SetOutput("out_max", {conv2d_1_out_max->Name()}); + + fused_op_desc.SetAttr("dilations", IntVec2DTo1D(dilations)); + fused_op_desc.SetAttr("paddings", IntVec2DTo1D(paddings)); + fused_op_desc.SetAttr("strides", IntVec2DTo1D(strides)); + fused_op_desc.SetAttr("groups", groups); + fused_op_desc.SetAttr("gn_eps", gn_eps); + fused_op_desc.SetAttr("gn_groups", gn_groups); + fused_op_desc.SetAttr("conv_fix", conv_fix); + fused_op_desc.SetAttr("has_silu_fc_input", has_silu_fc_input); + fused_op_desc.SetAttr("include_silu", include_silu); + + // relink fused op + auto* fused_op = graph->CreateOpNode(&fused_op_desc); + + IR_NODE_LINK_TO(gn_silu_0_x, fused_op); + IR_NODE_LINK_TO(gn_silu_0_bias, fused_op); + IR_NODE_LINK_TO(gn_silu_0_scale, fused_op); + 
+
+    // relink fused op
+    auto* fused_op = graph->CreateOpNode(&fused_op_desc);
+
+    IR_NODE_LINK_TO(gn_silu_0_x, fused_op);
+    IR_NODE_LINK_TO(gn_silu_0_bias, fused_op);
+    IR_NODE_LINK_TO(gn_silu_0_scale, fused_op);
+    IR_NODE_LINK_TO(conv2d_0_bias, fused_op);
+    IR_NODE_LINK_TO(conv2d_0_filter, fused_op);
+    IR_NODE_LINK_TO(conv2d_0_filter_max, fused_op);
+    IR_NODE_LINK_TO(gn_silu_1_bias, fused_op);
+    IR_NODE_LINK_TO(gn_silu_1_scale, fused_op);
+    IR_NODE_LINK_TO(conv2d_1_bias, fused_op);
+    IR_NODE_LINK_TO(conv2d_1_filter, fused_op);
+    IR_NODE_LINK_TO(conv2d_1_filter_max, fused_op);
+
+    if (conv_fix) {
+      if (input_max) {
+        IR_NODE_LINK_TO(conv2d_2_x_max, fused_op);
+      }
+      IR_NODE_LINK_TO(conv2d_2_bias, fused_op);
+      IR_NODE_LINK_TO(conv2d_2_filter, fused_op);
+      IR_NODE_LINK_TO(conv2d_2_filter_max, fused_op);
+    }
+
+    IR_NODE_LINK_TO(fused_op, conv2d_1_out);
+    IR_NODE_LINK_TO(fused_op, conv2d_1_out_max);
+
+    delete_nodes.insert({gn_silu_0,
+                         gn_silu_1,
+                         conv2d_0,
+                         conv2d_1,
+                         gn_silu_0_out,
+                         conv2d_0_out,
+                         conv2d_0_out_max,
+                         gn_silu_1_out});
+
+    if (conv_fix) {
+      delete_nodes.insert({conv2d_2, conv2d_2_out, conv2d_2_out_max});
+    }
+    GraphSafeRemoveNodes(graph, delete_nodes);
+    found_subgraph_count++;
+  };
+
+  gpd(graph, handler);
+  return found_subgraph_count;
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(spatial_transformer_resblock_xpu_fuse_pass,
+              paddle::framework::ir::SpatialTransformerResBlockXPUFusePass);
+
+REGISTER_PASS_CAPABILITY(spatial_transformer_resblock_xpu_fuse_pass)
+    .AddCombination(
+        paddle::framework::compatible::OpVersionComparatorCombination().EQ(
+            "spatial_transformer_resblock_xpu", 0));
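The handler above follows the usual fuse-pass shape: build one OpDesc for the fused op, relink the surviving variable nodes, then drop the matched subgraph in a single call. Stripped to its bones (a sketch with hypothetical node names, not the pass's real ones):

    auto* fused_op = graph->CreateOpNode(&fused_op_desc);  // new fused node
    IR_NODE_LINK_TO(input_var, fused_op);                  // keep live inputs
    IR_NODE_LINK_TO(fused_op, output_var);                 // keep live outputs
    GraphSafeRemoveNodes(graph, {old_op_a, old_op_b, dead_var});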
diff --git a/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc b/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc
index 8a319b8a350a0..381215b857303 100644
--- a/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc
+++ b/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc
@@ -164,7 +164,9 @@ void XPUQuantizeOpPass::QuantizeConv(ir::Graph* graph) const {
         out_var_node = output_node;
       }
     }
-    if (!AreScalesPresentForNodes(&var_quant_scales_, {x_var_node})) {
+    if (!AreScalesPresentForNodes(&var_quant_scales_, {x_var_node}) ||
+        w_var_node->Var()->GetDataType() !=
+            proto::VarType::Type::VarType_Type_INT8) {
       VLOG(4) << "Skip quantize op: " << n->Name()
               << "x_var_node_name:" << x_var_node->Name()
               << " w_var_node_name:" << w_var_node->Name();
@@ -239,8 +241,9 @@ void XPUQuantizeOpPass::QuantizeFC(ir::Graph* graph) const {
         out_var_node = output_node;
       }
     }
-    if (!AreScalesPresentForNodes(&var_quant_scales_,
-                                  {x_var_node, w_var_node})) {
+    if (!AreScalesPresentForNodes(&var_quant_scales_, {x_var_node}) ||
+        w_var_node->Var()->GetDataType() !=
+            proto::VarType::Type::VarType_Type_INT8) {
       MarkAndLogCannotQuantizeOp(n, "No scale available for the operator");
       continue;
     }
@@ -261,6 +264,71 @@ void XPUQuantizeOpPass::QuantizeFC(ir::Graph* graph) const {
   }
 }
 
+void XPUQuantizeOpPass::QuantizeQkvAttention(ir::Graph* graph) const {
+  for (auto* n : graph->Nodes()) {
+    if (n->IsOp()) {
+      auto* op = n->Op();
+      if (op->Type() != "qkv_attention_xpu") {
+        continue;
+      }
+      std::vector<std::string> max_node_names = {
+          "q_max", "k_max", "v_max", "qk_max"};
+      std::unordered_map<std::string, Node*> input_node_map;
+      for (auto* input_node : n->inputs) {
+        if (!input_node->IsVar()) {
+          continue;
+        }
+        for (auto input_name : op->InputNames()) {
+          if (op->Input(input_name)[0] == input_node->Var()->Name()) {
+            input_node_map[input_name] = input_node;
+          }
+        }
+      }
+      bool continue_flag = false;
+      for (auto max_name : max_node_names) {
+        if (input_node_map.find(max_name) == input_node_map.end()) {
+          continue_flag = true;
+          break;
+        }
+      }
+      if (continue_flag) {
+        continue;
+      }
+      Node* out_var_node = nullptr;
+      for (auto* output_node : n->outputs) {
+        if (!output_node->IsVar()) {
+          continue;
+        }
+        if (output_node->Var()->Name() == op->Output("qkv")[0]) {
+          out_var_node = output_node;
+        }
+      }
+      if (input_node_map["q"]->Name() == input_node_map["k"]->Name() &&
+          input_node_map["q"]->Name() == input_node_map["v"]->Name()) {
+        QuantizeInput(graph, n, input_node_map["q"], "q");
+        op->SetInput("k", op->Input("q"));
+        op->SetInput("v", op->Input("q"));
+        UnlinkNodes(input_node_map["k"], n);
+        UnlinkNodes(input_node_map["v"], n);
+      } else {
+        QuantizeInput(graph, n, input_node_map["q"], "q");
+        QuantizeInput(graph, n, input_node_map["k"], "k");
+        QuantizeInput(graph, n, input_node_map["v"], "v");
+      }
+      auto has_output_scale =
+          AreScalesPresentForNodes(&var_quant_scales_, {out_var_node});
+      if (has_output_scale) {
+        DequantizeOutput(graph, n, out_var_node, "qkv");
+        n->Op()->SetAttr(
+            "out_dtype",
+            static_cast<int>(proto::VarType::Type::VarType_Type_INT8));
+      } else {
+        n->Op()->SetAttr("out_dtype",
+                         input_node_map["q"]->Var()->GetDataType());
+      }
+    }
+  }
+}
 void XPUQuantizeOpPass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Insert quantize/dequantize op to the graph.";
   PADDLE_ENFORCE_NOT_NULL(
@@ -273,6 +341,7 @@ void XPUQuantizeOpPass::ApplyImpl(ir::Graph* graph) const {
   GetQuantInfo(graph);
   QuantizeConv(graph);
   QuantizeFC(graph);
+  QuantizeQkvAttention(graph);
 }
 
 }  // namespace ir
diff --git a/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.h b/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.h
index 28d0f42e76bde..312b6a540c8cc 100644
--- a/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.h
+++ b/paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.h
@@ -38,6 +38,7 @@ class XPUQuantizeOpPass : public FusePassBase {
 protected:
  void ApplyImpl(Graph* graph) const override;
  void QuantizeConv(Graph* graph) const;
+  void QuantizeQkvAttention(Graph* graph) const;
  void QuantizeFC(Graph* graph) const;
 
 private:
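Two details worth flagging in QuantizeQkvAttention above: `out_dtype` is only forced to INT8 when an output scale is actually available (otherwise the output keeps q's dtype), and self-attention is special-cased so the same tensor is not quantized three times. The latter, distilled (same calls as in the pass, variables abbreviated):

    // When q, k and v alias one variable, a single quantize op serves all
    // three inputs; k and v are re-pointed at the quantized q.
    if (q->Name() == k->Name() && q->Name() == v->Name()) {
      QuantizeInput(graph, n, q, "q");
      op->SetInput("k", op->Input("q"));
      op->SetInput("v", op->Input("q"));
    }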
(op->Type() == "lite_engine") { - operators::LiteEngineOp *lite_op = - dynamic_cast(op.get()); - PADDLE_ENFORCE_NOT_NULL( - lite_op, - phi::errors::InvalidArgument( - "lite_op(type: lite_engine) should be created.")); - std::string engine_key = lite_op->Attr("engine_key"); - std::string new_engine_key = engine_key + "_" + std::to_string(num); - PADDLE_ENFORCE( - paddle::inference::Singleton::Global() - .Has(engine_key), - phi::errors::InvalidArgument( - "lite_engine(key: %s) should be created.", engine_key)); - auto *lite_engine = - paddle::inference::Singleton::Global() - .Get(engine_key); - auto new_lite_engine = lite_engine->Clone(); -#ifdef LITE_SUBGRAPH_WITH_XPU - new_lite_engine->SetStream(TARGET(kXPU), stream); -#endif - paddle::inference::Singleton::Global() - .Set(new_engine_key, new_lite_engine); - lite_op->SetAttr("engine_key", new_engine_key); - lite_op->SetEngine(new_lite_engine.get()); - } - } -#endif -} +void NaiveExecutor::CloneLiteEngine(int num, void *stream) {} } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index d36e3042b0b72..47f58924de144 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -45,6 +45,9 @@ class NaiveExecutor { public: using HookFunc = std::function; + using PirHookFunc = + std::function; + explicit NaiveExecutor(const platform::Place& place) : place_(place) {} ~NaiveExecutor(); @@ -94,6 +97,8 @@ class NaiveExecutor { void RegisterOutputHook(const HookFunc& hookfunc); void RegisterInputHook(const HookFunc& hookfunc); + void RegisterOutputHook(const PirHookFunc& hookfunc); + void RegisterInputHook(const PirHookFunc& hookfunc); private: void CreateOps(const ProgramDesc& desc, int block_id); @@ -107,6 +112,9 @@ class NaiveExecutor { std::vector output_hookfuncs_; std::vector input_hookfuncs_; + std::vector pir_output_hookfuncs_; + std::vector pir_input_hookfuncs_; + // Record information that tensor_a should ShareBufferWith tensor_b. 
std::unordered_map> reuse_cache_; diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt index d06fdd8c4c7cd..01c6cd7c12a43 100644 --- a/paddle/fluid/framework/new_executor/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -5,7 +5,7 @@ if(NOT (WITH_CINN)) ${CMAKE_CURRENT_SOURCE_DIR}/instruction/cinn_jit_instruction.cc) endif() -if(NOT WITH_MKLDNN) +if(NOT WITH_ONEDNN) list( REMOVE_ITEM standalone_executor_srcs @@ -54,6 +54,6 @@ cc_library( add_dependencies(standalone_executor xxhash framework_proto) -if(WITH_MKLDNN) - add_dependencies(standalone_executor mkldnn) +if(WITH_ONEDNN) + add_dependencies(standalone_executor onednn) endif() diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc index db8ef9f2de7bf..8bd67fe50d698 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc @@ -38,7 +38,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -198,6 +198,16 @@ IfInstruction::~IfInstruction() { } } +void IfInstruction::SetOutputHooks(const std::vector& hookfuncs) { + true_branch_inter_->SetOutputHooks(hookfuncs); + false_branch_inter_->SetOutputHooks(hookfuncs); +} + +void IfInstruction::SetInputHooks(const std::vector& hookfuncs) { + true_branch_inter_->SetInputHooks(hookfuncs); + false_branch_inter_->SetInputHooks(hookfuncs); +} + void IfInstruction::Run() { bool cond = true; if (cond_var_->IsType()) { diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.h b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.h index cf0de0fc3581f..7667c9128a8a7 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.h +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.h @@ -48,6 +48,10 @@ class IfInstruction : public InstructionBase { PirInterpreter* FalseBranchInterpreter() const { return false_branch_inter_; } + void SetOutputHooks(const std::vector& hookfuncs); + + void SetInputHooks(const std::vector& hookfuncs); + private: ::pir::Operation* op_; diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/pylayer_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/pylayer_instruction.cc index 56bf04227d49b..838f6dbce67b6 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/pylayer_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/pylayer_instruction.cc @@ -37,7 +37,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc index ae8b0d1df2eee..1385f1d357a3d 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc @@ -38,7 +38,7 @@ #include 
"paddle/fluid/pir/dialect/operator/ir/manual_op.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -240,6 +240,16 @@ void WhileInstruction::ShareDatasToOutputs() { } } +void WhileInstruction::SetOutputHooks( + const std::vector& hookfuncs) { + body_inter_->SetOutputHooks(hookfuncs); +} + +void WhileInstruction::SetInputHooks( + const std::vector& hookfuncs) { + body_inter_->SetInputHooks(hookfuncs); +} + void WhileInstruction::Run() { #ifdef PADDLE_WITH_DNNL // Executor on being destroyed clears oneDNN cache and resets diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.h b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.h index 849d4ec4d184d..b6f729a784f5a 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.h +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.h @@ -50,6 +50,10 @@ class WhileInstruction : public InstructionBase { PirInterpreter* BodyInterpreter() const { return body_inter_.get(); } + void SetOutputHooks(const std::vector& hookfuncs); + + void SetInputHooks(const std::vector& hookfuncs); + private: // 'output' = 'input' void ShareInputsToOutputs(); diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc index 3bc5893a162b3..00b5410247ddc 100644 --- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc @@ -702,7 +702,7 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, &arguments); } #ifdef PADDLE_WITH_DNNL - // For input that is Extra, only MKLDNN will use Extra Inputs + // For input that is Extra, only OneDNN will use Extra Inputs auto& extra_input_names = paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap( op_with_kernel->Type()); diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 1e093f7247320..850a038ea790c 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -42,7 +42,7 @@ #include "paddle/phi/core/kernel_factory.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE diff --git a/paddle/fluid/framework/new_executor/interpreter/static_build.cc b/paddle/fluid/framework/new_executor/interpreter/static_build.cc index 131f756bdb1d3..ac58f499e91ca 100644 --- a/paddle/fluid/framework/new_executor/interpreter/static_build.cc +++ b/paddle/fluid/framework/new_executor/interpreter/static_build.cc @@ -24,7 +24,7 @@ #include "paddle/fluid/platform/flags.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif COMMON_DECLARE_bool(cache_inference_while_scope); diff --git a/paddle/fluid/framework/new_executor/interpreter_base_impl.h b/paddle/fluid/framework/new_executor/interpreter_base_impl.h index e99a02f37136e..1a6fe75fc518a 100644 --- a/paddle/fluid/framework/new_executor/interpreter_base_impl.h +++ b/paddle/fluid/framework/new_executor/interpreter_base_impl.h @@ -104,13 +104,17 @@ class InterpreterBaseImpl { virtual void 
SetInputHooks(const std::vector& hookfuncs) = 0; + virtual void SetOutputHooks(const std::vector& hookfuncs) = 0; + + virtual void SetInputHooks(const std::vector& hookfuncs) = 0; + virtual std::shared_ptr> GetDependencyCount() const = 0; virtual bool IsSharedResultsBuild() const = 0; - virtual void Build( - const std::vector& feed_names, - std::vector* op_func_nodes) = 0; + virtual void Build(const std::vector& feed_names, + std::vector* op_func_nodes, + bool switch_stream = false) = 0; virtual bool IsStaticBuild() const = 0; diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index 61151373b2a29..7bf78eed8b04e 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -139,6 +139,15 @@ void InterpreterCore::SetOutputHooks(const std::vector& hookfuncs) { impl_->SetOutputHooks(hookfuncs); } +void InterpreterCore::SetInputHooks(const std::vector& hookfuncs) { + impl_->SetInputHooks(hookfuncs); +} + +void InterpreterCore::SetOutputHooks( + const std::vector& hookfuncs) { + impl_->SetOutputHooks(hookfuncs); +} + void InterpreterCore::Build( const std::vector& feed_names, std::vector* op_func_nodes) { diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h index f2b4426b8ebb2..39ad549a78455 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.h +++ b/paddle/fluid/framework/new_executor/interpretercore.h @@ -14,6 +14,7 @@ #pragma once #include "paddle/fluid/framework/new_executor/interpreter_base_impl.h" +#include "paddle/fluid/framework/new_executor/new_executor_defs.h" PD_DECLARE_bool(new_executor_use_local_scope); @@ -88,6 +89,10 @@ class InterpreterCore { void SetInputHooks(const std::vector& hookfuncs); + void SetOutputHooks(const std::vector& hookfuncs); + + void SetInputHooks(const std::vector& hookfuncs); + void Build(const std::vector& feed_names, std::vector* op_func_nodes); diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h index c416b151aef03..79619828980aa 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.h +++ b/paddle/fluid/framework/new_executor/new_executor_defs.h @@ -40,9 +40,13 @@ COMMON_DECLARE_bool(dynamic_static_unified_comm); namespace paddle { namespace framework { +class InstructionBase; +class ValueExecutionInfo; using OpKernelComputeFunc = std::function; using HookFunc = std::function; +using PirHookFunc = + std::function; using SchedulingPriority = int64_t; diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc index 03439ad6fd417..a8d525ee9e93b 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.cc +++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc @@ -37,7 +37,7 @@ #include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_instruction.h" #include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_instruction.h" #include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_instruction.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" @@ -723,8 +723,16 @@ void PirInterpreter::BuildInstruction() { } } else if (op.dialect()->name() == "pd_op") { if (op.isa()) { // NOLINT - 
vec_instruction_base_.emplace_back(std::make_unique( - op_idx++, place_, &op, value_exe_info_.get(), execution_config_)); + std::unique_ptr if_instr_ptr = + std::make_unique(op_idx++, + place_, + &op, + value_exe_info_.get(), + execution_config_); + if_instr_ptr->SetOutputHooks(pir_output_hookfuncs_); + if_instr_ptr->SetInputHooks(pir_input_hookfuncs_); + vec_instruction_base_.emplace_back(std::move(if_instr_ptr)); + sub_blocks_.insert( {&op.dyn_cast().true_block(), dynamic_cast(vec_instruction_base_.back().get()) @@ -742,8 +750,16 @@ void PirInterpreter::BuildInstruction() { vec_instruction_base_.back().get()) ->ForwardInterpreter()}); } else if (op.isa()) { - vec_instruction_base_.emplace_back(std::make_unique( - op_idx++, place_, &op, value_exe_info_.get(), execution_config_)); + std::unique_ptr while_instr_ptr = + std::make_unique(op_idx++, + place_, + &op, + value_exe_info_.get(), + execution_config_); + while_instr_ptr->SetOutputHooks(pir_output_hookfuncs_); + while_instr_ptr->SetInputHooks(pir_input_hookfuncs_); + vec_instruction_base_.emplace_back(std::move(while_instr_ptr)); + sub_blocks_.insert( {&op.dyn_cast().body(), dynamic_cast(vec_instruction_base_.back().get()) @@ -1183,7 +1199,9 @@ void PirInterpreter::CalculateLastLiveOps() { for (auto& item : ins_and_outs) { for (auto var_id : item.second) { // skip no_need_buffer input vars - if (ins.count(item.first) && instr->NoNeedBuffer().count(item.first)) { + if ((ins.count(item.first) && + instr->NoNeedBuffer().count(item.first)) || + instr->Name() == "builtin_combine_instruction") { continue; } gc_check_vars.insert(var_id); @@ -1764,6 +1782,13 @@ void PirInterpreter::RunInstructionBase(InstructionBase* instr_node) { << " runs on " << platform::GetCurrentThreadName() << "\n" << "Before: " << cur_place << " " << instr_node->DebugStringEx(scope_, value_exe_info_.get()); + + if (execution_config_.used_for_inference) { + for (auto& hook : pir_input_hookfuncs_) { + hook(instr_node, value_exe_info_.get(), scope_); + } + } + if (!instr_node->IsArtificial()) { instr_node->Run(); @@ -1789,6 +1814,13 @@ void PirInterpreter::RunInstructionBase(InstructionBase* instr_node) { VLOG(4) << "done CheckGC"; memory::LogDeviceMemoryStats(cur_place, instr_node->Name()); } + + if (execution_config_.used_for_inference) { + for (auto& hook : pir_output_hookfuncs_) { + hook(instr_node, value_exe_info_.get(), scope_); + } + } + VLOG(5) << "after run kernel"; instr_node->RecordEvent(cur_place); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -1897,7 +1929,8 @@ Variable* PirInterpreter::DebugVar(const std::string& name) const { void PirInterpreter::Build( const std::vector& feed_names, - std::vector* op_func_nodes) { + std::vector* op_func_nodes, + bool switch_stream) { PADDLE_THROW(platform::errors::Unimplemented( "Build is not implemented in PirInterpreter.")); } diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.h b/paddle/fluid/framework/new_executor/pir_interpreter.h index e28e418b9dd95..819bf7486d685 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.h +++ b/paddle/fluid/framework/new_executor/pir_interpreter.h @@ -96,12 +96,16 @@ class PirInterpreter : public InterpreterBaseImpl { const platform::Place& GetPlace() const override { return place_; } - void SetOutputHooks(const std::vector& hookfuncs) override { - output_hookfuncs_ = hookfuncs; + void SetOutputHooks(const std::vector& hookfuncs) override {} + + void SetInputHooks(const std::vector& hookfuncs) override {} + + void SetOutputHooks(const std::vector& 
hookfuncs) override { + pir_output_hookfuncs_ = hookfuncs; } - void SetInputHooks(const std::vector& hookfuncs) override { - input_hookfuncs_ = hookfuncs; + void SetInputHooks(const std::vector& hookfuncs) override { + pir_input_hookfuncs_ = hookfuncs; } std::string GetNameByValue(::pir::Value value) const; @@ -138,9 +142,9 @@ class PirInterpreter : public InterpreterBaseImpl { void CheckCUDAGraphBeforeRun(const std::vector& feed_names); void PrepareForCUDAGraphCapture(); - void Build( - const std::vector& feed_names, - std::vector* op_func_nodes) override; + void Build(const std::vector& feed_names, + std::vector* op_func_nodes, + bool switch_stream = false) override; bool IsStaticBuild() const override { return static_build_; } @@ -200,8 +204,8 @@ class PirInterpreter : public InterpreterBaseImpl { int64_t onednn_op_num_{-1}; std::vector trace_execute_order_; - std::vector output_hookfuncs_; - std::vector input_hookfuncs_; + std::vector pir_output_hookfuncs_; + std::vector pir_input_hookfuncs_; /// ======================== /// /// For new ir /// diff --git a/paddle/fluid/framework/new_executor/program_interpreter.cc b/paddle/fluid/framework/new_executor/program_interpreter.cc index 8991fd9c3a22d..0bca82f5016e1 100644 --- a/paddle/fluid/framework/new_executor/program_interpreter.cc +++ b/paddle/fluid/framework/new_executor/program_interpreter.cc @@ -29,7 +29,7 @@ #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" #include "paddle/phi/backends/device_manager.h" @@ -150,7 +150,7 @@ FetchList ProgramInterpreter::Run(const std::vector& feed_names, is_in_op_profiling_mode_ = enable_op_profiling; std::vector op_func_nodes; - Build(feed_names, &op_func_nodes); + Build(feed_names, &op_func_nodes, switch_stream); if (!is_build_) { SetFeedVarsInplaceSkip(feed_names); @@ -166,7 +166,7 @@ FetchList ProgramInterpreter::Run(const std::vector& feed_names, } else { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (switch_stream) { - BuildOpFuncNode(&op_func_nodes); + Convert(&op_func_nodes); } #endif RunImpl(); @@ -208,7 +208,8 @@ FetchList ProgramInterpreter::Run(const std::vector& feed_names, void ProgramInterpreter::Build( const std::vector& feed_names, - std::vector* op_func_nodes) { + std::vector* op_func_nodes, + bool switch_stream) { SetDeviceId(place_); CheckCUDAGraphBeforeRun(feed_names); @@ -216,7 +217,7 @@ void ProgramInterpreter::Build( platform::AttachPointerHashToMKLDNNKey(this, place_); #endif - if (!is_build_) { + if (!is_build_ || switch_stream) { LOG_FIRST_N(INFO, 1) << "New Executor is Running."; paddle::framework::interpreter::BuildVariableScope( block_, execution_config_, &var_scope_); @@ -678,7 +679,42 @@ std::tuple ProgramInterpreter::InterpreterRunTime() { void ProgramInterpreter::Convert( std::vector* op_func_nodes) { auto& vec_meta_info = var_scope_.MutableVecMetaInfo(); - BuildOpFuncNode(op_func_nodes); + auto nodes = *op_func_nodes; + auto op_nums = nodes.size(); + vec_instruction_.clear(); + vec_instruction_.reserve(op_nums); + for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { + auto& op_func_node = nodes[op_idx]; + stream_analyzer_.SetForceEventsToWaitInfo(force_events_to_wait_); + auto* dev_ctx_ = stream_analyzer_.ParseDeviceContext(op_func_node); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if 
(FLAGS_new_executor_use_cuda_graph) { + auto& op = op_func_node.operator_base_; + auto& op_type = op->Type(); + if (op_type == interpreter::kMemcpyD2H || + op_type == interpreter::kMemcpyH2D) { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Cuda memory copy d2h/h2d is not allowed while using cuda graph.")); + } + PADDLE_ENFORCE_EQ(typeid(*dev_ctx_) == typeid(phi::GPUContext), + true, + platform::errors::InvalidArgument( + "Device context of op %s must be [%s] while using " + "cuda graph, but got [%s].", + op_type, + typeid(phi::GPUContext).name(), + typeid(*dev_ctx_).name())); + // cuda graph needs to record all stream + phi::backends::gpu::CUDAGraphContextManager::Instance() + .RecordCapturingDeviceContext(dev_ctx_); + } +#endif + vec_instruction_.emplace_back(op_idx, std::move(op_func_node), *dev_ctx_); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + vec_instruction_.back().UpdateRecordStreamForGcInfo(); +#endif + } BuildOperatorDependences(); @@ -715,7 +751,6 @@ void ProgramInterpreter::Convert( } // calculate last_live_ops_ - auto op_nums = (*op_func_nodes).size(); for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { Instruction& instr = vec_instruction_[op_idx]; OpInOutInfo info; @@ -852,46 +887,6 @@ void ProgramInterpreter::Convert( AnalyseExecuteOrderForTrace(); } -void ProgramInterpreter::BuildOpFuncNode( - std::vector* op_func_nodes) { - auto nodes = *op_func_nodes; - auto op_nums = nodes.size(); - vec_instruction_.clear(); - vec_instruction_.reserve(op_nums); - for (size_t op_idx = 0; op_idx < op_nums; ++op_idx) { - auto& op_func_node = nodes[op_idx]; - stream_analyzer_.SetForceEventsToWaitInfo(force_events_to_wait_); - auto* dev_ctx_ = stream_analyzer_.ParseDeviceContext(op_func_node); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (FLAGS_new_executor_use_cuda_graph) { - auto& op = op_func_node.operator_base_; - auto& op_type = op->Type(); - if (op_type == interpreter::kMemcpyD2H || - op_type == interpreter::kMemcpyH2D) { - PADDLE_THROW(paddle::platform::errors::Fatal( - "Cuda memory copy d2h/h2d is not allowed while using cuda graph.")); - } - PADDLE_ENFORCE_EQ(typeid(*dev_ctx_) == typeid(phi::GPUContext), - true, - platform::errors::InvalidArgument( - "Device context of op %s must be [%s] while using " - "cuda graph, but got [%s].", - op_type, - typeid(phi::GPUContext).name(), - typeid(*dev_ctx_).name())); - // cuda graph needs to record all stream - phi::backends::gpu::CUDAGraphContextManager::Instance() - .RecordCapturingDeviceContext(dev_ctx_); - } -#endif - vec_instruction_.emplace_back(op_idx, std::move(op_func_node), *dev_ctx_); - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - vec_instruction_.back().UpdateRecordStreamForGcInfo(); -#endif - } -} - void ProgramInterpreter::BuildSkipShareLoDInfo() { for (size_t i = 0; i < vec_instruction_.size(); ++i) { bool can_skip_lod = true; diff --git a/paddle/fluid/framework/new_executor/program_interpreter.h b/paddle/fluid/framework/new_executor/program_interpreter.h index 7e956249e22a3..f72faf54f2b1d 100644 --- a/paddle/fluid/framework/new_executor/program_interpreter.h +++ b/paddle/fluid/framework/new_executor/program_interpreter.h @@ -60,9 +60,9 @@ class ProgramInterpreter : public InterpreterBaseImpl { std::shared_ptr GetMutableCopyProgram() override; - void Build( - const std::vector& feed_names, - std::vector* op_func_nodes) override; + void Build(const std::vector& feed_names, + std::vector* op_func_nodes, + bool switch_stream = false) override; void 
ShareWorkQueueFrom(InterpreterBaseImpl* src) override; @@ -101,6 +101,10 @@ class ProgramInterpreter : public InterpreterBaseImpl { input_hookfuncs_ = hookfuncs; } + void SetOutputHooks(const std::vector& hookfuncs) override {} + + void SetInputHooks(const std::vector& hookfuncs) override {} + std::unordered_map>* GetForceEventsToWaitInfo() { return force_events_to_wait_; @@ -127,8 +131,6 @@ class ProgramInterpreter : public InterpreterBaseImpl { void BuildSkipShareLoDInfo(); void UpdateSyncOpNum(); void AnalyseExecuteOrderForTrace(); - void BuildOpFuncNode( - std::vector* op_func_nodes); // inplace void BuildInplace(); diff --git a/paddle/fluid/framework/new_executor/workqueue/event_count.h b/paddle/fluid/framework/new_executor/workqueue/event_count.h index 9f80b02904dad..6918cc5a42edd 100644 --- a/paddle/fluid/framework/new_executor/workqueue/event_count.h +++ b/paddle/fluid/framework/new_executor/workqueue/event_count.h @@ -121,7 +121,7 @@ class EventCount { CheckState(state, true); uint64_t newstate; if ((state & kSignalMask) != 0) { - // Consume the signal and return immidiately. + // Consume the signal and return immediately. newstate = state - kWaiterInc - kSignalInc; } else { // Remove this thread from pre-wait counter and add to the waiter stack. @@ -148,7 +148,7 @@ class EventCount { CheckState(state, true); uint64_t newstate = state - kWaiterInc; // We don't know if the thread was also notified or not, - // so we should not consume a signal unconditionaly. + // so we should not consume a signal unconditionally. // Only if number of waiters is equal to number of signals, // we know that the thread was notified and we must take away the signal. if (((state & kWaiterMask) >> kWaiterShift) == diff --git a/paddle/fluid/framework/op_kernel_type.h b/paddle/fluid/framework/op_kernel_type.h index ce0a138eb1a6a..4839592aa43b7 100644 --- a/paddle/fluid/framework/op_kernel_type.h +++ b/paddle/fluid/framework/op_kernel_type.h @@ -107,7 +107,7 @@ inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) { bool ret = (l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r); #ifdef PADDLE_WITH_DNNL - // Layout transform needed for either non-MKLDNN to MKLDNN or vice versa + // Layout transform needed for either non-MKLDNN to OneDNN or vice versa ret |= (l != DataLayout::ONEDNN && r == DataLayout::ONEDNN); ret |= (l == DataLayout::ONEDNN && r != DataLayout::ONEDNN); #endif diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index fe10a16375f34..d5dab65d18d15 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -53,8 +53,8 @@ class DenseTensor; #endif #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_op_list.h" +#include "paddle/fluid/platform/onednn_helper.h" +#include "paddle/fluid/platform/onednn_op_list.h" #endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -504,7 +504,7 @@ void RuntimeInferShapeContext::ShareLoD(const std::string& in, // Workaround: // Skip set_layout() when input layout is kMKLDNN // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN - // OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called + // OPKernel. 
In all OneDNN OPkernel, set_layout(kMKLDNN) should be called // in Compute() if (in_tensor.layout() != DataLayout::ONEDNN) #endif @@ -1571,12 +1571,12 @@ bool OperatorWithKernel::SupportsKernelType( } #endif -// NOTE(jiahongyu): If MKLDNN can be used, the function SupportsKernelType needs -// to check whether current op supports MKLDNN kernel. There are three +// NOTE(jiahongyu): If OneDNN can be used, the function SupportsKernelType needs +// to check whether current op supports OneDNN kernel. There are three // statements in if condition: -// 1. Whether mkldnn kernel fallbacks to plain kernel; +// 1. Whether onednn kernel fallbacks to plain kernel; // 2. Whether this op has specific implementation; -// 3. Whether mkldnn kernel can be used. +// 3. Whether onednn kernel can be used. #ifdef PADDLE_WITH_DNNL if (!this->DnnFallback() && !paddle::platform::in_mkldnn_white_list(type_) && this->CanMKLDNNBeUsed(exe_ctx, kernel_type.data_type_)) { @@ -1771,7 +1771,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, // TODO(chenweihang): Now we are still reusing a lot of the original fluid // implementation, this is a gradual replacement process // TODO(chenweihang): in the first phase of project, we only support CPU, CUDA - // and RCOM backend, the XPU, NPU and MKLDNN will be supported in the second + // and RCOM backend, the XPU, NPU and OneDNN will be supported in the second // phase phi::KernelKey phi_kernel_key; std::string phi_kernel_name; @@ -1846,13 +1846,13 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } } else { phi_kernel_name = kernel_signature_->name; -// NOTE(jiahongyu): The registered MKLDNN kernel have library_type = +// NOTE(jiahongyu): The registered OneDNN kernel have library_type = // LibraryType::kMKLDNN and data_layout_ = DataLayout::ONEDNN. But the default // values are kPlain, so we need to modify the library_type and data_layout_ // here. There are three statements in if condition: -// 1. Whether mkldnn kernel fallbacks to plain kernel; +// 1. Whether onednn kernel fallbacks to plain kernel; // 2. Whether this op has specific implementation; -// 3. Whether mkldnn kernel can be used. +// 3. Whether onednn kernel can be used. #ifdef PADDLE_WITH_DNNL if (!this->DnnFallback() && !paddle::platform::in_mkldnn_white_list(type_) && @@ -2121,7 +2121,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } if (FLAGS_enable_unused_var_check) { - // skip op that uses mkldnn because it has different memory reuse strategy. + // skip op that uses onednn because it has different memory reuse strategy. // use attr here because some GradMakers (like ActivationGradOpMaker) add // input when use_mkldnn=true; if (!(HasAttr("use_mkldnn") && Attr("use_mkldnn"))) { @@ -2181,12 +2181,12 @@ OpKernelType OperatorWithKernel::InnerGetExpectedKernelType( framework::TransPhiKernelKeyToOpKernelType(phi_kernel_key); // NOTE(jiahongyu): PADDLE_WITH_DNNL codes are moved outside function -// GetExpectedKernelType, so that if MKLDNN can be used, the library_type_ and +// GetExpectedKernelType, so that if OneDNN can be used, the library_type_ and // data_layout_ of expected_kernel_key need to be adjusted. There are three // statements in if condition: -// 1. Whether mkldnn kernel fallbacks to plain kernel; +// 1. Whether onednn kernel fallbacks to plain kernel; // 2. Whether this op has specific implementation; -// 3. Whether mkldnn kernel can be used. +// 3. Whether onednn kernel can be used. 
#ifdef PADDLE_WITH_DNNL if (!this->DnnFallback() && !paddle::platform::in_mkldnn_white_list(type_) && this->CanMKLDNNBeUsed(ctx, expected_kernel_key.data_type_)) { @@ -2815,7 +2815,7 @@ Scope* OperatorWithKernel::PrepareData( prepare_input_data(input_name, &ins_vector, &in_def, should_skip_input); } #ifdef PADDLE_WITH_DNNL - // For input that is Extra, only MKLDNN will use Extra Inputs + // For input that is Extra, only OneDNN will use Extra Inputs auto& extra_input_names = paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap( Type()); diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 3ad9ec6c9d698..dc025998cc099 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -921,7 +921,7 @@ class OperatorWithKernel : public OperatorBase { mutable std::mutex cache_update_mutex_; mutable bool enable_cache_transfer_scope_ = false; // NOTE(jiahongyu): Whether fallback to plain kernel after calling - // GetExpectedKernelType, use this bool flag to solve mkldnn and cudnn hard + // GetExpectedKernelType, use this bool flag to solve onednn and cudnn hard // code mutable bool dnn_fallback_ = false; // NOTE(chenweihang): Similar op members are used to adapt to diff --git a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt index 4a0a869b8a2bd..2e4e5083caa36 100644 --- a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt +++ b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt @@ -17,8 +17,8 @@ set(paddle2cinn_deps auto_schedule_proto parallel_executor common) -if(WITH_MKLDNN) - set(paddle2cinn ${paddle2cinn} mkldnn) +if(WITH_ONEDNN) + set(paddle2cinn ${paddle2cinn} onednn) endif() if(WITH_TESTING) diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index 90567bd728cd5..ce4304b4ec228 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -36,7 +36,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" #include "paddle/fluid/framework/paddle2cinn/cinn_subgraph_detector.h" -#include "paddle/fluid/operators/cinn/cinn_launch_op.h" +#include "paddle/fluid/operators/cinn/cinn_op_helper.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index 31ab7e1b1bcaa..4b5051a8aadd0 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -79,7 +79,8 @@ cc_library( layout_autotune ops_extra_info phi - common) + common + global_utils) cc_library( basic_engine SRCS basic_engine.cc diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 5192e8c773888..a3c5b51b80b3b 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -27,7 +27,7 @@ #include "paddle/fluid/platform/profiler.h" #include "paddle/phi/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif COMMON_DECLARE_bool(use_mkldnn); diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index a60c81a4c22d9..9f4f46c60cea4 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -26,7 +26,7 @@ #include "paddle/fluid/platform/device/xpu/xpu_op_list.h" #endif #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_op_list.h" +#include "paddle/fluid/platform/onednn_op_list.h" #endif #include "paddle/common/flags.h" #include "paddle/fluid/framework/library_type.h" @@ -166,7 +166,7 @@ PreparedOp PrepareImpl( auto* dev_ctx = pool.Get(place); #ifdef PADDLE_WITH_DNNL - // MKLDNN variant of code reads attributes in some of GetKernelTypeForVar and + // OneDNN variant of code reads attributes in some of GetKernelTypeForVar and // GetKernelType functions, so we need to copy the attributes there. // Const qualifier of Attrs had to be discarded to overwrite it. if (FLAGS_use_mkldnn) { @@ -190,13 +190,13 @@ PreparedOp PrepareImpl( phi::KernelSignature kernel_signature; std::string phi_kernel_name; -// NOTE(jiahongyu): The registered MKLDNN kernel have library_type = +// NOTE(jiahongyu): The registered OneDNN kernel have library_type = // LibraryType::kMKLDNN and data_layout_ = DataLayout::ONEDNN. But the default // values are kPlain, so we need to modify the library_type and data_layout_ // here. There are three statements in if condition: -// 1. Whether mkldnn kernel fallbacks to plain kernel; +// 1. Whether onednn kernel fallbacks to plain kernel; // 2. Whether this op has specific implementation; -// 3. Whether mkldnn kernel can be used. +// 3. Whether onednn kernel can be used. 
#ifdef PADDLE_WITH_DNNL if (!op.DnnFallback() && !paddle::platform::in_mkldnn_white_list(op.Type()) && op.CanMKLDNNBeUsed(dygraph_exe_ctx, expected_kernel_key.dtype())) { diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 3eff589fee703..7aa4652ec0058 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -36,8 +36,8 @@ #include "paddle/utils/string/string_helper.h" COMMON_DECLARE_bool(use_mkldnn); -COMMON_DECLARE_string(tracer_mkldnn_ops_on); -COMMON_DECLARE_string(tracer_mkldnn_ops_off); +COMMON_DECLARE_string(tracer_onednn_ops_on); +COMMON_DECLARE_string(tracer_onednn_ops_off); COMMON_DECLARE_bool(use_stride_kernel); namespace paddle { @@ -245,12 +245,12 @@ void Tracer::TraceOpImpl(const std::string& type, // if both lists are empty all ops are enabled (default for // FLAGS_use_mkldnn=1) // if ops_on list is not empty only ops from that list are enabled - if (!FLAGS_tracer_mkldnn_ops_on.empty()) { - auto is_on = FLAGS_tracer_mkldnn_ops_on.find(type) != std::string::npos; + if (!FLAGS_tracer_onednn_ops_on.empty()) { + auto is_on = FLAGS_tracer_onednn_ops_on.find(type) != std::string::npos; attrs["use_mkldnn"] = is_on; } else { // if ops_on list is empty all ops are enabled except types from off_list - auto is_off = FLAGS_tracer_mkldnn_ops_off.find(type) != std::string::npos; + auto is_off = FLAGS_tracer_onednn_ops_off.find(type) != std::string::npos; attrs["use_mkldnn"] = !is_off; } } diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 77052155efaa6..1f3544bf702b4 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -132,7 +132,7 @@ void IRPassManager::CreatePasses(Argument *argument, pass->Set("graph_viz_path", new std::string(std::move(dot_file_path))); pass->Set("optim_cache_dir", new std::string(std::move(optim_cache_dir))); pass_num++; - } else if (pass_name == "mkldnn_placement_pass") { + } else if (pass_name == "onednn_placement_pass") { pass->Set("mkldnn_enabled_op_types", new std::unordered_set( argument->mkldnn_enabled_op_types())); @@ -364,13 +364,13 @@ void IRPassManager::CreatePasses(Argument *argument, argument->nnadapter_model_cache_token())); } else if (pass_name == "fc_fuse_pass") { pass->Set("use_gpu", new bool(argument->use_gpu())); - bool fc_mkldnn_pass = false; + bool fc_onednn_pass = false; for (const std::string &pass_n : passes) { - if (pass_n == "fc_mkldnn_pass") { - fc_mkldnn_pass = true; + if (pass_n == "fc_onednn_pass") { + fc_onednn_pass = true; } } - bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding(); + bool use_fc_padding = !fc_onednn_pass && argument->use_fc_padding(); pass->Set("use_fc_padding", new bool(use_fc_padding)); } else if (pass_name == "fused_multi_transformer_xpu_pass") { int quant_post_dynamic_weight_precision = diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 65a4bea5b1240..c559b6d7e8897 100755 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -4,12 +4,12 @@ endif() add_subdirectory(details) -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(mkldnn_quantizer_cfg mkldnn_quantizer_config) - set(mkldnn_quantizer_src ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn_quantizer.cc) + set(mkldnn_quantizer_src ${CMAKE_CURRENT_SOURCE_DIR}/onednn_quantizer.cc) cc_library( ${mkldnn_quantizer_cfg} - SRCS mkldnn_quantizer_config.cc + SRCS 
onednn_quantizer_config.cc DEPS lod_tensor paddle_pass_builder) set(mkldnn_quantizer_cfg ${mkldnn_quantizer_cfg} diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index efe7b83f7df16..b8570fa05e7c4 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -505,7 +505,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(dlnne_precision_mode_); CP_MEMBER(dlnne_disable_nodes_by_outputs_); CP_MEMBER(dlnne_input_shape_dict_); - // MKLDNN related. + // OneDNN related. CP_MEMBER(use_mkldnn_); CP_MEMBER(mkldnn_enabled_op_types_); CP_MEMBER(mkldnn_cache_capacity_); @@ -991,18 +991,18 @@ void AnalysisConfig::Update() { #ifdef PADDLE_WITH_DNNL // Since EnableMKLDNN is default, the pass_builder has created in the first // time. - // Case1: User manually disable mkldnn after pass_builder + // Case1: User manually disable onednn after pass_builder // create.(config.disable_mkldnn()) // Case2: User device is gpu/ipu/xpu, use // EnableXpu(), EnableCUDNN(), PassStrategy has been reset in the above code // block // Case3: pass_builder_ has been created and belongs to - // GpuPassStrategy(or IpuPassStrategy), neither enable mkldnn and - // disable mkldnn will be executed + // GpuPassStrategy(or IpuPassStrategy), neither enable onednn and + // disable onednn will be executed if ((!use_gpu() && !use_xpu() && !use_ipu() && !use_mkldnn_) || (use_mkldnn_ && !phi::backends::cpu::MayIUse(phi::backends::cpu::cpu_isa_t::avx2))) { - // User manually disable mkldnn or disable when not support AVX2 + // User manually disable onednn or disable when not support AVX2 use_mkldnn_ = false; pass_builder()->DisableMKLDNN(); } @@ -1054,7 +1054,7 @@ void AnalysisConfig::Update() { if (!use_gpu() && !use_xpu() && !use_ipu()) { if (use_mkldnn_ && enable_ir_optim_) { #ifdef PADDLE_WITH_DNNL - // default enable mkldnn when device is cpu and enable_ir_optim + // default enable onednn when device is cpu and enable_ir_optim pass_builder()->EnableMKLDNN(); #endif } diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 9420d84bab558..d4a73175b3222 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -31,6 +31,7 @@ #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/naive_executor.h" +#include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" @@ -79,7 +80,7 @@ #endif #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/inference/api/mkldnn_quantizer.h" +#include "paddle/fluid/inference/api/onednn_quantizer.h" #endif #ifdef PADDLE_WITH_ONNXRUNTIME @@ -107,6 +108,7 @@ #ifdef PADDLE_WITH_CINN #include "paddle/cinn/hlir/dialect/operator/ir/op_dialect.h" #include "paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.h" +#include "paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.h" #include "paddle/pir/include/dialect/shape/ir/shape_dialect.h" #endif @@ -407,7 +409,7 @@ bool AnalysisPredictor::Init( root_predictor_id_ = predictor_id_; } - // no matter with or without MKLDNN + // no matter with or without OneDNN paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); // Use Optimized model to inference @@ 
-618,6 +620,9 @@ void AnalysisPredictor::ClearExtraParams() { config_.shape_range_info_path_); } } + if (op_desc->HasAttr("predictor_id")) { + op_desc->SetAttr("predictor_id", predictor_id_); + } } } @@ -780,10 +785,18 @@ bool AnalysisPredictor::PrepareProgram( executor_->CreateVariables(*inference_program_, 0, true, sub_scope_); // if enable_ir_optim_ is false, - // the analysis pass(op fuse, graph analysis, trt subgraph, mkldnn etc) will + // the analysis pass(op fuse, graph analysis, trt subgraph, onednn etc) will // not be executed. model_precision_ = paddle::inference::GetModelPrecision(*inference_program_); +#ifdef PADDLE_WITH_TENSORRT + if (config_.tensorrt_engine_enabled()) { + inference::tensorrt::TensorRTEngine::predictor_id_per_thread = + predictor_id_; + VLOG(3) << "thread_local var predictor_id in TensorRTEngine is set to: " + << inference::tensorrt::TensorRTEngine::predictor_id_per_thread; + } +#endif if (config_.use_optimized_model_) { LoadParameters(); ClearExtraParams(); @@ -896,36 +909,35 @@ bool AnalysisPredictor::PrepareExecutor() { }; #ifdef PADDLE_WITH_CINN + auto CreatePassMgr = [&] { + pir::IrContext *ctx = pir::IrContext::Instance(); + ctx->GetOrRegisterDialect(); + ctx->GetOrRegisterDialect(); + auto pass_manager = std::make_shared<::pir::PassManager>( + ::pir::IrContext::Instance(), 2); + if (!config_.glog_info_disabled()) { + pass_manager->EnablePrintStatistics(); + } + if (config_.ir_debug_) { + pass_manager->EnableIRPrinting( + std::make_unique( + ir_printing_conditions, ir_printing_conditions)); + } + return pass_manager; + }; + if (paddle::prim::PrimCommonUtils::IsFwdPrimEnabled()) { VLOG(4) << "[Prim] Decomp program in predictor begin."; DecompProgram decomp_object(pir_program_.get()); decomp_object.decomp_program(); - auto shape_pm = std::make_shared<::pir::PassManager>( - ::pir::IrContext::Instance(), 2); - ::pir::shape::AddShapeOptimizationPass(shape_pm, *pir_program_.get()); - VLOG(4) << "[ShapeDialect] Run AddShapeOptimizationPass"; - shape_pm->Run(pir_program_.get()); + cinn::dialect::ir::CheckInferSymbolicIfNeed(pir_program_.get(), + CreatePassMgr); } if (config_.cinn_enabled()) { VLOG(4) << "[CINN] Begin ApplyCinnPass"; - cinn::dialect::ir::ApplyCinnPass(pir_program_.get(), [&] { - pir::IrContext *ctx = pir::IrContext::Instance(); - ctx->GetOrRegisterDialect(); - ctx->GetOrRegisterDialect(); - auto pass_manager = std::make_shared<::pir::PassManager>( - ::pir::IrContext::Instance(), 2); - if (!config_.glog_info_disabled()) { - pass_manager->EnablePrintStatistics(); - } - if (config_.ir_debug_) { - pass_manager->EnableIRPrinting( - std::make_unique( - ir_printing_conditions, ir_printing_conditions)); - } - return pass_manager; - }); + cinn::dialect::ir::ApplyCinnPass(pir_program_.get(), CreatePassMgr); } #endif @@ -934,16 +946,14 @@ bool AnalysisPredictor::PrepareExecutor() { config_.pm_opt_level_); if (!config_.custom_passes_.empty()) { for (const auto &custom_pass : config_.custom_passes_) { - pass_pm.AddPass( - std::move(pir::PassRegistry::Instance().Get(custom_pass))); + pass_pm.AddPass(pir::PassRegistry::Instance().Get(custom_pass)); } } if (config_.use_gpu()) { // gpu if (!config_.custom_pass_only_) { for (const auto &gpu_pass : kPirGpuPasses) { - pass_pm.AddPass( - std::move(pir::PassRegistry::Instance().Get(gpu_pass))); + pass_pm.AddPass(pir::PassRegistry::Instance().Get(gpu_pass)); } } @@ -963,8 +973,7 @@ bool AnalysisPredictor::PrepareExecutor() { // mkldnn if (!config_.custom_pass_only_) { for (const auto &mkldnn_pass : kPirMkldnnPasses) { 
- pass_pm.AddPass( - std::move(pir::PassRegistry::Instance().Get(mkldnn_pass))); + pass_pm.AddPass(pir::PassRegistry::Instance().Get(mkldnn_pass)); } } #endif @@ -972,8 +981,7 @@ bool AnalysisPredictor::PrepareExecutor() { // cpu if (!config_.custom_pass_only_) { for (const auto &cpu_pass : kPirCpuPasses) { - pass_pm.AddPass( - std::move(pir::PassRegistry::Instance().Get(cpu_pass))); + pass_pm.AddPass(pir::PassRegistry::Instance().Get(cpu_pass)); } } } @@ -2014,14 +2022,6 @@ void AnalysisPredictor::PrepareArgument() { // NOTE All the members in AnalysisConfig should be copied to Argument. void AnalysisPredictor::OptimizeInferenceProgram() { PrepareArgument(); -#ifdef PADDLE_WITH_TENSORRT - if (config_.tensorrt_engine_enabled()) { - inference::tensorrt::TensorRTEngine::predictor_id_per_thread = - predictor_id_; - VLOG(3) << "thread_local var predictor_id in TensorRTEngine is set to: " - << inference::tensorrt::TensorRTEngine::predictor_id_per_thread; - } -#endif Analyzer().Run(argument_.get()); PADDLE_ENFORCE_EQ( argument_->scope_valid(), @@ -3108,49 +3108,99 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) { exe.Run(save_program, scope(), 0, true, true); } -void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) { - std::call_once(register_input_hook_flag_, [this] { - executor_->RegisterInputHook( - [this](framework::OperatorBase *op, framework::Scope *scope) { - for (auto &input : op->Inputs()) { - for (auto &var_name : input.second) { +void AnalysisPredictor::RegisterOutputHook( + const OutputTensorHookFunc &hookfunc) { + if (config_.new_ir_enabled()) { + std::call_once(register_output_hook_flag_, [this] { + executor_->RegisterOutputHook( + [this](framework::InstructionBase *instr, + framework::ValueExecutionInfo *value_exe_info, + framework::Scope *scope) { + for (auto &output : instr->Outputs()) { + auto var_name = value_exe_info->GetVarName(output.first); auto *var = scope->FindVar(var_name); if (!var || !var->IsType()) continue; auto dense_tensor = var->Get(); if (!dense_tensor.initialized()) continue; auto tensor = paddle::Tensor( std::make_shared(dense_tensor), var_name); - for (auto &hookfunc : this->input_hookfuncs_) { - hookfunc(op->Type(), var_name, tensor); + for (auto &hookfunc : this->output_hookfuncs_) { + hookfunc(instr->Name() + ":" + std::to_string(instr->Id()), + var_name, + tensor); } } - } - }); - }); - input_hookfuncs_.push_back(hookfunc); + }); + }); + output_hookfuncs_.push_back(hookfunc); + } else { + std::call_once(register_output_hook_flag_, [this] { + executor_->RegisterOutputHook( + [this](framework::OperatorBase *op, framework::Scope *scope) { + for (auto &output : op->Outputs()) { + for (auto &var_name : output.second) { + auto *var = scope->FindVar(var_name); + if (!var || !var->IsType()) continue; + auto dense_tensor = var->Get(); + if (!dense_tensor.initialized()) continue; + auto tensor = paddle::Tensor( + std::make_shared(dense_tensor), var_name); + for (auto &hookfunc : this->output_hookfuncs_) { + hookfunc(op->Type(), var_name, tensor); + } + } + } + }); + }); + output_hookfuncs_.push_back(hookfunc); + } } -void AnalysisPredictor::RegisterOutputHook( - const OutputTensorHookFunc &hookfunc) { - std::call_once(register_output_hook_flag_, [this] { - executor_->RegisterOutputHook( - [this](framework::OperatorBase *op, framework::Scope *scope) { - for (auto &output : op->Outputs()) { - for (auto &var_name : output.second) { +void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) { + if 
(config_.new_ir_enabled()) { + std::call_once(register_input_hook_flag_, [this] { + executor_->RegisterInputHook( + [this](framework::InstructionBase *instr, + framework::ValueExecutionInfo *value_exe_info, + framework::Scope *scope) { + for (auto &input : instr->Inputs()) { + auto var_name = value_exe_info->GetVarName(input.first); auto *var = scope->FindVar(var_name); if (!var || !var->IsType()) continue; auto dense_tensor = var->Get(); if (!dense_tensor.initialized()) continue; auto tensor = paddle::Tensor( std::make_shared(dense_tensor), var_name); - for (auto &hookfunc : this->output_hookfuncs_) { - hookfunc(op->Type(), var_name, tensor); + for (auto &hookfunc : this->input_hookfuncs_) { + hookfunc(instr->Name() + ":" + std::to_string(instr->Id()), + var_name, + tensor); } } - } - }); - }); - output_hookfuncs_.push_back(hookfunc); + }); + }); + input_hookfuncs_.push_back(hookfunc); + } else { + std::call_once(register_input_hook_flag_, [this] { + executor_->RegisterInputHook( + [this](framework::OperatorBase *op, framework::Scope *scope) { + for (auto &input : op->Inputs()) { + for (auto &var_name : input.second) { + auto *var = scope->FindVar(var_name); + if (!var || !var->IsType()) continue; + auto dense_tensor = var->Get(); + if (!dense_tensor.initialized()) continue; + auto tensor = paddle::Tensor( + std::make_shared(dense_tensor), var_name); + for (auto &hookfunc : this->input_hookfuncs_) { + hookfunc(op->Type(), var_name, tensor); + } + } + } + }); + }); + input_hookfuncs_.push_back(hookfunc); + } } template <> @@ -3451,7 +3501,7 @@ uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); } void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) { predictor_->RegisterOutputHook(hookfunc); } -void Predictor::RegisterInputHook(const OutputTensorHookFunc &hookfunc) { +void Predictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) { predictor_->RegisterInputHook(hookfunc); } diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index fe494cab93a90..d44ad5cec1a90 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -321,7 +321,7 @@ class AnalysisPredictor : public PaddlePredictor { void RegisterInputHook(const InputTensorHookFunc &hookfunc) override; /// - /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass + /// \brief Initialize onednn quantizer and execute onednn quantization pass /// /// \return Whether the function executed successfully /// diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 1ae582feb4acf..9ae284402f196 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -71,7 +71,7 @@ bool NativePaddlePredictor::Init( platform::EnableProfiler(tracking_device); } - // no matter with or without MKLDNN + // no matter with or without OneDNN paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); if (config_.use_gpu) { diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 833fc98d36dba..5597057c3dc12 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -155,13 +155,13 @@ if(WITH_MKL) ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() - set(MKLDNN_PATH 
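The hook registration above now branches between the PIR instruction path and the legacy operator path, but both invoke user callbacks as (operator identifier, variable name, tensor). A hedged sketch of registering such a hook through the public `Predictor` API; the exact `OutputTensorHookFunc` signature is inferred from the invocations above, not confirmed by this diff:

```cpp
#include <iostream>
#include <string>

#include "paddle_inference_api.h"  // paddle_infer::Predictor

void AttachDebugHooks(paddle_infer::Predictor* predictor) {
  predictor->RegisterOutputHook(
      [](const std::string& op_info,      // PIR: "op_name:op_id"; legacy: op type
         const std::string& var_name,     // name of the output variable
         const paddle::Tensor& tensor) {  // view of the output tensor
        std::cout << op_info << " -> " << var_name << std::endl;
        (void)tensor;
      });
}
```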
"${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn") - if(EXISTS ${MKLDNN_PATH}) - include_directories("${MKLDNN_PATH}/include") + set(ONEDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}onednn") + if(EXISTS ${ONEDNN_PATH}) + include_directories("${ONEDNN_PATH}/include") if(WIN32) - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) + set(MKLDNN_LIB ${ONEDNN_PATH}/lib/mkldnn.lib) else() - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libdnnl.so.3) + set(MKLDNN_LIB ${ONEDNN_PATH}/lib/libdnnl.so.3) endif() endif() else() @@ -309,7 +309,7 @@ if(WIN32) ${LIB_PATH} COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll + COMMAND ${CMAKE_COMMAND} -E copy ${ONEDNN_PATH}/lib/mkldnn.dll ${LIB_PATH}) else() add_custom_command( diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor_test.cc b/paddle/fluid/inference/api/details/zero_copy_tensor_test.cc index c3589f4251791..fda408b15df5f 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor_test.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor_test.cc @@ -57,9 +57,10 @@ std::unique_ptr CreateTensor(paddle_infer::PlaceType place, template struct RandomGenerator { - RandomGenerator(double min = (std::numeric_limits::min)(), - double max = (std::numeric_limits::max)()) - : dist_{static_cast(min), static_cast(max)} {} + RandomGenerator( + double min = static_cast((std::numeric_limits::min)()), + double max = static_cast((std::numeric_limits::max)())) + : dist_{min, max} {} T operator()() { return static_cast(dist_(random_engine_)); } private: diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/onednn_quantizer.cc similarity index 99% rename from paddle/fluid/inference/api/mkldnn_quantizer.cc rename to paddle/fluid/inference/api/onednn_quantizer.cc index 76222b84d4624..aa6f52008ab24 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/onednn_quantizer.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/inference/api/mkldnn_quantizer.h" +#include "paddle/fluid/inference/api/onednn_quantizer.h" #include #include @@ -29,7 +29,7 @@ #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/api/analysis_predictor.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/common/place.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.h b/paddle/fluid/inference/api/onednn_quantizer.h similarity index 100% rename from paddle/fluid/inference/api/mkldnn_quantizer.h rename to paddle/fluid/inference/api/onednn_quantizer.h diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc b/paddle/fluid/inference/api/onednn_quantizer_config.cc similarity index 98% rename from paddle/fluid/inference/api/mkldnn_quantizer_config.cc rename to paddle/fluid/inference/api/onednn_quantizer_config.cc index da20870eb0f5c..786d9463766e9 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc +++ b/paddle/fluid/inference/api/onednn_quantizer_config.cc @@ -14,7 +14,7 @@ #include -#include "paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h" +#include "paddle/fluid/inference/api/paddle_onednn_quantizer_config.h" namespace paddle { diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 72df8efb095a6..019418f45b625 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -39,7 +39,7 @@ #include "paddle_api.h" // NOLINT #include "paddle_pass_builder.h" // NOLINT #ifdef PADDLE_WITH_DNNL -#include "paddle_mkldnn_quantizer_config.h" // NOLINT +#include "paddle_onednn_quantizer_config.h" // NOLINT #endif namespace paddle { @@ -970,19 +970,19 @@ struct PD_INFER_DECL AnalysisConfig { void SwitchIrDebug(int x = true, const std::vector& passes = {}); /// - /// \brief Turn on MKLDNN. + /// \brief Turn on OneDNN. /// /// void EnableMKLDNN(); /// - /// \brief Turn down MKLDNN. + /// \brief Turn down OneDNN. /// /// void DisableMKLDNN(); /// - /// \brief Set the cache capacity of different input shapes for MKLDNN. + /// \brief Set the cache capacity of different input shapes for OneDNN. /// Default value 0 means not caching any shape. /// Please see MKL-DNN Data Caching Design Document: /// https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/mkldnn/caching/caching.md @@ -991,9 +991,9 @@ struct PD_INFER_DECL AnalysisConfig { /// void SetMkldnnCacheCapacity(int capacity); /// - /// \brief A boolean state telling whether to use the MKLDNN. + /// \brief A boolean state telling whether to use the OneDNN. /// - /// \return bool Whether to use the MKLDNN. + /// \return bool Whether to use the OneDNN. /// bool mkldnn_enabled() const { return use_mkldnn_; } @@ -1021,7 +1021,7 @@ struct PD_INFER_DECL AnalysisConfig { /// NativeConfig ToNativeConfig() const; /// - /// \brief Specify the operator type list to use MKLDNN acceleration. + /// \brief Specify the operator type list to use OneDNN acceleration. /// /// \param op_list The operator type list. /// @@ -1030,47 +1030,47 @@ struct PD_INFER_DECL AnalysisConfig { } /// - /// \brief Turn on MKLDNN quantization. + /// \brief Turn on OneDNN quantization. /// /// void EnableMkldnnQuantizer(); /// - /// \brief Turn on MKLDNN int8. + /// \brief Turn on OneDNN int8. 
/// /// \param op_list The operator type list. /// void EnableMkldnnInt8(const std::unordered_set& op_list = {}); /// - /// \brief A boolean state telling whether to use the MKLDNN Int8. + /// \brief A boolean state telling whether to use the OneDNN Int8. /// - /// \return bool Whether to use the MKLDNN Int8. + /// \return bool Whether to use the OneDNN Int8. /// bool mkldnn_int8_enabled() const { return use_mkldnn_int8_; } /// - /// \brief Turn on MKLDNN bfloat16. + /// \brief Turn on OneDNN bfloat16. /// /// void EnableMkldnnBfloat16(); /// - /// \brief Turn off MKLDNN fc passes. + /// \brief Turn off OneDNN fc passes. /// void DisableMkldnnFcPasses(); /// - /// \brief A boolean state telling whether to disable the MKLDNN Fc passes. + /// \brief A boolean state telling whether to disable the OneDNN Fc passes. /// - /// \return bool Whether to disable the MKLDNN Fc passes. + /// \return bool Whether to disable the OneDNN Fc passes. /// bool mkldnn_fc_passes_disabled() const { return disable_mkldnn_fc_passes_; } /// - /// \brief A boolean state telling whether to use the MKLDNN Bfloat16. + /// \brief A boolean state telling whether to use the OneDNN Bfloat16. /// - /// \return bool Whether to use the MKLDNN Bfloat16. + /// \return bool Whether to use the OneDNN Bfloat16. /// bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; } @@ -1091,16 +1091,16 @@ struct PD_INFER_DECL AnalysisConfig { bool thread_local_stream_enabled() const { return thread_local_stream_; } /// - /// \brief A boolean state telling whether the MKLDNN quantization is enabled. + /// \brief A boolean state telling whether the OneDNN quantization is enabled. /// - /// \return bool Whether the MKLDNN quantization is enabled. + /// \return bool Whether the OneDNN quantization is enabled. /// bool mkldnn_quantizer_enabled() const { return use_mkldnn_quantizer_; } /// - /// \brief Get MKLDNN quantizer config. + /// \brief Get OneDNN quantizer config. /// - /// \return MkldnnQuantizerConfig* MKLDNN quantizer config. + /// \return MkldnnQuantizerConfig* OneDNN quantizer config. /// MkldnnQuantizerConfig* mkldnn_quantizer_config() const; @@ -1250,8 +1250,16 @@ struct PD_INFER_DECL AnalysisConfig { bool custom_pass_only = false); /// - /// \brief Set passmanager opt level.Pass level lower than - /// opt level which will be added to passmanager + /// \brief Set the pir optimization level. + /// \param opt_level The optimization level, in range [0, 4]; default is 2. + /// A higher optimization level allows the predictor to apply more passes. + /// If 0, only basic passes are supported. + /// If 1, functional passes are additionally supported. + /// If 2, fusion passes are additionally supported; these may affect + /// precision and speed. + /// If 3, layout passes are supported, etc. + /// If 4, radical optimizations are added, which may affect precision, etc. /// void SetOptimizationLevel(int opt_level); @@ -1419,7 +1427,7 @@ struct PD_INFER_DECL AnalysisConfig { // NNAdapter related LiteNNAdapterConfig nnadapter_config_; - // mkldnn related. + // onednn related.
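A short usage sketch for the re-documented optimization level; the model paths are placeholders, and whether additional pir switches are needed depends on the build:

```cpp
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model.pdmodel", "./model.pdiparams");
  // Level 2 (the default) enables fusion passes on top of the basic and
  // functional ones; 3 adds layout passes, 4 the most aggressive set.
  config.SetOptimizationLevel(2);
  return 0;
}
```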
int mkldnn_cache_capacity_{10}; bool use_mkldnn_quantizer_{false}; std::shared_ptr mkldnn_quantizer_config_; diff --git a/paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h b/paddle/fluid/inference/api/paddle_onednn_quantizer_config.h similarity index 99% rename from paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h rename to paddle/fluid/inference/api/paddle_onednn_quantizer_config.h index 1208c29c79a9c..c44f7a3e0d049 100644 --- a/paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h +++ b/paddle/fluid/inference/api/paddle_onednn_quantizer_config.h @@ -53,7 +53,7 @@ enum class ScaleAlgo { /// /// \class MkldnnQuantizerConfig /// -/// \brief Config for mkldnn quantize. +/// \brief Config for onednn quantize. /// /// The MkldnnQuantizerConfig is used to configure Mkldnn's quantization /// parameters, including scale algorithm, warmup data, warmup batch size, diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 06f3d9d899659..e503f1133cb7b 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -228,7 +228,6 @@ const std::vector kCINNCompilerPasses{ "gpu_cpu_map_matmul_v2_to_mul_pass", "gpu_cpu_map_matmul_v2_to_matmul_pass", "gpu_cpu_map_matmul_to_mul_pass", - "build_cinn_pass", }; const std::vector CpuBasicPasses{ @@ -358,34 +357,34 @@ void CpuPassStrategy::EnableMKLDNN() { // TODO(Superjomn) Consider the way to mix CPU with GPU. #ifdef PADDLE_WITH_DNNL if (!use_mkldnn_) { - passes_.insert(passes_.begin(), "mkldnn_placement_pass"); + passes_.insert(passes_.begin(), "onednn_placement_pass"); for (auto &pass : std::vector({ "squeeze2_transpose2_onednn_fuse_pass", - "depthwise_conv_mkldnn_pass", // + "depthwise_conv_onednn_pass", // "conv_bn_fuse_pass", // Execute BN passes again to "conv_eltwiseadd_bn_fuse_pass", // preserve correct pass order - "conv_affine_channel_mkldnn_fuse_pass", // + "conv_affine_channel_onednn_fuse_pass", // "conv_transpose_bn_fuse_pass", // "conv_transpose_eltwiseadd_bn_fuse_pass", // - "conv_bias_mkldnn_fuse_pass", // - "conv_transpose_bias_mkldnn_fuse_pass", + "conv_bias_onednn_fuse_pass", // + "conv_transpose_bias_onednn_fuse_pass", // TODO(baoachun): Need to support 5-dimensional input. 
- // "conv3d_bias_mkldnn_fuse_pass", // - "conv_elementwise_add_mkldnn_fuse_pass", - "conv_activation_mkldnn_fuse_pass", // + // "conv3d_bias_onednn_fuse_pass", // + "conv_elementwise_add_onednn_fuse_pass", + "conv_activation_onednn_fuse_pass", // "scale_matmul_fuse_pass", // - "reshape_transpose_matmul_mkldnn_fuse_pass", // - "matmul_transpose_reshape_mkldnn_fuse_pass", // - "matmul_elementwise_add_mkldnn_fuse_pass", // - "matmul_activation_mkldnn_fuse_pass", // + "reshape_transpose_matmul_onednn_fuse_pass", // + "matmul_transpose_reshape_onednn_fuse_pass", // + "matmul_elementwise_add_onednn_fuse_pass", // + "matmul_activation_onednn_fuse_pass", // // Disabled due to topology-dependent speed-up - "fc_mkldnn_pass", - "fc_act_mkldnn_fuse_pass", + "fc_onednn_pass", + "fc_act_onednn_fuse_pass", "self_attention_fuse_pass", // "batch_norm_act_fuse_pass", // "softplus_activation_onednn_fuse_pass", // - "shuffle_channel_mkldnn_detect_pass", // + "shuffle_channel_onednn_detect_pass", // "elementwise_act_onednn_fuse_pass", // "operator_scale_onednn_fuse_pass", // "operator_unsqueeze2_onednn_fuse_pass", // @@ -419,8 +418,8 @@ void CpuPassStrategy::EnableMkldnnQuantizer() { void CpuPassStrategy::EnableMkldnnBfloat16() { #ifdef PADDLE_WITH_DNNL if (!use_mkldnn_bfloat16_) { - passes_.emplace_back("fc_mkldnn_pass"); - passes_.emplace_back("fc_act_mkldnn_fuse_pass"); + passes_.emplace_back("fc_onednn_pass"); + passes_.emplace_back("fc_act_onednn_fuse_pass"); passes_.emplace_back("cpu_bfloat16_placement_pass"); passes_.emplace_back("cpu_bfloat16_pass"); @@ -437,8 +436,8 @@ void CpuPassStrategy::EnableMkldnnInt8() { if (!use_mkldnn_int8_) { passes_.clear(); passes_.emplace_back("simplify_with_basic_ops_pass"); - passes_.emplace_back("quant_dequant_mkldnn_pass"); - passes_.emplace_back("mkldnn_placement_pass"); + passes_.emplace_back("quant_dequant_onednn_pass"); + passes_.emplace_back("onednn_placement_pass"); passes_.emplace_back("constant_folding_pass"); passes_.emplace_back("squeeze2_transpose2_onednn_fuse_pass"); passes_.emplace_back("layer_norm_fuse_pass"); @@ -462,27 +461,27 @@ void CpuPassStrategy::EnableMkldnnInt8() { passes_.emplace_back("matmul_scale_fuse_pass"); passes_.emplace_back("gpu_cpu_map_matmul_to_mul_pass"); passes_.emplace_back("repeated_fc_relu_fuse_pass"); - passes_.emplace_back("depthwise_conv_mkldnn_pass"); + passes_.emplace_back("depthwise_conv_onednn_pass"); passes_.emplace_back("conv_bn_fuse_pass"); passes_.emplace_back("conv_eltwiseadd_bn_fuse_pass"); - passes_.emplace_back("conv_affine_channel_mkldnn_fuse_pass"); + passes_.emplace_back("conv_affine_channel_onednn_fuse_pass"); passes_.emplace_back("conv_transpose_bn_fuse_pass"); passes_.emplace_back("conv_transpose_eltwiseadd_bn_fuse_pass"); - passes_.emplace_back("conv_bias_mkldnn_fuse_pass"); - passes_.emplace_back("conv_transpose_bias_mkldnn_fuse_pass"); - passes_.emplace_back("conv_elementwise_add_mkldnn_fuse_pass"); - passes_.emplace_back("conv_activation_mkldnn_fuse_pass"); + passes_.emplace_back("conv_bias_onednn_fuse_pass"); + passes_.emplace_back("conv_transpose_bias_onednn_fuse_pass"); + passes_.emplace_back("conv_elementwise_add_onednn_fuse_pass"); + passes_.emplace_back("conv_activation_onednn_fuse_pass"); passes_.emplace_back("fc_fuse_pass"); passes_.emplace_back("repeated_fc_relu_fuse_pass"); - passes_.emplace_back("fc_mkldnn_pass"); - passes_.emplace_back("fc_act_mkldnn_fuse_pass"); - passes_.emplace_back("matmul_transpose_reshape_mkldnn_fuse_pass"); + passes_.emplace_back("fc_onednn_pass"); + 
passes_.emplace_back("fc_act_onednn_fuse_pass"); + passes_.emplace_back("matmul_transpose_reshape_onednn_fuse_pass"); passes_.emplace_back("batch_norm_act_fuse_pass"); passes_.emplace_back("softplus_activation_onednn_fuse_pass"); - passes_.emplace_back("compute_propagate_scales_mkldnn_pass"); + passes_.emplace_back("compute_propagate_scales_onednn_pass"); passes_.emplace_back("scale_matmul_fuse_pass"); - passes_.emplace_back("reshape_transpose_matmul_mkldnn_fuse_pass"); - passes_.emplace_back("matmul_elementwise_add_mkldnn_fuse_pass"); + passes_.emplace_back("reshape_transpose_matmul_onednn_fuse_pass"); + passes_.emplace_back("matmul_elementwise_add_onednn_fuse_pass"); passes_.emplace_back("operator_scale_onednn_fuse_pass"); passes_.emplace_back("operator_unsqueeze2_onednn_fuse_pass"); passes_.emplace_back("operator_reshape2_onednn_fuse_pass"); @@ -510,7 +509,7 @@ void CpuPassStrategy::DisableMkldnnFcPasses() { void CpuPassStrategy::EraseFcMkldnnPasses() { std::vector fc_passes_to_erase( - {"fc_mkldnn_pass", "fc_act_mkldnn_fuse_pass"}); + {"fc_onednn_pass", "fc_act_onednn_fuse_pass"}); for (const auto &pass : fc_passes_to_erase) { int idx = static_cast(GetPassIndex(pass)); if (idx != -1) { @@ -538,6 +537,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) { "cast_embedding_trans_ids_to_int32_pass", "delete_elementwise_mul_op_pass", "generate_sequence_xpu_fuse_pass", + "group_norm_silu_xpu_fuse_pass", "embedding_with_eltwise_add_xpu_fuse_pass", "qk_qkv_attention_xpu_fuse_pass", "multi_encoder_xpu_fuse_pass", @@ -545,6 +545,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) { "multi_encoder_xpu_slice_fuse_pass", "fused_multi_transformer_cachekv_layout_trans_pass", "fused_multi_transformer_int8_cachekv_layout_trans_pass", + "cross_attention_xpu_fuse_pass", "decoder_attention_xpu_fuse_pass", "one_beam_size_fuse_pass", "fold_interp_outsize_fuse_pass", @@ -586,6 +587,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) { "xpu_quantize_op_pass", "xpu_quantize_squash_pass", "link_xpu_op_max_pass", + "spatial_transformer_resblock_xpu_fuse_pass", "delete_isolated_node_pass", "inplace_op_var_pass", }); @@ -611,22 +613,31 @@ const std::vector kPirGpuPasses{ "fc_elementwise_layernorm_fuse_pass", "matmul_scale_fuse_pass", "matmul_transpose_fuse_pass", - "transpose_flatten_concat_fuse_pass"}; + "transpose_flatten_concat_fuse_pass", + "remove_redundant_transpose_pass"}; const std::vector kPirXpuPasses{// Functional pass "map_op_to_another_pass", "identity_op_clean_pass", // Operator fusion pass - "add_layernorm_xpu_fuse_pass"}; + "add_layernorm_xpu_fuse_pass", + "group_norm_silu_xpu_fuse_pass"}; const std::vector kPirMkldnnPasses{ + "depthwise_conv_onednn_pass", + "squeeze_transpose_onednn_fuse_pass", "conv2d_bias_fuse_pass", "conv2d_transpose_bias_fuse_pass", "conv3d_bias_fuse_pass", "batch_norm_act_fuse_pass", + "scale_matmul_fuse_pass", + "reshape_transpose_matmul_fuse_pass", + "matmul_transpose_reshape_fuse_pass", "matmul_elementwise_add_fuse_pass", "matmul_activation_fuse_pass", - "conv_elementwise_add_mkldnn_fuse_pass"}; + "conv_elementwise_add_onednn_fuse_pass", + "conv_activation_onednn_fuse_pass", + "conv_concat_activation_onednn_fuse_pass"}; const std::vector kPirCpuPasses{}; diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 79ef68c853cfb..013fb8d477924 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -43,7 +43,7 @@ namespace paddle { 
/// Example Usage: /// Build a new pass. /// \code{cpp} -/// const vector passes(1, "conv_relu_mkldnn_fuse_pass"); +/// const vector passes(1, "conv_relu_onednn_fuse_pass"); /// PaddlePassBuilder builder(passes); /// \endcode class PD_INFER_DECL PaddlePassBuilder { @@ -139,24 +139,24 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder { /// \brief Enable the use of cuDNN kernel. virtual void EnableCUDNN() {} - /// \brief Enable the use of MKLDNN. - /// The MKLDNN control exists in both CPU and GPU mode, because there can + /// \brief Enable the use of OneDNN. + /// The OneDNN control exists in both CPU and GPU mode, because there can /// still be some CPU kernels running in GPU mode. virtual void EnableMKLDNN() {} - /// \brief Disable the use of MKLDNN. + /// \brief Disable the use of OneDNN. virtual void DisableMKLDNN() {} - /// \brief Enable MKLDNN quantize optimization. + /// \brief Enable OneDNN quantize optimization. virtual void EnableMkldnnQuantizer() {} - /// \brief Enable MKLDNN bfloat16. + /// \brief Enable OneDNN bfloat16. virtual void EnableMkldnnBfloat16() {} - /// \brief Enable MKLDNN int8. + /// \brief Enable OneDNN int8. virtual void EnableMkldnnInt8() {} - /// \brief Disable MKLDNN fc passes. + /// \brief Disable OneDNN fc passes. virtual void DisableMkldnnFcPasses() {} /// \brief Check if we are using gpu. @@ -214,26 +214,26 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy { /// \brief Enable the use of cuDNN kernel. void EnableCUDNN() override; - /// \brief Enable the use of MKLDNN. + /// \brief Enable the use of OneDNN. void EnableMKLDNN() override; - /// \brief Disable the use of MKLDNN. + /// \brief Disable the use of OneDNN. void DisableMKLDNN() override; - /// \brief Enable MKLDNN quantize optimization. + /// \brief Enable OneDNN quantize optimization. void EnableMkldnnQuantizer() override; - /// \brief Enable MKLDNN bfloat16. + /// \brief Enable OneDNN bfloat16. void EnableMkldnnBfloat16() override; - /// \brief Enable MKLDNN int8. + /// \brief Enable OneDNN int8. void EnableMkldnnInt8() override; - /// \brief Disable MKLDNN fc passes. + /// \brief Disable OneDNN fc passes. void DisableMkldnnFcPasses() override; protected: - /// \brief Erase MKLDNN fc passes. + /// \brief Erase OneDNN fc passes. void EraseFcMkldnnPasses(); /// \cond Protected @@ -276,7 +276,7 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy { /// \brief Not supported in GPU mode yet. void EnableMkldnnInt8() override; - /// \brief Disable MKLDNN fc passes. + /// \brief Disable OneDNN fc passes. void DisableMkldnnFcPasses() override; /// \brief Default destructor. diff --git a/paddle/fluid/inference/capi_exp/pd_config.h b/paddle/fluid/inference/capi_exp/pd_config.h index 427e9b95ac499..f1bfe828cbcf2 100644 --- a/paddle/fluid/inference/capi_exp/pd_config.h +++ b/paddle/fluid/inference/capi_exp/pd_config.h @@ -526,14 +526,14 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigLiteEngineEnabled( PADDLE_CAPI_EXPORT extern void PD_ConfigSwitchIrDebug( __pd_keep PD_Config* pd_config, PD_Bool x); /// -/// \brief Turn on MKLDNN. +/// \brief Turn on OneDNN. /// /// \param[in] pd_config config /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMKLDNN( __pd_keep PD_Config* pd_config); /// -/// \brief Set the cache capacity of different input shapes for MKLDNN. +/// \brief Set the cache capacity of different input shapes for OneDNN. /// Default value 0 means not caching any shape. 
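The \code{cpp} example above lost its template argument in flattening; written out (assuming std::vector<std::string>, as elsewhere in this header), it would read:

```cpp
#include <string>
#include <vector>

#include "paddle/fluid/inference/api/paddle_pass_builder.h"

int main() {
  // One-element pass list, as in the doxygen example above.
  const std::vector<std::string> passes(1, "conv_relu_onednn_fuse_pass");
  paddle::PaddlePassBuilder builder(passes);
  (void)builder;
  return 0;
}
```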
/// Please see MKL-DNN Data Caching Design Document: /// https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/mkldnn/caching/caching.md @@ -544,10 +544,10 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMKLDNN( PADDLE_CAPI_EXPORT extern void PD_ConfigSetMkldnnCacheCapacity( __pd_keep PD_Config* pd_config, int32_t capacity); /// -/// \brief A boolean state telling whether to use the MKLDNN. +/// \brief A boolean state telling whether to use the OneDNN. /// /// \param[in] pd_config config -/// \return Whether to use the MKLDNN. +/// \return Whether to use the OneDNN. /// PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnEnabled( __pd_keep PD_Config* pd_config); @@ -570,7 +570,7 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigSetCpuMathLibraryNumThreads( PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGetCpuMathLibraryNumThreads( __pd_keep PD_Config* pd_config); /// -/// \brief Specify the operator type list to use MKLDNN acceleration. +/// \brief Specify the operator type list to use OneDNN acceleration. /// /// \param[in] pd_config config /// \param[in] ops_num The number of operator type list. @@ -579,32 +579,32 @@ PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGetCpuMathLibraryNumThreads( PADDLE_CAPI_EXPORT extern void PD_ConfigSetMkldnnOp( __pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list); /// -/// \brief Turn on MKLDNN quantization. +/// \brief Turn on OneDNN quantization. /// /// \param[in] pd_config config /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnQuantizer( __pd_keep PD_Config* pd_config); /// -/// \brief A boolean state telling whether the MKLDNN quantization is enabled. +/// \brief A boolean state telling whether the OneDNN quantization is enabled. /// /// \param[in] pd_config config -/// \return Whether the MKLDNN quantization is enabled. +/// \return Whether the OneDNN quantization is enabled. /// PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnQuantizerEnabled( __pd_keep PD_Config* pd_config); /// -/// \brief Turn on MKLDNN bfloat16. +/// \brief Turn on OneDNN bfloat16. /// /// \param[in] pd_config config /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnBfloat16( __pd_keep PD_Config* pd_config); /// -/// \brief A boolean state telling whether to use the MKLDNN Bfloat16. +/// \brief A boolean state telling whether to use the OneDNN Bfloat16. /// /// \param[in] pd_config config -/// \return Whether to use the MKLDNN Bfloat16. +/// \return Whether to use the OneDNN Bfloat16. /// PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnBfloat16Enabled( __pd_keep PD_Config* pd_config); @@ -617,17 +617,17 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnBfloat16Enabled( PADDLE_CAPI_EXPORT extern void PD_ConfigSetBfloat16Op( __pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list); /// -/// \brief Turn on MKLDNN int8. +/// \brief Turn on OneDNN int8. /// /// \param[in] pd_config config /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnInt8( __pd_keep PD_Config* pd_config); /// -/// \brief A boolean state telling whether to use the MKLDNN int8. +/// \brief A boolean state telling whether to use the OneDNN int8. /// /// \param[in] pd_config config -/// \return Whether to use the MKLDNN int8. +/// \return Whether to use the OneDNN int8. 
/// PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnInt8Enabled( __pd_keep PD_Config* pd_config); diff --git a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp index 0d585f938be8c..b4cf4a0953169 100644 --- a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp +++ b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp @@ -122,7 +122,7 @@ Java_com_baidu_paddle_inference_Config_cpuMathLibraryNumThreads( return mathThreadsNum; } -// 5. MKLDNN settings +// 5. OneDNN settings JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableMKLDNN( JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { diff --git a/paddle/fluid/inference/experimental/javaapi/src/main/java/com/baidu/paddle/inference/Config.java b/paddle/fluid/inference/experimental/javaapi/src/main/java/com/baidu/paddle/inference/Config.java index a312cc73fde22..e9bef0d271f05 100644 --- a/paddle/fluid/inference/experimental/javaapi/src/main/java/com/baidu/paddle/inference/Config.java +++ b/paddle/fluid/inference/experimental/javaapi/src/main/java/com/baidu/paddle/inference/Config.java @@ -208,7 +208,7 @@ public void resetCppPaddleConfigPointer() { private native int cpuMathLibraryNumThreads(long cppPaddleConfigPointer); - // 5. MKLDNN settings + // 5. OneDNN settings private native void enableMKLDNN(long cppPaddleConfigPointer); diff --git a/paddle/fluid/inference/goapi/config.go b/paddle/fluid/inference/goapi/config.go index 9d0a1e5864418..c2e2b410e4061 100644 --- a/paddle/fluid/inference/goapi/config.go +++ b/paddle/fluid/inference/goapi/config.go @@ -554,14 +554,14 @@ func (config *Config) SwitchIrDebug(x bool) { } /// -/// \brief Turn on MKLDNN. +/// \brief Turn on OneDNN. /// func (config *Config) EnableMKLDNN() { C.PD_ConfigEnableMKLDNN(config.c) } /// -/// \brief Set the cache capacity of different input shapes for MKLDNN. +/// \brief Set the cache capacity of different input shapes for OneDNN. /// Default value 0 means not caching any shape. /// Please see MKL-DNN Data Caching Design Document: /// https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/mkldnn/caching/caching.md @@ -573,9 +573,9 @@ func (config *Config) SetMkldnnCacheCapacity(capacity int32) { } /// -/// \brief A boolean state telling whether to use the MKLDNN. +/// \brief A boolean state telling whether to use the OneDNN. /// -/// \return bool Whether to use the MKLDNN. +/// \return bool Whether to use the OneDNN. /// func (config *Config) MkldnnEnabled() bool { return cvtPDBoolToGo(C.PD_ConfigMkldnnEnabled(config.c)) @@ -609,7 +609,7 @@ func (config *Config) CpuMathLibraryNumThreads() int32 { // NativeConfig ToNativeConfig() const; /// -/// \brief Specify the operator type list to use MKLDNN acceleration. +/// \brief Specify the operator type list to use OneDNN acceleration. /// /// \param opList The operator type list. /// @@ -627,23 +627,23 @@ func (config *Config) SetMKLDNNOp(opList []string) { } /// -/// \brief Turn on MKLDNN quantization. +/// \brief Turn on OneDNN quantization. /// func (config *Config) EnableMkldnnQuantizer() { C.PD_ConfigEnableMkldnnQuantizer(config.c) } /// -/// \brief Turn on MKLDNN bfloat16. +/// \brief Turn on OneDNN bfloat16. /// func (config *Config) EnableMkldnnBfloat16() { C.PD_ConfigEnableMkldnnBfloat16(config.c) } /// -/// \brief A boolean state telling whether to use the MKLDNN Bfloat16. 
+/// \brief A boolean state telling whether to use the OneDNN Bfloat16. /// -/// \return bool Whether to use the MKLDNN Bfloat16. +/// \return bool Whether to use the OneDNN Bfloat16. /// func (config *Config) MkldnnBfloat16Enabled() bool { return cvtPDBoolToGo(C.PD_ConfigMkldnnBfloat16Enabled(config.c)) @@ -677,9 +677,9 @@ func (config *Config) ThreadLocalStreamEnabled() bool { } /// -/// \brief A boolean state telling whether the MKLDNN quantization is enabled. +/// \brief A boolean state telling whether the OneDNN quantization is enabled. /// -/// \return bool Whether the MKLDNN quantization is enabled. +/// \return bool Whether the OneDNN quantization is enabled. /// func (config *Config) MkldnnQuantizerEnabled() bool { return cvtPDBoolToGo(C.PD_ConfigMkldnnQuantizerEnabled(config.c)) diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc index f41a25fe9717c..d3d9174e84c48 100644 --- a/paddle/fluid/ir_adaptor/translator/op_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc @@ -2023,6 +2023,19 @@ struct FillConstant2FullWithTensorTranscriber : public OpTranscriber { const OpInputInfoList& input_infos, pir::Block* block) override { std::vector op_inputs; + if (op_desc.HasInput("ValueTensor", true) && + op_desc.Input("ValueTensor", true).size() > 0) { + auto value_tensor_vars = op_desc.Input("ValueTensor", true); + auto defining_info = (*param_map)[value_tensor_vars[0]]; + op_inputs.push_back(defining_info.value); + } else { + float value = PADDLE_GET_CONST(float, op_desc.GetAttr("value")); + pir::Attribute new_attr = pir::FloatAttribute::get(ctx, value); + auto defining_op = + InsertFullOperationForAttributeInput(ctx, block, new_attr); + op_inputs.push_back(defining_op->result(0)); + } + if (op_desc.HasInput("ShapeTensor", true) && op_desc.Input("ShapeTensor", true).size() > 0) { auto shape_tensor_vars = op_desc.Input("ShapeTensor", true); @@ -2044,18 +2057,6 @@ struct FillConstant2FullWithTensorTranscriber : public OpTranscriber { op_inputs.push_back(defining_op->result(0)); } - if (op_desc.HasInput("ValueTensor", true) && - op_desc.Input("ValueTensor", true).size() > 0) { - auto value_tensor_vars = op_desc.Input("ValueTensor", true); - auto defining_info = (*param_map)[value_tensor_vars[0]]; - op_inputs.push_back(defining_info.value); - } else { - float value = PADDLE_GET_CONST(float, op_desc.GetAttr("value")); - pir::Attribute new_attr = pir::FloatAttribute::get(ctx, value); - auto defining_op = - InsertFullOperationForAttributeInput(ctx, block, new_attr); - op_inputs.push_back(defining_op->result(0)); - } return op_inputs; } diff --git a/paddle/fluid/ir_adaptor/translator/utils.cc b/paddle/fluid/ir_adaptor/translator/utils.cc index 07bbb644c6b72..4015d358930b4 100644 --- a/paddle/fluid/ir_adaptor/translator/utils.cc +++ b/paddle/fluid/ir_adaptor/translator/utils.cc @@ -95,7 +95,7 @@ std::vector CheckUnregisteredOperationInBlock( OpTranscriber general_handler; try { general_handler.LookUpOpInfo(ctx, *op); - } catch (pir::IrNotMetException& e) { + } catch (common::enforce::EnforceNotMet& e) { unregistered_ops.push_back(op->Type()); } } diff --git a/paddle/fluid/jit/engine/interpreter_engine.cc b/paddle/fluid/jit/engine/interpreter_engine.cc index e8f622641c33b..7d575ff838f4f 100644 --- a/paddle/fluid/jit/engine/interpreter_engine.cc +++ b/paddle/fluid/jit/engine/interpreter_engine.cc @@ -52,11 +52,11 @@ void InterpreterEngine::CreateInterpreterCore() { 
framework::ir::PassRegistry::Instance().Get("delete_dropout_op_x_pass"); pass->Apply(&graph); #ifdef PADDLE_WITH_DNNL - auto mkldnn_pass = - framework::ir::PassRegistry::Instance().Get("mkldnn_placement_pass"); - mkldnn_pass->Set("mkldnn_enabled_op_types", + auto onednn_pass = + framework::ir::PassRegistry::Instance().Get("onednn_placement_pass"); + onednn_pass->Set("mkldnn_enabled_op_types", new std::unordered_set({})); - mkldnn_pass->Apply(&graph); + onednn_pass->Apply(&graph); #endif GraphToProgram(graph, &converted_prog_, nullptr); diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt index 18bd48d0cd2e1..2fd782875856e 100644 --- a/paddle/fluid/memory/CMakeLists.txt +++ b/paddle/fluid/memory/CMakeLists.txt @@ -2,12 +2,12 @@ add_subdirectory(allocation) file(GLOB fluid_memory_srcs "*.cc") -if(WITH_MKLDNN) - set(MKLDNN_CTX_DEPS mkldnn) +if(WITH_ONEDNN) + set(ONEDNN_CTX_DEPS onednn) else() - set(MKLDNN_CTX_DEPS) + set(ONEDNN_CTX_DEPS) endif() -set(fluid_memory_deps place enforce common allocator ${MKLDNN_CTX_DEPS}) +set(fluid_memory_deps place enforce common allocator ${ONEDNN_CTX_DEPS}) cc_library( fluid_memory diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 693c1cd47b0de..9d960845198f7 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -147,7 +147,10 @@ static T&& FillValue(T&& allocation) { #if defined(PADDLE_WITH_CUDA) if (allocation != nullptr) { if (FLAGS_sync_after_alloc || FLAGS_alloc_fill_value >= 0) { - PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize()); + bool need_sync = !platform::is_cpu_place(allocation->place()); + if (need_sync) { + PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize()); + } if (FLAGS_alloc_fill_value >= 0) { VLOG(10) << "Set " << FLAGS_alloc_fill_value << " on " << allocation->ptr() << " " << allocation->place() << " " @@ -159,7 +162,9 @@ static T&& FillValue(T&& allocation) { std::memset( allocation->ptr(), FLAGS_alloc_fill_value, allocation->size()); } - PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize()); + if (need_sync) { + PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize()); + } } } } diff --git a/paddle/fluid/memory/allocation/custom_allocator.cc b/paddle/fluid/memory/allocation/custom_allocator.cc index b4c3ebe1b2926..36848ff9cf0b0 100644 --- a/paddle/fluid/memory/allocation/custom_allocator.cc +++ b/paddle/fluid/memory/allocation/custom_allocator.cc @@ -16,6 +16,10 @@ #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/trace_event.h" + +COMMON_DECLARE_bool(custom_device_mem_record); namespace paddle { namespace memory { @@ -33,6 +37,14 @@ void CustomAllocator::FreeImpl(phi::Allocation* allocation) { phi::DeviceManager::GetDeviceWithPlace(place_)->MemoryDeallocate( allocation->ptr(), allocation->size()); } + if (FLAGS_custom_device_mem_record) { + DEVICE_MEMORY_STAT_UPDATE( + Reserved, place_.GetDeviceId(), -allocation->size()); + platform::RecordMemEvent(allocation->ptr(), + place_, + allocation->size(), + platform::TracerMemEventType::ReservedFree); + } delete allocation; } @@ -42,6 +54,11 @@ phi::Allocation* CustomAllocator::AllocateImpl(size_t size) { void* ptr = phi::DeviceManager::GetDeviceWithPlace(place_)->MemoryAllocate(size); if (LIKELY(ptr)) { + if (FLAGS_custom_device_mem_record) { + DEVICE_MEMORY_STAT_UPDATE(Reserved, place_.GetDeviceId(), size); + 
platform::RecordMemEvent( + ptr, place_, size, platform::TracerMemEventType::ReservedAllocate); + } return new Allocation(ptr, size, place_); } diff --git a/paddle/fluid/memory/allocation/mmap_allocator.cc b/paddle/fluid/memory/allocation/mmap_allocator.cc index a4a05df1dcaa9..f9647032a6a59 100644 --- a/paddle/fluid/memory/allocation/mmap_allocator.cc +++ b/paddle/fluid/memory/allocation/mmap_allocator.cc @@ -54,11 +54,14 @@ struct CountInfo { std::atomic refcount; }; -void AllocateMemoryMap( - std::string filename, int flags, size_t size, void **map_ptr_, int *fd_) { +void AllocateMemoryMap(std::string filename, + int *shared_fd, + int flags, + size_t size, + void **map_ptr_) { // TODO(@ZHUI): support win32 int file_flags = 0; - int fd = -1; + int fd = *shared_fd; if (flags & MAPPED_SHAREDMEM) { file_flags = O_RDWR | O_CREAT; } else { @@ -71,7 +74,7 @@ void AllocateMemoryMap( file_flags &= ~O_CREAT; } - if (!(flags & MAPPED_FROMFD)) { + if (!(flags & MAPPED_FROMFD) && fd == -1) { if (flags & MAPPED_SHAREDMEM) { fd = shm_open(filename.c_str(), file_flags, (mode_t)0600); PADDLE_ENFORCE_NE( @@ -83,8 +86,6 @@ void AllocateMemoryMap( VLOG(6) << "shm_open: " << filename; MemoryMapFdSet::Instance().Insert(filename); } - } else { - fd = -1; } PADDLE_ENFORCE_EQ(ftruncate(fd, size), @@ -98,32 +99,38 @@ void AllocateMemoryMap( *map_ptr_ = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); } + if (flags & MAPPED_UNLINK) { + VLOG(6) << "shm_unlink: " << filename; + shm_unlink(filename.c_str()); + } + PADDLE_ENFORCE_NE(*map_ptr_, MAP_FAILED, platform::errors::Unavailable( "Memory map failed when create shared memory.")); - if (flags & MAPPED_KEEPFD) { - *fd_ = fd; + *shared_fd = fd; + VLOG(6) << "keep fd: " << *shared_fd; } else { PADDLE_ENFORCE_NE(::close(fd), -1, platform::errors::Unavailable( "Error closing memory mapped file <", filename, ">")); - *fd_ = -1; + *shared_fd = -1; } } std::shared_ptr AllocateRefcountedMemoryMapAllocation(std::string filename, + int shared_fd, int flags, size_t size, int buffer_id) { - int fd = -1; + int fd = shared_fd; void *base_ptr = nullptr; if (buffer_id == -1) { - AllocateMemoryMap(filename, flags, size + mmap_alignment, &base_ptr, &fd); + AllocateMemoryMap(filename, &fd, flags, size + mmap_alignment, &base_ptr); VLOG(4) << "Create and mmap a new shm: " << filename; } else { base_ptr = MemoryMapAllocationPool::Instance().GetById(buffer_id).mmap_ptr_; @@ -132,7 +139,7 @@ AllocateRefcountedMemoryMapAllocation(std::string filename, void *aligned_base_ptr = static_cast(static_cast(base_ptr) + mmap_alignment); return std::make_shared( - aligned_base_ptr, size, filename, flags, fd, buffer_id); + aligned_base_ptr, size, filename, fd, flags, buffer_id); } RefcountedMemoryMapAllocation::RefcountedMemoryMapAllocation( @@ -145,11 +152,22 @@ RefcountedMemoryMapAllocation::RefcountedMemoryMapAllocation( : MemoryMapAllocation(ptr, size, ipc_name, fd, flags) { // must reset base ptr first. 
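A hedged sketch of calling the reworked allocation entry point with the new shared_fd parameter; the shm name and flag combination are illustrative only:

```cpp
#include <cstddef>
#include <string>

#include "paddle/fluid/memory/allocation/mmap_allocator.h"

void Demo() {
  namespace alloc = paddle::memory::allocation;
  int fd = -1;  // -1 lets the allocator shm_open the file itself; a valid
                // descriptor received from another process is adopted as-is.
  auto allocation = alloc::AllocateRefcountedMemoryMapAllocation(
      "/paddle_demo_shm",  // illustrative shm name
      fd,
      alloc::MAPPED_SHAREDMEM | alloc::MAPPED_KEEPFD,
      /*size=*/4096,
      /*buffer_id=*/-1);
  // With MAPPED_KEEPFD the descriptor stays open and is closed in close().
}
```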
buffer_id_ = buffer_id; + fd_ = fd; + flags_ = flags; resetBaseptr(); initializeRefercount(); } void MemoryMapAllocation::close() { + if (!closed_fd_) { + closed_fd_ = true; + if (flags_ & MAPPED_KEEPFD) { + PADDLE_ENFORCE_NE(::close(fd_), + -1, + platform::errors::Unavailable( + "Error closing file descriptor <", fd_, ">")); + } + } if (closed_) { return; } @@ -193,6 +211,15 @@ void RefcountedMemoryMapAllocation::close() { void *data = map_ptr_; CountInfo *info = reinterpret_cast(data); --info->refcount; + if (flags_ & MAPPED_KEEPFD) { + closed_fd_ = true; + PADDLE_ENFORCE_NE(::close(fd_), + -1, + platform::errors::Unavailable( + "Error closing file descriptor <", fd_, ">")); + VLOG(6) << "close fd: " << fd_; + } + if (FLAGS_use_shm_cache && buffer_id_ != -1) { return; } else { @@ -260,6 +287,7 @@ std::shared_ptr AllocateMemoryMapWriterAllocation( const std::string &ipc_name = GetIPCName(); int flags = O_RDWR | O_CREAT; int fd = shm_open(ipc_name.c_str(), flags, 0600); + PADDLE_ENFORCE_NE(fd, -1, platform::errors::Unavailable( @@ -283,7 +311,6 @@ std::shared_ptr RebuildMemoryMapReaderAllocation( const std::string &ipc_name, size_t size) { int flags = O_RDWR | O_CREAT; flags &= ~O_CREAT; - int fd = shm_open(ipc_name.c_str(), flags, 0600); PADDLE_ENFORCE_NE(fd, -1, diff --git a/paddle/fluid/memory/allocation/mmap_allocator.h b/paddle/fluid/memory/allocation/mmap_allocator.h index 412e3a3545769..64a3ae9de7658 100644 --- a/paddle/fluid/memory/allocation/mmap_allocator.h +++ b/paddle/fluid/memory/allocation/mmap_allocator.h @@ -44,13 +44,17 @@ enum MappedModes { class MemoryMapAllocation : public Allocation { public: - explicit MemoryMapAllocation(void *ptr, size_t size, std::string ipc_name) + explicit MemoryMapAllocation(void *ptr, + size_t size, + std::string ipc_name, + int fd) : Allocation(ptr, size, platform::CPUPlace()), ipc_name_(std::move(ipc_name)), + fd_(fd), map_ptr_(ptr), map_size_(size) {} explicit MemoryMapAllocation( - void *ptr, size_t size, std::string ipc_name, int flags, int fd) + void *ptr, size_t size, std::string ipc_name, int fd, int flags) : Allocation(ptr, size, platform::CPUPlace()), ipc_name_(std::move(ipc_name)), fd_(fd), @@ -59,6 +63,7 @@ class MemoryMapAllocation : public Allocation { map_size_(size) {} inline const std::string &ipc_name() const { return ipc_name_; } + inline const int shared_fd() const { return fd_; } virtual void close(); @@ -71,6 +76,7 @@ class MemoryMapAllocation : public Allocation { void *map_ptr_ = nullptr; size_t map_size_ = 0; bool closed_ = false; + bool closed_fd_ = false; }; class RefcountedMemoryMapAllocation : public MemoryMapAllocation { @@ -93,11 +99,15 @@ class RefcountedMemoryMapAllocation : public MemoryMapAllocation { void resetBaseptr(); }; -void AllocateMemoryMap( - std::string filename, int flags, size_t size, void **base_ptr_, int *fd_); +void AllocateMemoryMap(std::string filename, + int *shared_fd, + int flags, + size_t size, + void **base_ptr_); std::shared_ptr AllocateRefcountedMemoryMapAllocation(std::string filename, + int shared_fd, int flags, size_t size, int buffer_id = -1); @@ -111,11 +121,13 @@ class MemoryMapWriterAllocation : public Allocation { ipc_name_(std::move(ipc_name)) {} inline const std::string &ipc_name() const { return ipc_name_; } + inline const int shared_fd() const { return fd_; } ~MemoryMapWriterAllocation() override; private: std::string ipc_name_; + int fd_ = -1; }; class MemoryMapReaderAllocation : public Allocation { @@ -127,11 +139,13 @@ class MemoryMapReaderAllocation : public Allocation { 
ipc_name_(std::move(ipc_name)) {} inline const std::string &ipc_name() const { return ipc_name_; } + inline const int shared_fd() const { return fd_; } ~MemoryMapReaderAllocation() override; private: std::string ipc_name_; + int fd_ = -1; }; std::shared_ptr AllocateMemoryMapWriterAllocation( diff --git a/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.cc b/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.cc index 218068aeb9c97..bbc0915fe10ce 100644 --- a/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.cc +++ b/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.cc @@ -50,35 +50,11 @@ void StreamSafeCustomDeviceAllocation::RecordStream( outstanding_event_map_[stream]->Init(place()); VLOG(9) << "Create a new event " << outstanding_event_map_[stream]->raw_event(); - auto stream_wrapper = phi::stream::Stream(place(), stream); - VLOG(8) << "Record event " << outstanding_event_map_[stream]->raw_event() - << " to stream " << stream; - outstanding_event_map_[stream]->Record(&stream_wrapper); - } -} - -void StreamSafeCustomDeviceAllocation::MarkAsWillBeFreed() { - std::lock_guard lock_guard(outstanding_event_map_lock_); - if (!will_be_freed_) { - will_be_freed_ = false; - VLOG(8) << "ptr: " << ptr() << " will be freed"; - if (phi::DeviceManager::HasDeviceType(place_.GetDeviceType()) && - outstanding_event_map_.find(owning_stream_) == - outstanding_event_map_.end()) { - std::call_once(once_flag_, - [this] { phi::DeviceManager::SetDevice(place_); }); - outstanding_event_map_.insert( - {owning_stream_, std::make_shared()}); - outstanding_event_map_[owning_stream_]->Init(place_); - VLOG(9) << "Create a new event " - << outstanding_event_map_[owning_stream_]->raw_event(); - auto stream_wrapper = phi::stream::Stream(place_, owning_stream_); - VLOG(8) << "Record event " - << outstanding_event_map_[owning_stream_]->raw_event() - << " to stream " << owning_stream_; - outstanding_event_map_[owning_stream_]->Record(&stream_wrapper); - } } + auto stream_wrapper = phi::stream::Stream(place(), stream); + VLOG(8) << "Record event " << outstanding_event_map_[stream]->raw_event() + << " to stream " << stream; + outstanding_event_map_[stream]->Record(&stream_wrapper); } bool StreamSafeCustomDeviceAllocation::CanBeFreed() { @@ -190,7 +166,6 @@ void StreamSafeCustomDeviceAllocator::FreeImpl(phi::Allocation* allocation) { phi::DeviceContextPool::Instance().Get(place_)) ->stream()); } - stream_safe_cuda_allocation->MarkAsWillBeFreed(); if (stream_safe_cuda_allocation->CanBeFreed()) { VLOG(9) << "Directly delete allocation"; delete stream_safe_cuda_allocation; diff --git a/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h b/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h index 75f25fc0cfc2a..6d6bea9b2535c 100644 --- a/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h +++ b/paddle/fluid/memory/allocation/stream_safe_custom_device_allocator.h @@ -37,7 +37,6 @@ class StreamSafeCustomDeviceAllocation : public Allocation { void RecordStream(phi::stream::stream_t stream); bool CanBeFreed(); - void MarkAsWillBeFreed(); phi::stream::stream_t GetOwningStream() const; void SetOwningStream(phi::stream::stream_t s); diff --git a/paddle/fluid/memory/allocation/system_allocator.cc b/paddle/fluid/memory/allocation/system_allocator.cc index 8fd7967e9752d..a6e19b84ba8d1 100644 --- a/paddle/fluid/memory/allocation/system_allocator.cc +++ b/paddle/fluid/memory/allocation/system_allocator.cc @@ -41,6 +41,7 @@ 
limitations under the License. */ #include "paddle/fluid/platform/profiler/mem_tracing.h" COMMON_DECLARE_bool(use_pinned_memory); +COMMON_DECLARE_bool(custom_device_mem_record); COMMON_DECLARE_double(fraction_of_gpu_memory_to_use); COMMON_DECLARE_uint64(initial_gpu_memory_in_mb); COMMON_DECLARE_uint64(reallocate_gpu_memory_in_mb); @@ -298,6 +299,11 @@ void* CustomAllocator::Alloc(size_t* index, size_t size) { VLOG(4) << "CustomAllocator::Alloc " << p << " size " << size; *index = 0; plug_alloc_size += size; + if (FLAGS_custom_device_mem_record) { + DEVICE_MEMORY_STAT_UPDATE(Reserved, dev_id_, size); + platform::RecordMemEvent( + p, place, size, platform::TracerMemEventType::ReservedAllocate); + } } else { size_t avail, total; @@ -332,6 +338,11 @@ void CustomAllocator::Free(void* p, size_t size, size_t index) { auto place = platform::CustomPlace(dev_type_, dev_id_); auto device = phi::DeviceManager::GetDeviceWithPlace(place); device->MemoryDeallocate(p, size); + if (FLAGS_custom_device_mem_record) { + DEVICE_MEMORY_STAT_UPDATE(Reserved, dev_id_, size); + platform::RecordMemEvent( + p, place, size, platform::TracerMemEventType::ReservedFree); + } } bool CustomAllocator::UseGpu() const { return true; } diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 5d03c833a87c7..9126023d389be 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -19,12 +19,10 @@ add_subdirectory(controlflow) add_subdirectory(detection) add_subdirectory(elementwise) add_subdirectory(fused) -add_subdirectory(metrics) add_subdirectory(optimizers) add_subdirectory(reduce_ops) add_subdirectory(sequence_ops) add_subdirectory(string) -add_subdirectory(prim_ops) if(WITH_DISTRIBUTE) @@ -35,8 +33,6 @@ if (WITH_PSCORE) add_subdirectory(pscore) endif() -add_subdirectory(amp) - add_subdirectory(reader) if (NOT WIN32) @@ -51,10 +47,6 @@ if (WITH_DLNNE) add_subdirectory(dlnne) endif() -if (WITH_LITE) - add_subdirectory(lite) -endif() - if(WITH_CINN) add_subdirectory(cinn) endif() diff --git a/paddle/fluid/operators/activation_op.cu.h b/paddle/fluid/operators/activation_op.cu.h index d9b1545abce4c..37fd511d7de17 100644 --- a/paddle/fluid/operators/activation_op.cu.h +++ b/paddle/fluid/operators/activation_op.cu.h @@ -13,9 +13,9 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h" -#include "paddle/fluid/platform/bfloat16.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/common/bfloat16.h" #include "paddle/phi/kernels/funcs/activation_functor.h" namespace paddle { diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 38432f8768f59..399ea6963dd0b 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -30,7 +30,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/activation_functor.h" @@ -53,13 +53,13 @@ inline void ExtractActivationTensor(const framework::ExecutionContext& context, phi::DenseTensor** Out) { auto x_var = context.InputVar("X"); auto out_var = context.OutputVar("Out"); - PADDLE_ENFORCE_NOT_NULL(x_var, - platform::errors::NotFound( - "Cannot get input Variable X, variable name = %s", - context.InputName("X"))); + PADDLE_ENFORCE_NOT_NULL( + x_var, + phi::errors::NotFound("Cannot get input Variable X, variable name = %s", + context.InputName("X"))); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound( + phi::errors::NotFound( "Cannot get output Variable Out, variable name = %s", context.OutputName("Out"))); if (CanBeUsedBySelectedRows.count(context.Type())) { @@ -73,9 +73,9 @@ inline void ExtractActivationTensor(const framework::ExecutionContext& context, PADDLE_ENFORCE_NOT_NULL( *Out, - platform::errors::NotFound("Cannot get the tensor from the Variable " - "Output(Out), variable name = %s", - context.OutputName("Out"))); + phi::errors::NotFound("Cannot get the tensor from the Variable " + "Output(Out), variable name = %s", + context.OutputName("Out"))); } template @@ -94,23 +94,21 @@ inline void ExtractActivationGradTensor( out_var = context.InputVar("Out"); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound( + phi::errors::NotFound( "Cannot get input Variable Out, variable name = %s", context.InputName("Out"))); } PADDLE_ENFORCE_NOT_NULL( out_grad_var, - platform::errors::NotFound( - "Cannot get input Variable %s, variable name = %s", - framework::GradVarName("Out"), - context.InputName(framework::GradVarName("Out")))); + phi::errors::NotFound("Cannot get input Variable %s, variable name = %s", + framework::GradVarName("Out"), + context.InputName(framework::GradVarName("Out")))); PADDLE_ENFORCE_NOT_NULL( x_grad_var, - platform::errors::NotFound( - "Cannot get output Variable %s, variable name = %s", - framework::GradVarName("X"), - context.OutputName(framework::GradVarName("X")))); + phi::errors::NotFound("Cannot get output Variable %s, variable name = %s", + framework::GradVarName("X"), + context.OutputName(framework::GradVarName("X")))); if (CanBeUsedBySelectedRows.count(context.Type())) { *dOut = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar( @@ -137,19 +135,19 @@ inline void ExtractActivationGradTensor( } } - PADDLE_ENFORCE_NOT_NULL(*dX, - platform::errors::NotFound( - "Cannot get the tensor from the Variable " - "Output(Out), variable name = %s", - context.OutputName(framework::GradVarName("X")))); + PADDLE_ENFORCE_NOT_NULL( + *dX, + phi::errors::NotFound("Cannot get the tensor from the Variable " + "Output(Out), variable name = %s", + context.OutputName(framework::GradVarName("X")))); if (static_cast(kDepValue) & static_cast(ActBwdOpFwdDeps::kDepX)) { auto x_var = context.InputVar("X"); PADDLE_ENFORCE_NOT_NULL( x_var, - platform::errors::NotFound("Cannot get the tensor from the " - "Variable Input(X), variable name = %s", - context.InputName("X"))); + phi::errors::NotFound("Cannot get the tensor from the " + "Variable Input(X), variable name = %s", + context.InputName("X"))); if (CanBeUsedBySelectedRows.count(context.Type())) { *X = 
paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var); } else { @@ -384,16 +382,15 @@ inline void ExtractDoubleGradTensorWithInputDOut( auto ddo_var = ctx.OutputVar("DDOut"); PADDLE_ENFORCE_NOT_NULL( ddx_var, - platform::errors::NotFound( - "Cannot get input Variable Out, variable name = %s", - ctx.InputName("DDX"))); + phi::errors::NotFound("Cannot get input Variable Out, variable name = %s", + ctx.InputName("DDX"))); *ddX = ctx.Input("DDX"); if (ddo_var) { *ddOut = ctx.Output("DDOut"); } PADDLE_ENFORCE_NOT_NULL( ddX, - platform::errors::NotFound( + phi::errors::NotFound( "Cannot get the tensor from the Variable DDX, variable name = %s", ctx.OutputName("DDX"))); @@ -401,9 +398,8 @@ inline void ExtractDoubleGradTensorWithInputDOut( auto x_var = ctx.InputVar("X"); PADDLE_ENFORCE_NOT_NULL( x_var, - platform::errors::NotFound( - "Cannot get input Variable Out, variable name = %s", - ctx.InputName("X"))); + phi::errors::NotFound("Cannot get input Variable Out, variable name = %s", + ctx.InputName("X"))); auto dx_var = ctx.OutputVar("DX"); *X = ctx.Input("X"); if (dx_var) { diff --git a/paddle/fluid/operators/add_position_encoding_op.cc b/paddle/fluid/operators/add_position_encoding_op.cc index ad24d37b90d81..13d6f7449f6dd 100644 --- a/paddle/fluid/operators/add_position_encoding_op.cc +++ b/paddle/fluid/operators/add_position_encoding_op.cc @@ -72,7 +72,7 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GE( alpha, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute 'alpha' must be greater than or equal to 0.0.")); }); AddAttr("beta", "The scale of Position Embedding.") @@ -81,7 +81,7 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GE( beta, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute 'beta' must be greater than or equal to 0.0.")); }); AddComment(R"DOC( diff --git a/paddle/fluid/operators/add_position_encoding_op.h b/paddle/fluid/operators/add_position_encoding_op.h index 4547f6321a01d..009e40efeae38 100644 --- a/paddle/fluid/operators/add_position_encoding_op.h +++ b/paddle/fluid/operators/add_position_encoding_op.h @@ -41,7 +41,7 @@ class AddPositionEncodingKernel : public framework::OpKernel { if (x_lod.empty()) { PADDLE_ENFORCE_EQ(x_dim.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input(X)'s dimension of AddPositionEncodingOp " "should be equal to " "3, but received %d. ", @@ -52,14 +52,14 @@ class AddPositionEncodingKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(x_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input(X)'s dimension of AddPositionEncodingOp " "should be equal to " "2, but received %d. ", x_dim.size())); PADDLE_ENFORCE_EQ(x_lod.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input(X)'s lod level of AddPositionEncodingOp " "should be equal to " "1, but received %d. ", @@ -70,13 +70,13 @@ class AddPositionEncodingKernel : public framework::OpKernel { enc_size = x_dim[1]; } - PADDLE_ENFORCE_EQ(enc_size % 2, - 0, - platform::errors::InvalidArgument( - "The input(X)'s feature size of " - "AddPositionEncodingOp only support even, " - "but received an odd number: %d. ", - enc_size)); + PADDLE_ENFORCE_EQ( + enc_size % 2, + 0, + phi::errors::InvalidArgument("The input(X)'s feature size of " + "AddPositionEncodingOp only support even, " + "but received an odd number: %d. 
", + enc_size)); const int half_size = enc_size / 2; for (int i = 0; i < batch_size; ++i) { diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc index f44c181cca097..b80672216efe3 100644 --- a/paddle/fluid/operators/affine_channel_op.cc +++ b/paddle/fluid/operators/affine_channel_op.cc @@ -80,13 +80,13 @@ class AffineChannelOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( scale_dims.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Scale) must be 1," "But received the dimensions of Input(Scale) is [%d] ", scale_dims.size())); PADDLE_ENFORCE_EQ(b_dims.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Bias) must be 1," "But received the dimensions of Input(Bias) is [%d] ", scale_dims.size())); @@ -94,7 +94,7 @@ class AffineChannelOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( scale_dims[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension value of Input(Scale) must be [%d]," "But received [%d].", C, @@ -104,7 +104,7 @@ class AffineChannelOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( b_dims[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension value of Input(Bias) must be [%d]," "But received [%d].", C, diff --git a/paddle/fluid/operators/affine_channel_op_xpu.cc b/paddle/fluid/operators/affine_channel_op_xpu.cc index 799bb87cf9892..9024dab8f98c2 100644 --- a/paddle/fluid/operators/affine_channel_op_xpu.cc +++ b/paddle/fluid/operators/affine_channel_op_xpu.cc @@ -70,7 +70,7 @@ class AffineChannelXPUKernel : public framework::OpKernel { dev_ctx.x_context(), x_d, scale_d, y_d, x_shape, b_shape); PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The broadcast_mul XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); @@ -78,7 +78,7 @@ class AffineChannelXPUKernel : public framework::OpKernel { dev_ctx.x_context(), y_d, bias_d, y_d, x_shape, b_shape); PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The broadcast_add XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); @@ -140,28 +140,28 @@ class AffineChannelGradXPUKernel : public framework::OpKernel { dev_ctx.x_context(), dy_d, dbias_d, x_shape, rdims); PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The reduce_sum XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); T* tmp = RAII_GUARD.alloc_l3_or_gm(dy->numel()); PADDLE_ENFORCE_NOT_NULL( - tmp, platform::errors::External("XPU has no enough memory")); + tmp, phi::errors::External("XPU has no enough memory")); r = xpu::mul( dev_ctx.x_context(), dy_d, x->data(), tmp, dy->numel()); PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External("The mul XPU OP return wrong value[%d %s]", - r, - XPUAPIErrorMsg[r])); + phi::errors::External("The mul XPU OP return wrong value[%d %s]", + r, + XPUAPIErrorMsg[r])); r = xpu::reduce_sum( dev_ctx.x_context(), tmp, dscale_d, x_shape, rdims); PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "The reduce_sum XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); @@ -172,7 +172,7 @@ class AffineChannelGradXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - 
platform::errors::External( + phi::errors::External( "The broadcast_mul XPU OP return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); diff --git a/paddle/fluid/operators/amp/CMakeLists.txt b/paddle/fluid/operators/amp/CMakeLists.txt deleted file mode 100644 index cbd9c8b2768b4..0000000000000 --- a/paddle/fluid/operators/amp/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include(operators) -if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/amp. - include(unity_build_rule.cmake) -endif() -register_operators() diff --git a/paddle/fluid/operators/amp/alloc_float_status_op.cc b/paddle/fluid/operators/amp/alloc_float_status_op.cc deleted file mode 100644 index 2c1b4b201e5c3..0000000000000 --- a/paddle/fluid/operators/amp/alloc_float_status_op.cc +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class AllocFloatStatusOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasOutput("FloatStatus"), - "Output", - "FloatStatus", - "alloc_float_status"); - ctx->SetOutputDim("FloatStatus", {8}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, ctx.GetPlace()); - } -}; - -class AllocFloatStatusMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddOutput("FloatStatus", - "(Tensor) of shape {8} that holds the float status."); - AddComment(R"DOC( - Produces a float Tensor that holds the float status -)DOC"); - } -}; - -template -class AllocFloatStatusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Operator alloc_float_status is not supported on CPU")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -using CPU = phi::CPUContext; - -REGISTER_OPERATOR( - alloc_float_status, - ops::AllocFloatStatusOp, - ops::AllocFloatStatusMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - alloc_float_status, CPU, ALL_LAYOUT, ops::AllocFloatStatusKernel, float) {} diff --git a/paddle/fluid/operators/amp/clear_float_status_op.cc b/paddle/fluid/operators/amp/clear_float_status_op.cc deleted file mode 100644 index d595a26e5575a..0000000000000 --- a/paddle/fluid/operators/amp/clear_float_status_op.cc +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
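Nearly every hunk in these operator files is the same mechanical rewrite: the error factory moves from the fluid-layer platform::errors namespace to phi::errors, while the PADDLE_ENFORCE_* macro, the error class, the message, and the printf-style arguments are untouched. A minimal before/after sketch of the pattern, taken from the affine_channel XPU out-of-memory check above:

// Before: error constructed through the fluid platform layer.
PADDLE_ENFORCE_NOT_NULL(
    tmp, platform::errors::External("XPU has no enough memory"));

// After: identical check, identical message; only the factory namespace
// changes, so runtime behavior is the same.
PADDLE_ENFORCE_NOT_NULL(
    tmp, phi::errors::External("XPU has no enough memory"));

The whitespace-only reflowing around many of these call sites is clang-format reacting to the shorter qualifier, not a semantic change.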
-You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class ClearFloatStatusOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasOutput("FloatStatusOut"), - "Output", - "FloatStatusOut", - "clear_float_status"); - ctx->SetOutputDim("FloatStatusOut", ctx->GetInputDim("FloatStatus")); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, ctx.GetPlace()); - } -}; - -class ClearFloatStatusMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("FloatStatus", - "(Tensor) of shape {8} that holds the float status."); - AddOutput( - "FloatStatusOut", - "(Tensor) of shape {8} that holds the float status, which is cleared."); - AddComment(R"DOC( - Clear the float status -)DOC"); - } -}; - -template -class ClearFloatStatusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Operator clear_float_status is not supported on CPU")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - clear_float_status, - ops::ClearFloatStatusOp, - ops::ClearFloatStatusMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - clear_float_status, CPU, ALL_LAYOUT, ops::ClearFloatStatusKernel, float) {} diff --git a/paddle/fluid/operators/amp/get_float_status_op.cc b/paddle/fluid/operators/amp/get_float_status_op.cc deleted file mode 100644 index 8700d82976f01..0000000000000 --- a/paddle/fluid/operators/amp/get_float_status_op.cc +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
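The amp operators being deleted here (alloc_float_status and clear_float_status above, and get_float_status whose body follows) all share one skeleton: an InferShape that fixes the float-status tensor to shape {8}, a kernel key pinned to FP32, and a CPU kernel whose Compute only throws. A condensed sketch of that stub shape, with a hypothetical class name and the phi error factory this series standardizes on:

// Sketch only: the CPU registration exists so the op resolves, but CPU
// execution is rejected at run time; the real kernels live on device.
template <typename T, typename DeviceContext>
class FloatStatusStubKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_THROW(phi::errors::Unimplemented(
        "Operator %s is not supported on CPU", ctx.Type()));
  }
};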
*/ - -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class GetFloatStatusOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasOutput("FloatStatusOut"), - "Output", - "FloatStatusOut", - "get_float_status"); - ctx->SetOutputDim("FloatStatusOut", ctx->GetInputDim("FloatStatus")); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, ctx.GetPlace()); - } -}; - -class GetFloatStatusMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("FloatStatus", - "(Tensor) of shape {8} that holds the float status."); - AddOutput("FloatStatusOut", - "(Tensor) of shape {8} that holds the get float status."); - AddComment(R"DOC( - Get the float status -)DOC"); - } -}; - -template -class GetFloatStatusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Operator get_float_status is not supported on CPU")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -using CPU = phi::CPUContext; - -REGISTER_OPERATOR( - get_float_status, - ops::GetFloatStatusOp, - ops::GetFloatStatusMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - get_float_status, CPU, ALL_LAYOUT, ops::GetFloatStatusKernel, float) {} diff --git a/paddle/fluid/operators/amp/unity_build_rule.cmake b/paddle/fluid/operators/amp/unity_build_rule.cmake deleted file mode 100644 index fa460e33c8068..0000000000000 --- a/paddle/fluid/operators/amp/unity_build_rule.cmake +++ /dev/null @@ -1,10 +0,0 @@ -# This file records the Unity Build compilation rules. -# The source files in a `register_unity_group` called are compiled in a unity -# file. -# Generally, the combination rules in this file do not need to be modified. -# If there are some redefined error in compiling with the source file which -# in combination rule, you can remove the source file from the following rules. -register_unity_group(cc check_finite_and_unscale_op.cc - update_loss_scaling_op.cc) -register_unity_group(cu check_finite_and_unscale_op.cu - update_loss_scaling_op.cu) diff --git a/paddle/fluid/operators/array_operator.h b/paddle/fluid/operators/array_operator.h index c7b8ce3f381d1..3d8a08b6725f1 100644 --- a/paddle/fluid/operators/array_operator.h +++ b/paddle/fluid/operators/array_operator.h @@ -34,16 +34,15 @@ class ArrayOp : public framework::OperatorBase { size_t GetOffset(const framework::Scope &scope, const platform::Place &place) const { auto *i = scope.FindVar(Input("I")); - PADDLE_ENFORCE_NOT_NULL( - i, platform::errors::NotFound("Input(I) is not found.")); + PADDLE_ENFORCE_NOT_NULL(i, phi::errors::NotFound("Input(I) is not found.")); auto &i_tensor = i->Get(); - PADDLE_ENFORCE_EQ(i_tensor.numel(), - 1, - platform::errors::InvalidArgument( - "Input(I) must have numel 1. " - "But received %d, and it's shape is [%s].", - i_tensor.numel(), - i_tensor.dims())); + PADDLE_ENFORCE_EQ( + i_tensor.numel(), + 1, + phi::errors::InvalidArgument("Input(I) must have numel 1. 
" + "But received %d, and it's shape is [%s].", + i_tensor.numel(), + i_tensor.dims())); // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index 275dc6a99d63e..fae4ecbf9eb2b 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -59,7 +59,7 @@ struct ArrayToLoDFunctor { Apply(static_cast(pool.Get(place))); #else PADDLE_THROW( - platform::errors::Unavailable("Paddle is not compiled with CUDA.")); + phi::errors::Unavailable("Paddle is not compiled with CUDA.")); #endif } } @@ -101,7 +101,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { // dim PADDLE_ENFORCE_EQ(x.empty(), false, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "There's no element in the input array.")); int rank = x[0].dims().size(); platform::Place place = x[0].place(); @@ -116,7 +116,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( ins_i_dims, ins_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of the %zu'th element in LoDTensorArray " "differs from previous ones." "The current dimension is %d, and the previous dimension is %d.", @@ -126,7 +126,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( x[i].place(), place, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The place class of the %zu'th element in LoDTensorArray " "differs from previous ones." "The current place is %d, and the previous place is %d.", @@ -136,7 +136,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( x[i].dtype(), data_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The date type of the %zu'th element in LoDTensorArray " "differs from previous ones." "The current data type is %d, and the previous data type is %d.", @@ -172,7 +172,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { cur_level_lod.push_back(cur_level_lod.back() + table_items[idx].length); PADDLE_ENFORCE_LE(table_items[idx].length, x.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The RankTable items length should less than or " "equal to Input(X) size," "but receive TankTable items length is %d , longer " @@ -194,7 +194,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { PADDLE_ENFORCE_GE( end_offset, start_offset, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The lod data start offset should smaller or equal to the end " "offset," "but the start offset is %d, larger than end offset %d.", @@ -243,11 +243,11 @@ class ArrayToLoDTensorInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( context->HasInput("X"), true, - platform::errors::NotFound("Input(X) of BmmOp should not be null.")); - PADDLE_ENFORCE_EQ(context->HasInput("RankTable"), - true, - platform::errors::NotFound( - "Input(RankTable) of BmmOp should not be null.")); + phi::errors::NotFound("Input(X) of BmmOp should not be null.")); + PADDLE_ENFORCE_EQ( + context->HasInput("RankTable"), + true, + phi::errors::NotFound("Input(RankTable) of BmmOp should not be null.")); // For compile-time, the first dim of input X and output Out should be -1. // For runtime, the first dim of output Out should be the sum of all // elements's first dim in input X. 
The output's dims will be re-computed in diff --git a/paddle/fluid/operators/assert_op.cc b/paddle/fluid/operators/assert_op.cc index 4ab60914908da..5a9fb09d44807 100644 --- a/paddle/fluid/operators/assert_op.cc +++ b/paddle/fluid/operators/assert_op.cc @@ -56,14 +56,14 @@ class AssertOp : public framework::OperatorBase { const platform::Place &dev_place) const override { const framework::Variable *cond_var_ptr = scope.FindVar(Input(kCond.data())); - PADDLE_ENFORCE_NOT_NULL(cond_var_ptr, - platform::errors::NotFound( - "Input(Condition) of AssertOp is not found.")); + PADDLE_ENFORCE_NOT_NULL( + cond_var_ptr, + phi::errors::NotFound("Input(Condition) of AssertOp is not found.")); const phi::DenseTensor &cond = cond_var_ptr->Get(); PADDLE_ENFORCE_EQ( cond.numel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The numel of Input(Condition) of AssertOp must be 1. But now " "the Condition's shape is %s.", cond.dims().to_str())); @@ -83,7 +83,7 @@ class AssertOp : public framework::OperatorBase { formatter.Print(x_tensor, name); } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The condition variable '%s' of AssertOp must be " "true, but received false", Input(kCond.data()))); diff --git a/paddle/fluid/operators/assign_op.h b/paddle/fluid/operators/assign_op.h index 6efc621120929..36cee420f0c36 100644 --- a/paddle/fluid/operators/assign_op.h +++ b/paddle/fluid/operators/assign_op.h @@ -64,7 +64,7 @@ class AssignFunctor { PADDLE_ENFORCE_EQ( true, false, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Not support type for assign op with type %s", typeid(T).name())); } diff --git a/paddle/fluid/operators/assign_pos_op.cc b/paddle/fluid/operators/assign_pos_op.cc index 66c453885e4a9..7def3a0cac503 100644 --- a/paddle/fluid/operators/assign_pos_op.cc +++ b/paddle/fluid/operators/assign_pos_op.cc @@ -41,11 +41,11 @@ class AssignPosOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(cum_count_dtype, X_dtype, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the cum_count and X should be same")); PADDLE_ENFORCE_EQ(cum_count_dtype, framework::proto::VarType::INT64, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the cum_count_dtype, eff_num_len and " "X should be same as int64")); return phi::KernelKey(cum_count_dtype, ctx.device_context().GetPlace()); diff --git a/paddle/fluid/operators/assign_value_op.h b/paddle/fluid/operators/assign_value_op.h index 5ba8b9367e64e..d147575773c06 100644 --- a/paddle/fluid/operators/assign_value_op.h +++ b/paddle/fluid/operators/assign_value_op.h @@ -119,7 +119,7 @@ class AssignValueKernel : public framework::OpKernel { value_name = "int8_values"; break; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported data type(code %d) for AssignValue operator, only " "supports bool, int32, float32, float64, int8 and int64.", dtype)); diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index 6a0775e6331a7..3a5b50a7906e5 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -54,7 +54,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { const int M = static_cast(x_dims[1]); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected input(X)'s dimension is 2. 
But received %d.", x_dims.size())); @@ -63,39 +63,39 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected input(LSTMWeight)'s dimension is 2.But received %d.", w_dims.size())); PADDLE_ENFORCE_EQ( w_dims[0], D + M, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "LSTMWeight dims should be (%d + %d) * %d.", D, M, 4 * D)); auto b_dims = ctx->GetInputDim("LSTMBias"); PADDLE_ENFORCE_EQ( b_dims.size(), 2, - platform::errors::InvalidArgument("Input(LSTMBias)'s rank must be 2.")); - PADDLE_ENFORCE_EQ(b_dims[0], - 1, - platform::errors::InvalidArgument( - "LSTMBias dims should be 1 x %d.", 4 * D)); - PADDLE_ENFORCE_EQ(b_dims[1], - 4 * D, - platform::errors::InvalidArgument( - "LSTMBias dims should be 1 x %d.", 4 * D)); + phi::errors::InvalidArgument("Input(LSTMBias)'s rank must be 2.")); + PADDLE_ENFORCE_EQ( + b_dims[0], + 1, + phi::errors::InvalidArgument("LSTMBias dims should be 1 x %d.", 4 * D)); + PADDLE_ENFORCE_EQ( + b_dims[1], + 4 * D, + phi::errors::InvalidArgument("LSTMBias dims should be 1 x %d.", 4 * D)); auto c_dims = ctx->GetInputDim("C0"); PADDLE_ENFORCE_EQ( c_dims.size(), 2, - platform::errors::InvalidArgument("Input(C0)'s rank must be 2.")); + phi::errors::InvalidArgument("Input(C0)'s rank must be 2.")); if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ( c_dims[1], D, - platform::errors::InvalidArgument("C0 dims should be N x %d.", D)); + phi::errors::InvalidArgument("C0 dims should be N x %d.", D)); } if (ctx->HasInput("H0")) { @@ -103,27 +103,27 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( h_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected input(H0)'s dimension is 2. But received %d.", h_dims.size())); if (ctx->IsRuntime() || (common::product(c_dims) > 0 && common::product(h_dims) > 0)) { PADDLE_ENFORCE_EQ(h_dims, c_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of Input(H0) and Input(C0) " "should be the same.")); } } auto atten_w_dims = ctx->GetInputDim("AttentionWeight"); - PADDLE_ENFORCE_EQ(atten_w_dims.size(), - 2, - platform::errors::InvalidArgument( - "Input(AttentionWeight)'s rank must be 2.")); + PADDLE_ENFORCE_EQ( + atten_w_dims.size(), + 2, + phi::errors::InvalidArgument("Input(AttentionWeight)'s rank must be 2.")); PADDLE_ENFORCE_EQ(atten_w_dims[0], M + D, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected `AttentionWeight` shape is [(%d + %d), 1]. 
" "But received shape = [%d, 1], shape[0] is not %d.", M, @@ -132,39 +132,39 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { M + D)); PADDLE_ENFORCE_EQ(atten_w_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "AttentionWeight shapes must be (%d + %d) * 1.", M, D)); if (ctx->HasInput("AttentionBias")) { auto atten_b_dims = ctx->GetInputDim("AttentionBias"); - PADDLE_ENFORCE_EQ(atten_b_dims.size(), - 2, - platform::errors::InvalidArgument( - "Input(AttentionBias)'s rank must be 2.")); - PADDLE_ENFORCE_EQ(atten_b_dims[0], - 1, - platform::errors::InvalidArgument( - "AttentionBias shapes must be 1 * 1.")); - PADDLE_ENFORCE_EQ(atten_b_dims[1], - 1, - platform::errors::InvalidArgument( - "AttentionBias shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ( + atten_b_dims.size(), + 2, + phi::errors::InvalidArgument("Input(AttentionBias)'s rank must be 2.")); + PADDLE_ENFORCE_EQ( + atten_b_dims[0], + 1, + phi::errors::InvalidArgument("AttentionBias shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ( + atten_b_dims[1], + 1, + phi::errors::InvalidArgument("AttentionBias shapes must be 1 * 1.")); } if (ctx->HasInput("AttentionScalar")) { auto dims = ctx->GetInputDim("AttentionScalar"); PADDLE_ENFORCE_EQ(dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(AttentionScalar)'s rank must be 2.")); - PADDLE_ENFORCE_EQ(dims[0], - 1, - platform::errors::InvalidArgument( - "AttentionScalar shapes must be 1 * 1.")); - PADDLE_ENFORCE_EQ(dims[1], - 1, - platform::errors::InvalidArgument( - "AttentionScalar shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ( + dims[0], + 1, + phi::errors::InvalidArgument("AttentionScalar shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ( + dims[1], + 1, + phi::errors::InvalidArgument("AttentionScalar shapes must be 1 * 1.")); } if (ctx->HasInput("AttentionScalarBias")) { @@ -175,15 +175,15 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "AttentionLstm"); PADDLE_ENFORCE_EQ(dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(AttentionScalarBias)'s rank must be 2.")); PADDLE_ENFORCE_EQ(dims[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "AttentionScalarBias shapes must be 1 * 1.")); PADDLE_ENFORCE_EQ(dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "AttentionScalarBias shapes must be 1 * 1.")); } @@ -381,11 +381,11 @@ class AttentionLSTMKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_lod.size(), 1UL, - platform::errors::InvalidArgument("Input(X)'s lod size must be 1.")); + phi::errors::InvalidArgument("Input(X)'s lod size must be 1.")); PADDLE_ENFORCE_EQ( c0->dims()[0], N, - platform::errors::InvalidArgument("C0 dims should be %d x %d.", N, D)); + phi::errors::InvalidArgument("C0 dims should be %d x %d.", N, D)); fc_out->Resize({max_seq_len, 1}); std::function act_gate, act_cell, act_cand; diff --git a/paddle/fluid/operators/batch_fc_op.cc b/paddle/fluid/operators/batch_fc_op.cc index 706cb17e40f34..2eea44e05b057 100644 --- a/paddle/fluid/operators/batch_fc_op.cc +++ b/paddle/fluid/operators/batch_fc_op.cc @@ -27,49 +27,49 @@ class BatchFCOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("Input"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "X(Input) of Batch Fully Connected should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + 
phi::errors::InvalidArgument( "Out(Output) of Batch Fully Connected should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "W(Input) of Batch Fully Connected should not be null.")); auto input_dims = ctx->GetInputDim("Input"); auto w_dims = ctx->GetInputDim("W"); - PADDLE_ENFORCE_EQ(input_dims.size(), - 3, - platform::errors::InvalidArgument( - "Input of BatchFCOp should have 3D.")); + PADDLE_ENFORCE_EQ( + input_dims.size(), + 3, + phi::errors::InvalidArgument("Input of BatchFCOp should have 3D.")); PADDLE_ENFORCE_EQ( w_dims.size(), 3, - platform::errors::InvalidArgument("W of BatchFCOp should have 3D.")); + phi::errors::InvalidArgument("W of BatchFCOp should have 3D.")); PADDLE_ENFORCE_EQ( input_dims[0], w_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input.dim[0] and W.dim[0] of BatchFCOp should be same.")); PADDLE_ENFORCE_EQ( input_dims[2], w_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input.dim[2] and W.dim[1] of BatchFCOp should be same.")); auto bias_dims = ctx->GetInputDim("Bias"); PADDLE_ENFORCE_EQ(bias_dims[0], input_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Bias.dim[0] should be same as input.dim[0].")); PADDLE_ENFORCE_EQ(bias_dims[1], w_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Bias.dim[1] should be same as input.dim[2].")); ctx->SetOutputDim("Out", {input_dims[0], input_dims[1], w_dims[2]}); @@ -89,14 +89,13 @@ class BatchFCGradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("Input"), - true, - platform::errors::InvalidArgument("Input should not be null")); + PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), + true, + phi::errors::InvalidArgument("Input should not be null")); PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::InvalidArgument("Input(W) should not be null")); + phi::errors::InvalidArgument("Input(W) should not be null")); ctx->SetOutputDim(framework::GradVarName("Input"), ctx->GetInputDim("Input")); diff --git a/paddle/fluid/operators/batch_fc_op.h b/paddle/fluid/operators/batch_fc_op.h index ca8c22243dbe4..5db142d5da6ba 100644 --- a/paddle/fluid/operators/batch_fc_op.h +++ b/paddle/fluid/operators/batch_fc_op.h @@ -26,7 +26,7 @@ class BatchFCKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::Unimplemented("BatchFC only supports GPU now.")); + phi::errors::Unimplemented("BatchFC only supports GPU now.")); } }; } // namespace operators diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 996c6af070631..31a21f2138e6f 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
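Read together, the BatchFCOp::InferShape checks above pin down a batched fully-connected contract: Input [B, M, K], W [B, K, N], Bias [B, N], producing Out [B, M, N] from B independent FC problems. A standalone sketch of that inference (the dimension names are my gloss, not the op's):

#include <array>
#include <cassert>
#include <cstdint>

// Mirrors the enforced constraints: batch dims match, the inner dims chain
// for a matmul, and Bias is shaped [batch, out_dim].
std::array<std::int64_t, 3> InferBatchFCOut(
    const std::array<std::int64_t, 3>& input,   // [B, M, K]
    const std::array<std::int64_t, 3>& w,       // [B, K, N]
    const std::array<std::int64_t, 2>& bias) {  // [B, N]
  assert(input[0] == w[0]);                        // Input.dim[0] == W.dim[0]
  assert(input[2] == w[1]);                        // Input.dim[2] == W.dim[1]
  assert(bias[0] == input[0] && bias[1] == w[2]);  // Bias rows/cols line up
  return {input[0], input[1], w[2]};               // Out: [B, M, N]
}

Note that the second bias message in the hunk reads "Bias.dim[1] should be same as input.dim[2]" while the check compares against W.dim[2]; the check, not the message, is what the op enforces.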
*/ #include "paddle/fluid/framework/data_layout.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h" @@ -57,12 +57,12 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mean and MeanOut should share the same memory")); PADDLE_ENFORCE_EQ( ctx->Inputs("Variance")[0], ctx->Outputs("VarianceOut")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variance and VarianceOut should share the same memory")); const auto x_dims = ctx->GetInputDim("X"); @@ -71,7 +71,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( (x_dims[i] == -1) || (x_dims[i] > 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each dimension of input tensor is expected to be -1 or a " "positive number, but received %d. Input's shape is [%s].", x_dims[i], @@ -85,7 +85,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { auto mom = ctx->Inputs("MomentumTensor"); PADDLE_ENFORCE_EQ(mom.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor MomentumTensor's size must be 1" "But received: MomentumTensor's size is [%d]", mom.size())); @@ -94,7 +94,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_GE( x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of input " "X must greater than or equal to 2. But received: the shape of input " "X = [%s], the dimension of input X =[%d]", @@ -103,7 +103,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_LE( x_dims.size(), 5, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of input X " "must smaller than or equal to 5. But received: the shape of input X " "= [%s], the dimension of input X = [%d]", @@ -121,7 +121,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( scale_dim.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of scale must equal to 1." "But received: the shape of scale is [%s], the dimension " "of scale is [%d]", @@ -134,7 +134,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( bias_dim.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of bias must equal to 1." 
"But received: the shape of bias is [%s],the dimension " "of bias is [%d]", @@ -153,14 +153,14 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { if (check) { PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the shape of scale must equal to [%d]" "But received: the shape of scale is [%d]", C, ctx->GetInputDim("Scale")[0])); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the shape of bias must equal to [%d]" "But received: the shape of bias is [%d]", C, @@ -191,29 +191,29 @@ phi::KernelKey BatchNormOp::GetExpectedKernelType( bn_param_type = framework::proto::VarType::FP64; } if (ctx.HasInput("Scale")) { - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); } if (ctx.HasInput("Bias")) { - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); } PADDLE_ENFORCE_EQ( bn_param_type, framework::TransToProtoVarType( ctx.Input("Mean")->dtype()), - platform::errors::InvalidArgument("Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Variance")->dtype()), - platform::errors::InvalidArgument( - "Variance input should be of float type")); + phi::errors::InvalidArgument("Mean input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), + phi::errors::InvalidArgument("Variance input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } @@ -254,11 +254,11 @@ void BatchNormOpMaker::Make() { PADDLE_ENFORCE_GE( epsilon, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' should be greater or equal than 0.0.")); PADDLE_ENFORCE_LE(epsilon, 0.001f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' should be less or equal than 0.001.")); }); AddAttr("data_layout", "").SetDefault("NCHW"); @@ -349,7 +349,7 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ((has_scale_grad == has_bias_grad), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Scale@GRAD) and Output(Bias@GRAD) must be null " "or not be null at same time. 
But now, " "has Scale@Grad=[%d], has Bias@GRAD=[%d]", @@ -361,7 +361,7 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( !ctx->Attrs().Get("use_mkldnn"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Using global stats during training is not supported " "in oneDNN version of batch_norm_gradient kernel now.")); } @@ -391,7 +391,7 @@ phi::KernelKey BatchNormGradOp::GetExpectedKernelType( const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { PADDLE_THROW( - platform::errors::InvalidArgument("can't find gradient variable of Y")); + phi::errors::InvalidArgument("can't find gradient variable of Y")); } const phi::DenseTensor *t = nullptr; if (var->IsType()) { @@ -399,7 +399,7 @@ phi::KernelKey BatchNormGradOp::GetExpectedKernelType( } if (t == nullptr) { PADDLE_THROW( - platform::errors::InvalidArgument("gradient variable of Y is empty")); + phi::errors::InvalidArgument("gradient variable of Y is empty")); } auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); @@ -532,8 +532,7 @@ phi::KernelKey BatchNormDoubleGradOp::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { const auto *var = ctx.InputVar("DY"); if (var == nullptr) { - PADDLE_THROW( - platform::errors::NotFound("cannot find gradient variable of Y")); + PADDLE_THROW(phi::errors::NotFound("cannot find gradient variable of Y")); } const phi::DenseTensor *t = nullptr; if (var->IsType()) { @@ -541,7 +540,7 @@ phi::KernelKey BatchNormDoubleGradOp::GetExpectedKernelType( } if (t == nullptr) { PADDLE_THROW( - platform::errors::InvalidArgument("gradient variable of Y is empty")); + phi::errors::InvalidArgument("gradient variable of Y is empty")); } return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index cc013bd0b406e..9d48d7858f41a 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -26,8 +26,8 @@ namespace cub = hipcub; #include "paddle/common/flags.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/batch_norm_op.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/math_function.h" COMMON_DECLARE_bool(cudnn_batchnorm_spatial_persistent); diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index ec3ced614bd92..50a69e6390302 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -114,6 +114,6 @@ PD_REGISTER_STRUCT_KERNEL(beam_search_decode, ops::BeamSearchDecodeOpKernel, float, double, - paddle::platform::float16, + phi::dtype::float16, int, int64_t) {} diff --git a/paddle/fluid/operators/beam_search_decode_op.cu.cc b/paddle/fluid/operators/beam_search_decode_op.cu.cc index bab5423c99b05..beeb13725c6b1 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cu.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cu.cc @@ -23,6 +23,6 @@ PD_REGISTER_STRUCT_KERNEL(beam_search_decode, ops::BeamSearchDecodeOpKernel, float, double, - paddle::platform::float16, + phi::dtype::float16, int, int64_t) {} diff --git a/paddle/fluid/operators/beam_search_decode_op.h b/paddle/fluid/operators/beam_search_decode_op.h index 7347e228780b4..99735e98276e6 100644 --- 
a/paddle/fluid/operators/beam_search_decode_op.h +++ b/paddle/fluid/operators/beam_search_decode_op.h @@ -85,7 +85,7 @@ struct BeamSearchDecodeFunctor { template void apply_mix() const { if (std::is_same::value) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "beam search decode op does not support bool!")); } else { @@ -125,7 +125,7 @@ class BeamSearchDecodeOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( step_num, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "beam search steps, which is the" "size of Input(Ids) LoDTensorArray. beam search steps should " "be larger than 0, but received %d. ", @@ -134,7 +134,7 @@ class BeamSearchDecodeOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( source_num, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "source_num is the sequence number of the" "first decoding step, indicating by Input(Ids)[0].lod[0].size. " "The number of source_num should be larger than" @@ -145,7 +145,7 @@ class BeamSearchDecodeOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ids->at(i).lod().size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For the i step in beam search steps," "the size of Input(Ids)[i].lod() should larger than 2," "but received %d. ", diff --git a/paddle/fluid/operators/beam_search_decode_op_def.h b/paddle/fluid/operators/beam_search_decode_op_def.h index d358d8255fcf3..ff16e093e0bf5 100644 --- a/paddle/fluid/operators/beam_search_decode_op_def.h +++ b/paddle/fluid/operators/beam_search_decode_op_def.h @@ -90,7 +90,7 @@ void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( PADDLE_ENFORCE_NE( src_num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "src_num is the sequence number of the first decoding step" ", indicating by Input(Ids)[0].lod[0].size." "src_num has wrong value." @@ -162,12 +162,12 @@ void BeamSearchDecoder::Backtrace(const LoDTensorArray& step_ids, PADDLE_ENFORCE_NE( step_ids.empty(), true, - platform::errors::InvalidArgument("Input(Ids) should not be empty." - "But the Input(Ids) is empty.")); + phi::errors::InvalidArgument("Input(Ids) should not be empty." + "But the Input(Ids) is empty.")); PADDLE_ENFORCE_EQ( step_ids.size(), step_scores.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of Input(Ids) and Input(Scores) should be " "the same. But the size of Input(Ids) and Input(Scores) " "are not equal.")); diff --git a/paddle/fluid/operators/beam_search_decode_op_xpu.cc b/paddle/fluid/operators/beam_search_decode_op_xpu.cc index 5fd2b2fc6fa35..c438070ce07f9 100644 --- a/paddle/fluid/operators/beam_search_decode_op_xpu.cc +++ b/paddle/fluid/operators/beam_search_decode_op_xpu.cc @@ -30,7 +30,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( step_num, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "beam search steps, which is the" "size of Input(Ids) LoDTensorArray. beam search steps should " "be larger than 0, but received %d. ", @@ -40,7 +40,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( source_num, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "source_num is the sequence number of the" "first decoding step, indicating by Input(Ids)[0].lod[0].size. 
" "The number of source_num should be larger than" @@ -51,7 +51,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ids->at(i).lod().size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For the i step in beam search steps," "the size of Input(Ids)[i].lod() should larger than 2," "but received %d. ", @@ -91,7 +91,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByXPU failed by [%d]", r)); r = CopyTensorByType( @@ -99,7 +99,7 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByXPU failed by [%d]", r)); sentenceIds_temp->set_lod(sentenceIds->lod()); sentenceScores_temp->set_lod(sentenceScores->lod()); @@ -119,7 +119,7 @@ PD_REGISTER_STRUCT_KERNEL(beam_search_decode, ops::BeamSearchDecodeXPUKernel, float, double, - plat::float16, + phi::dtype::float16, int, int64_t) {} #endif diff --git a/paddle/fluid/operators/beam_search_decode_op_xpu.h b/paddle/fluid/operators/beam_search_decode_op_xpu.h index 5e63627c6f88c..863b92e9f2b7d 100644 --- a/paddle/fluid/operators/beam_search_decode_op_xpu.h +++ b/paddle/fluid/operators/beam_search_decode_op_xpu.h @@ -45,7 +45,7 @@ int CopyTensorByXPU(const phi::DenseTensor& srcTensor, PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External("Execute function SetMeta failed by [%d]", r)); + phi::errors::External("Execute function SetMeta failed by [%d]", r)); if (flag == 0) { T* dstData = @@ -75,8 +75,7 @@ const int CopyTensorByType(const phi::DenseTensor& srcTensor, if (srcTensor.dtype() == phi::DataType::FLOAT32) r = CopyTensorByXPU(srcTensor, dstTensor, flag, place); else if (srcTensor.dtype() == phi::DataType::FLOAT16) - r = CopyTensorByXPU( - srcTensor, dstTensor, flag, place); + r = CopyTensorByXPU(srcTensor, dstTensor, flag, place); else if (srcTensor.dtype() == phi::DataType::FLOAT64) r = CopyTensorByXPU(srcTensor, dstTensor, flag, place); else if (srcTensor.dtype() == phi::DataType::INT32) @@ -88,7 +87,7 @@ const int CopyTensorByType(const phi::DenseTensor& srcTensor, PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByXPU failed by [%d]", r)); return xpu::Error_t::SUCCESS; @@ -117,7 +116,7 @@ struct BeamSearchDecodeXPUFunctor { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByXPU failed by [%d]", r)); } @@ -135,7 +134,7 @@ struct BeamSearchDecodeXPUFunctor { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External( + phi::errors::External( "Execute function CopyTensorByType failed by [%d]", r)); } @@ -148,7 +147,7 @@ struct BeamSearchDecodeXPUFunctor { template void apply_xpu() const { if (std::is_same::value) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "beam search decode op does not support bool!")); } else { BeamSearchDecoder beam_search_decoder(beam_size_, end_id_); diff --git a/paddle/fluid/operators/beam_search_op.h b/paddle/fluid/operators/beam_search_op.h index fea706bb54a93..0beeb0cc407fe 100644 --- a/paddle/fluid/operators/beam_search_op.h +++ b/paddle/fluid/operators/beam_search_op.h @@ -29,16 +29,15 @@ class BeamSearchOpKernel : public 
framework::OpKernel { auto* pre_ids = context.Input("pre_ids"); auto* pre_scores = context.Input("pre_scores"); - PADDLE_ENFORCE_NOT_NULL(scores, - platform::errors::NotFound( - "Input(scores) of BeamSearchOp is not found.")); + PADDLE_ENFORCE_NOT_NULL( + scores, + phi::errors::NotFound("Input(scores) of BeamSearchOp is not found.")); PADDLE_ENFORCE_NOT_NULL( pre_ids, - platform::errors::NotFound( - "Input(pre_ids) of BeamSearchOp is not found.")); + phi::errors::NotFound("Input(pre_ids) of BeamSearchOp is not found.")); PADDLE_ENFORCE_NOT_NULL( pre_scores, - platform::errors::NotFound( + phi::errors::NotFound( "Input(pre_scores) of BeamSearchOp is not found.")); size_t level = context.Attr("level"); @@ -51,11 +50,11 @@ class BeamSearchOpKernel : public framework::OpKernel { auto* parent_idx = context.Output("parent_idx"); PADDLE_ENFORCE_NOT_NULL( selected_ids, - platform::errors::NotFound( + phi::errors::NotFound( "Output(selected_ids) of BeamSearchOp is not found.")); PADDLE_ENFORCE_NOT_NULL( selected_scores, - platform::errors::NotFound( + phi::errors::NotFound( "Output(selected_scores) of BeamSearchOp is not found.")); math::BeamSearchFunctor alg; diff --git a/paddle/fluid/operators/bernoulli_op.h b/paddle/fluid/operators/bernoulli_op.h index ffa2722ccbb60..f5ca225a49d26 100644 --- a/paddle/fluid/operators/bernoulli_op.h +++ b/paddle/fluid/operators/bernoulli_op.h @@ -25,14 +25,14 @@ namespace operators { template inline HOSTDEVICE T BernoulliFunctor(T p, T rand) { - PADDLE_ENFORCE_LE(p, - 1.0, - platform::errors::OutOfRange( - "The probability should be <= 1, but got %f", p)); - PADDLE_ENFORCE_GE(p, - 0.0, - platform::errors::OutOfRange( - "The probability should be >= 0, but got %f", p)); + PADDLE_ENFORCE_LE( + p, + 1.0, + phi::errors::OutOfRange("The probability should be <= 1, but got %f", p)); + PADDLE_ENFORCE_GE( + p, + 0.0, + phi::errors::OutOfRange("The probability should be >= 0, but got %f", p)); return static_cast(rand < p); } diff --git a/paddle/fluid/operators/bilateral_slice_op.cc b/paddle/fluid/operators/bilateral_slice_op.cc index 111f128fc3cc6..1b4624e3594f7 100644 --- a/paddle/fluid/operators/bilateral_slice_op.cc +++ b/paddle/fluid/operators/bilateral_slice_op.cc @@ -37,7 +37,7 @@ class BilateralSliceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dim_x.size(), 4, - platform::errors::Unimplemented( + phi::errors::Unimplemented( "Input(X) dimension must be 4, but got dimension = %d .", dim_x.size())); @@ -58,7 +58,7 @@ class BilateralSliceOp : public framework::OperatorWithKernel { if (has_offset) { PADDLE_ENFORCE_EQ((coeffs_chans % (input_chans + 1)), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Slicing with affine offset, coefficients grid " "should have n_out*(n_in+1) channels, but got %d", coeffs_chans)); @@ -67,7 +67,7 @@ class BilateralSliceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( (coeffs_chans % input_chans), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Slicing without affine offset, coefficients grid " "should have n_out*n_in channels, but got %d .", coeffs_chans)); @@ -179,10 +179,10 @@ template class BilateralSliceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::Unimplemented( - "BilateralSlice only supports GPU now.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + 
phi::errors::Unimplemented("BilateralSlice only supports GPU now.")); } }; diff --git a/paddle/fluid/operators/bpr_loss_op.cc b/paddle/fluid/operators/bpr_loss_op.cc deleted file mode 100644 index c628bad0aa3c0..0000000000000 --- a/paddle/fluid/operators/bpr_loss_op.cc +++ /dev/null @@ -1,192 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/bpr_loss_op.h" - -#include - -namespace paddle { -namespace operators { - -class BprLossOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BprLoss"); - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "BprLoss"); - OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "BprLoss"); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ( - rank, - label_dims.size(), - platform::errors::InvalidArgument( - "Input(X) and Input(Label) shall have the same rank.")); - - if (ctx->IsRuntime() || - (common::product(x_dims) > 0 && common::product(label_dims) > 0)) { - PADDLE_ENFORCE_EQ( - common::slice_ddim(x_dims, 0, rank - 1), - common::slice_ddim(label_dims, 0, rank - 1), - platform::errors::InvalidArgument( - "Input(X) and Input(Label) shall have the same shape " - "except the last dimension.")); - } - - auto y_dims = x_dims; - y_dims[rank - 1] = 1; - ctx->SetOutputDim("Y", y_dims); - ctx->ShareLoD("X", /*->*/ "Y"); - } - - protected: - // Explicitly set that the data type of computation kernel of Seq-bpr - // is determined by its input "X". 
- phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - platform::CPUPlace()); - } -}; - -class BprLossGradientOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BprLossGradient"); - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "BprLossGradient"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), - "Input", - framework::GradVarName("Y"), - "BprLossGradient"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Output", - framework::GradVarName("X"), - "BprLossGradient"); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); - int rank = x_dims.size(); - PADDLE_ENFORCE_EQ( - dy_dims.size(), - rank, - platform::errors::InvalidArgument( - "Input(Y@Grad) and Input(X) should have the same rank.")); - PADDLE_ENFORCE_EQ( - label_dims.size(), - rank, - platform::errors::InvalidArgument( - "Input(Label) and Input(X) should have the same rank.")); - PADDLE_ENFORCE_EQ(common::slice_ddim(x_dims, 0, rank - 1), - common::slice_ddim(label_dims, 0, rank - 1), - platform::errors::InvalidArgument( - "The Input(X) and Input(Label) should have the same " - "shape except the last dimension.")); - PADDLE_ENFORCE_EQ(common::slice_ddim(x_dims, 0, rank - 1), - common::slice_ddim(dy_dims, 0, rank - 1), - platform::errors::InvalidArgument( - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension.")); - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], - 1, - platform::errors::InvalidArgument( - "The last dimension of Input(Y@Grad) should be 1.")); - PADDLE_ENFORCE_EQ(label_dims[rank - 1], - 1, - platform::errors::InvalidArgument( - " the last dimension of Input(Label) should be 1.")); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of cross_entropy - // is determined by its input "X". - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - platform::CPUPlace()); - } -}; - -class BprLossOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor, default Tensor), a tensor whose last dimension " - "size is equal to the number of classes. This input is a " - "real number."); - AddInput( - "Label", - "(Tensor), the tensor which represents the ground truth. It has the " - "same shape with 'X' except the last dimension. the last dimension " - "size is 1."); - AddOutput("Y", - "(Tensor, default Tensor), a tensor whose shape is same " - "with 'X' except that the last dimension size is 1. It " - "represents the sequence bpr loss."); - AddComment(R"DOC( -Bayesian Personalized Ranking Loss Operator. - -This operator belongs to pairwise ranking loss. Label is the desired item. 
-The loss at a given point in one session is defined as: -$Y[i] = -\frac{1}{N_{i}} * \sum_{j=0}^{N_{i}}\log(\sigma(X[i, Label[i]]-X[i, j]))$ - -For more details, see the paper: https://arxiv.org/abs/1511.06939 - -)DOC"); - } -}; - -template <typename T> -class BprLossGradMaker : public framework::SingleGradOpMaker<T> { - public: - using framework::SingleGradOpMaker<T>::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr<T> op) const override { - op->SetType("bpr_loss_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Label", this->Input("Label")); - op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR(bpr_loss, - ops::BprLossOp, - ops::BprLossOpMaker, - ops::BprLossGradMaker<paddle::framework::OpDesc>, - ops::BprLossGradMaker<paddle::imperative::OpBase>); -REGISTER_OPERATOR(bpr_loss_grad, ops::BprLossGradientOp); - -PD_REGISTER_STRUCT_KERNEL( - bpr_loss, CPU, ALL_LAYOUT, ops::BprLossOpKernel, float, double) {} -PD_REGISTER_STRUCT_KERNEL(bpr_loss_grad, - CPU, - ALL_LAYOUT, - ops::BprLossGradientOpKernel, - float, - double) {} diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h deleted file mode 100644 index 1a1bddc17c2e5..0000000000000 --- a/paddle/fluid/operators/bpr_loss_op.h +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/for_range.h" -#include "paddle/phi/core/tensor_utils.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -/* TODO: - * Find a way to adapt TolerableValue using BLAS or Eigen.
- */ -template -struct TolerableValue { - HOSTDEVICE T operator()(const T& x) const { - const T kApproInf = 1e20; - if (x == INFINITY) return kApproInf; - if (x == -INFINITY) return -kApproInf; - return x; - } -}; - -template -class BprLossOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); - auto* y = ctx.Output("Y"); - y->mutable_data(ctx.GetPlace()); - int rank = x->dims().size(); - - phi::DenseTensor x_2d = phi::ReshapeToMatrix(*x, rank - 1); - phi::DenseTensor labels_2d = phi::ReshapeToMatrix(*label, rank - 1); - phi::DenseTensor y_2d = phi::ReshapeToMatrix(*y, rank - 1); - - const phi::DenseTensor* logits = &x_2d; - const phi::DenseTensor* labels = &labels_2d; - phi::DenseTensor* out = &y_2d; - - const int step_size = logits->dims()[0]; - const int class_num = logits->dims()[1]; - const T* logits_data = logits->data(); - T* loss_data = out->data(); - - const int64_t* label_data = labels->data(); - for (int i = 0; i < step_size; ++i) { - int lbl_pos = label_data[i]; - PADDLE_ENFORCE_GE(lbl_pos, - 0, - platform::errors::InvalidArgument( - "label data %d is illegal.", lbl_pos)); - PADDLE_ENFORCE_LT(lbl_pos, - class_num, - platform::errors::InvalidArgument( - "label data %d is illegal.", lbl_pos)); - int index_pos = i * class_num + lbl_pos; - T sum = static_cast(0); - for (int j = 0; j < class_num; j++) { - if (j == lbl_pos) continue; - int index_neg = i * class_num + j; - sum += TolerableValue()(-std::log( - 1.0f + TolerableValue()(std::exp(logits_data[index_neg] - - logits_data[index_pos])))); - } - loss_data[i] = -sum / (class_num - 1); - } - } -}; - -template -class BprLossGradientOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dy = ctx.Input(framework::GradVarName("Y")); - auto* label = ctx.Input("Label"); - auto* dx = ctx.Output(framework::GradVarName("X")); - - const size_t step_size = static_cast(x->dims()[0]); - const size_t num_classes = static_cast(x->dims()[1]); - T* dx_data = dx->mutable_data(ctx.GetPlace()); - const T* dy_data = dy->data(); - const T* x_data = x->data(); - const int64_t* label_data = label->data(); - - for (size_t sample_id = 0; sample_id < step_size; sample_id++) { - for (size_t x_offset = sample_id * num_classes; - x_offset < (sample_id + 1) * num_classes; - x_offset++) { - dx_data[x_offset] = static_cast(0); - } - auto p_index = sample_id * num_classes + label_data[sample_id]; - for (size_t ni = 0; ni < num_classes; ni++) { - if (label_data[sample_id] == static_cast(ni)) continue; - auto n_index = sample_id * num_classes + ni; - auto grad_ = -dy_data[sample_id] / - ((num_classes - 1) * - (1.0f + TolerableValue()(std::exp(x_data[p_index] - - x_data[n_index])))); - dx_data[p_index] += grad_; - dx_data[n_index] -= grad_; - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/cast_op.cc b/paddle/fluid/operators/cast_op.cc index b83ebc6f899ef..6b587aba3dbf1 100644 --- a/paddle/fluid/operators/cast_op.cc +++ b/paddle/fluid/operators/cast_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
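Taken together, the two deleted bpr_loss files above compute, for each sample, the negative mean of log(sigmoid(x_pos - x_neg)) over all negative classes, which is exactly what the DOC formula expresses. As a reading aid, here is a minimal standalone sketch of that forward pass; the function name and plain-vector interface are illustrative only, not part of the Paddle API, and the TolerableValue clamping is omitted for brevity.

#include <cmath>
#include <cstdint>
#include <vector>

// Reference forward pass of bpr_loss for row-major logits of shape
// [batch, num_classes]; labels[i] holds the positive class of sample i.
std::vector<float> BprLossReference(const std::vector<float>& logits,
                                    const std::vector<int64_t>& labels,
                                    int num_classes) {
  const int batch = static_cast<int>(labels.size());
  std::vector<float> loss(batch, 0.0f);
  for (int i = 0; i < batch; ++i) {
    const int pos_idx = static_cast<int>(labels[i]);
    const float pos = logits[i * num_classes + pos_idx];
    float sum = 0.0f;
    for (int j = 0; j < num_classes; ++j) {
      if (j == pos_idx) continue;
      // -log(1 + exp(neg - pos)) equals log(sigmoid(pos - neg))
      sum += -std::log(1.0f + std::exp(logits[i * num_classes + j] - pos));
    }
    // negative mean log-sigmoid margin over the num_classes - 1 negatives
    loss[i] = -sum / static_cast<float>(num_classes - 1);
  }
  return loss;
}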
*/ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" @@ -94,7 +94,7 @@ class CastOp : public framework::OperatorWithKernel { auto *tensor = ctx.Input("X"); PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The tensor of Input(X) is not initialized.")); auto &tensor_place = tensor->place(); // NOTE: cuda pinned tensor need to copy its data to target place diff --git a/paddle/fluid/operators/chunk_eval_op.cc b/paddle/fluid/operators/chunk_eval_op.cc deleted file mode 100644 index 5e95d0cdda3f8..0000000000000 --- a/paddle/fluid/operators/chunk_eval_op.cc +++ /dev/null @@ -1,202 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/chunk_eval_op.h" - -#include -#include - -namespace paddle { -namespace operators { - -class ChunkEvalOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Inference"), "Input", "Inference", "chunk_eval"); - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "chunk_eval"); - - OP_INOUT_CHECK( - ctx->HasOutput("Precision"), "Output", "Precision", "chunk_eval"); - OP_INOUT_CHECK(ctx->HasOutput("Recall"), "Output", "Recall", "chunk_eval"); - OP_INOUT_CHECK( - ctx->HasOutput("F1-Score"), "Output", "F1-Score", "chunk_eval"); - OP_INOUT_CHECK(ctx->HasOutput("NumInferChunks"), - "Output", - "NumInferChunks", - "chunk_eval"); - OP_INOUT_CHECK(ctx->HasOutput("NumLabelChunks"), - "Output", - "NumLabelChunks", - "chunk_eval"); - OP_INOUT_CHECK(ctx->HasOutput("NumCorrectChunks"), - "Output", - "NumCorrectChunks", - "chunk_eval"); - - auto inference_dim = ctx->GetInputDim("Inference"); - auto label_dim = ctx->GetInputDim("Label"); - - PADDLE_ENFORCE_EQ( - inference_dim, - label_dim, - platform::errors::InvalidArgument( - "Input(Inference)'s shape must be the same as Input(Label)'s " - "shape, but received [%s] (Inference) vs [%s] (Label).", - inference_dim, - label_dim)); - - bool use_padding = ctx->HasInput("SeqLength"); - if (use_padding) { - PADDLE_ENFORCE_EQ( - (inference_dim.size() == 3 && inference_dim[2] == 1) || - inference_dim.size() == 2, - true, - platform::errors::InvalidArgument( - "when Input(SeqLength) is provided, Input(Inference) " - "should be of dim 3 (batch_size, bucket, 1) or dim 2 " - "(batch_size, bucket), but received [%s].", - inference_dim)); - auto seq_length_dim = ctx->GetInputDim("SeqLength"); - PADDLE_ENFORCE_LE(seq_length_dim.size(), - 2, - platform::errors::InvalidArgument( - "Input(SeqLength)'s rank should not be greater " - "than 2, but received %d.", - 
seq_length_dim.size())); - } - - ctx->SetOutputDim("Precision", {1}); - ctx->SetOutputDim("Recall", {1}); - ctx->SetOutputDim("F1-Score", {1}); - ctx->SetOutputDim("NumInferChunks", {1}); - ctx->SetOutputDim("NumLabelChunks", {1}); - ctx->SetOutputDim("NumCorrectChunks", {1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, - platform::CPUPlace()); - } -}; - -class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Inference", - "(Tensor, default: Tensor<int64_t>). " - "Predictions from the network."); - AddInput("Label", - "(Tensor, default: Tensor<int64_t>). The true tag sequences."); - AddInput("SeqLength", - "(Tensor, default: Tensor<int64_t>). The length of each sequence, " - "used when Inference and Label are of Tensor type.") - .AsDispensable(); - AddOutput("Precision", - "(float). The evaluated precision (called positive predictive " - "value) of chunks on the given mini-batch."); - AddOutput("Recall", - "(float). The evaluated recall (true positive rate or " - "sensitivity) of chunks on the given mini-batch."); - AddOutput("F1-Score", - "(float). The evaluated F1-Score on the given mini-batch."); - AddOutput("NumInferChunks", - "(int64_t). The number of chunks in Inference on the given " - "mini-batch."); - AddOutput( - "NumLabelChunks", - "(int64_t). The number of chunks in Label on the given mini-batch."); - AddOutput( - "NumCorrectChunks", - "(int64_t). The number of chunks both in Inference and Label on the " - "given mini-batch."); - AddAttr<int>("num_chunk_types", - "The number of chunk types. See the description for details."); - AddAttr<std::string>("chunk_scheme", - "The labeling scheme indicating " - "how to encode the chunks. Must be IOB, IOE, IOBES or " - "plain. See the description " - "for details.") - .SetDefault("IOB"); - AddAttr<std::vector<int>>("excluded_chunk_types", - "A list including chunk type ids " - "indicating chunk types that are not counted. " - "See the description for details.") - .SetDefault(std::vector<int>{}); - AddComment(R"DOC( -For some basics of chunking, please refer to -'Chunking with Support Vector Machines'. - -ChunkEvalOp computes the precision, recall, and F1-score of chunk detection, -and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. -Here is a NER example of labeling for these tagging schemes: - - Li Ming works at Agricultural Bank of China in Beijing. - IO I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC - IOB B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC - IOE I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC - IOBES B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC - -There are three chunk types (named entity types) including PER (person), ORG (organization) -and LOC (location), and we can see that the labels have the form -<tag type>-<chunk type>. - -Since the calculations actually use label ids rather than labels, extra attention -should be paid when mapping labels to ids to make ChunkEvalOp work. The key point -is that the listed equations are satisfied by ids. - - tag_type = label % num_tag_type - chunk_type = label / num_tag_type - -where `num_tag_type` is the number of tag types in the tagging scheme, `num_chunk_type` -is the number of chunk types, and `tag_type` gets its value from the following table. - - Scheme Begin Inside End Single - plain 0 - - - - IOB 0 1 - - - IOE - 0 1 - - IOBES 0 1 2 3 - -Still using NER as an example, assuming the tagging scheme is IOB and the chunk types are ORG, -PER and LOC.
To satisfy the above equations, the label map can be like this: - - B-ORG 0 - I-ORG 1 - B-PER 2 - I-PER 3 - B-LOC 4 - I-LOC 5 - O 6 - -It's not hard to verify the equations noting that the num of chunk types -is 3 and the num of tag types in IOB scheme is 2. For example, the label -id of I-LOC is 5, the tag type id of I-LOC is 1, and the chunk type id of -I-LOC is 2, which consistent with the results from the equations. -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(chunk_eval, - ops::ChunkEvalOp, - ops::ChunkEvalOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - chunk_eval, CPU, ALL_LAYOUT, ops::ChunkEvalKernel, float) {} diff --git a/paddle/fluid/operators/chunk_eval_op.h b/paddle/fluid/operators/chunk_eval_op.h deleted file mode 100644 index baad8719db37f..0000000000000 --- a/paddle/fluid/operators/chunk_eval_op.h +++ /dev/null @@ -1,358 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class ChunkEvalKernel : public framework::OpKernel { - public: - struct Segment { - int begin; - int end; - int type; - bool operator==(const Segment& y) const { - return begin == y.begin && end == y.end && type == y.type; - } - }; - - void GetSegments(const int64_t* label, - int length, - std::vector* segments, - int num_chunk_types, - int num_tag_types, - int other_chunk_type, - int tag_begin, - int tag_inside, - int tag_end, - int tag_single) const { - segments->clear(); - segments->reserve(length); - int chunk_start = 0; - bool in_chunk = false; - int tag = -1; - int type = other_chunk_type; - for (int i = 0; i < length; ++i) { - int prev_tag = tag; - int prev_type = type; - PADDLE_ENFORCE_LE( - label[i], - num_chunk_types * num_tag_types, - platform::errors::InvalidArgument( - "The value of Input(Label) should be less than the number of " - "chunk types times the number of tag types, but received %d " - "(Label) vs %d (chunk types) * %d (tag types).", - label[i], - num_chunk_types, - num_tag_types)); - tag = label[i] % num_tag_types; - type = label[i] / num_tag_types; - if (in_chunk && ChunkEnd(prev_tag, - prev_type, - tag, - type, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single)) { - Segment segment{ - chunk_start, // begin - i - 1, // end - prev_type, - }; - segments->push_back(segment); - in_chunk = false; - } - if (ChunkBegin(prev_tag, - prev_type, - tag, - type, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single)) { - chunk_start = i; - in_chunk = true; - } - } - if (in_chunk) { - Segment segment{ - chunk_start, // begin - length - 1, // end - type, - }; - segments->push_back(segment); - } - } - - bool ChunkEnd(int prev_tag, - int prev_type, - int tag, - int type, - int other_chunk_type, - int tag_begin, - int tag_inside, - int 
tag_end, - int tag_single) const { - if (prev_type == other_chunk_type) return false; - if (type == other_chunk_type) return true; - if (type != prev_type) return true; - if (prev_tag == tag_begin) return tag == tag_begin || tag == tag_single; - if (prev_tag == tag_inside) return tag == tag_begin || tag == tag_single; - if (prev_tag == tag_end) return true; - if (prev_tag == tag_single) return true; - return false; - } - - bool ChunkBegin(int prev_tag, - int prev_type, - int tag, - int type, - int other_chunk_type, - int tag_begin, - int tag_inside, - int tag_end, - int tag_single) const { - if (prev_type == other_chunk_type) return type != other_chunk_type; - if (type == other_chunk_type) return false; - if (type != prev_type) return true; - if (tag == tag_begin) return true; - if (tag == tag_inside) return prev_tag == tag_end || prev_tag == tag_single; - if (tag == tag_end) return prev_tag == tag_end || prev_tag == tag_single; - if (tag == tag_single) return true; - return false; - } - - void Compute(const framework::ExecutionContext& context) const override { - // initialize to parse configurations - int num_chunk_types, num_tag_types; - int other_chunk_type; - int tag_begin, tag_inside, tag_end, tag_single; - std::vector label_segments; - std::vector output_segments; - std::set excluded_chunk_types; - - if (context.Attr("chunk_scheme") == "IOB") { - num_tag_types = 2; - tag_begin = 0; - tag_inside = 1; - tag_end = -1; - tag_single = -1; - } else if (context.Attr("chunk_scheme") == "IOE") { - num_tag_types = 2; - tag_begin = -1; - tag_inside = 0; - tag_end = 1; - tag_single = -1; - } else if (context.Attr("chunk_scheme") == "IOBES") { - num_tag_types = 4; - tag_begin = 0; - tag_inside = 1; - tag_end = 2; - tag_single = 3; - } else if (context.Attr("chunk_scheme") == "plain") { - num_tag_types = 1; - tag_begin = -1; - tag_inside = -1; - tag_end = -1; - tag_single = -1; - } else { - PADDLE_THROW(platform::errors::InvalidArgument("Unknown chunk scheme.")); - } - other_chunk_type = num_chunk_types = context.Attr("num_chunk_types"); - excluded_chunk_types.insert( - context.Attr>("excluded_chunk_types").begin(), - context.Attr>("excluded_chunk_types").end()); - - auto* inference = context.Input("Inference"); - auto place = inference->place(); - auto* label = context.Input("Label"); - auto* precision = context.Output("Precision"); - auto* recall = context.Output("Recall"); - auto* f1 = context.Output("F1-Score"); - auto* num_infer_chunks = context.Output("NumInferChunks"); - auto* num_label_chunks = context.Output("NumLabelChunks"); - auto* num_correct_chunks = - context.Output("NumCorrectChunks"); - - const int64_t* inference_data = inference->data(); - const int64_t* label_data = label->data(); - T* precision_data = precision->mutable_data(place); - T* recall_data = recall->mutable_data(place); - T* f1_data = f1->mutable_data(place); - int64_t* num_infer_chunks_data = - num_infer_chunks->mutable_data(place); - int64_t* num_label_chunks_data = - num_label_chunks->mutable_data(place); - int64_t* num_correct_chunks_data = - num_correct_chunks->mutable_data(place); - *num_infer_chunks_data = 0; - *num_label_chunks_data = 0; - *num_correct_chunks_data = 0; - - auto lod = label->lod(); - bool use_padding = lod.empty(); - int num_sequences = 0; - - if (use_padding) { - auto dim1 = inference->dims()[1]; - auto* seq_length_t = context.Input("SeqLength"); - auto* seq_length_data = seq_length_t->data(); - num_sequences = seq_length_t->dims()[0]; - - for (int i = 0; i < num_sequences; ++i) { - int 
seq_length = seq_length_data[i]; - EvalOneSeq(inference_data + i * dim1, - label_data + i * dim1, - seq_length, - &output_segments, - &label_segments, - num_infer_chunks_data, - num_label_chunks_data, - num_correct_chunks_data, - num_chunk_types, - num_tag_types, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single, - excluded_chunk_types); - } - } else { - PADDLE_ENFORCE_EQ( - lod.size(), - 1UL, - platform::errors::InvalidArgument( - "Only support one level LoD sequence now, but received %d.", - lod.size())); - PADDLE_ENFORCE_EQ( - lod, - inference->lod(), - platform::errors::InvalidArgument( - "Input(Inference) and Input(Label) of Op(chunk_eval) should have " - "same LoD information.")); - num_sequences = lod[0].size() - 1; - - for (int i = 0; i < num_sequences; ++i) { - int seq_length = lod[0][i + 1] - lod[0][i]; - EvalOneSeq(inference_data + lod[0][i], - label_data + lod[0][i], - seq_length, - &output_segments, - &label_segments, - num_infer_chunks_data, - num_label_chunks_data, - num_correct_chunks_data, - num_chunk_types, - num_tag_types, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single, - excluded_chunk_types); - } - } - - *precision_data = !(*num_infer_chunks_data) - ? 0 - : static_cast(*num_correct_chunks_data) / - (*num_infer_chunks_data); - *recall_data = !(*num_label_chunks_data) - ? 0 - : static_cast(*num_correct_chunks_data) / - (*num_label_chunks_data); - *f1_data = !(*num_correct_chunks_data) - ? 0 - : 2 * (*precision_data) * (*recall_data) / - ((*precision_data) + (*recall_data)); - } - - void EvalOneSeq(const int64_t* output, - const int64_t* label, - int length, - std::vector* output_segments, - std::vector* label_segments, - int64_t* num_output_segments, - int64_t* num_label_segments, - int64_t* num_correct, - int num_chunk_types, - int num_tag_types, - int other_chunk_type, - int tag_begin, - int tag_inside, - int tag_end, - int tag_single, - const std::set& excluded_chunk_types) const { - GetSegments(output, - length, - output_segments, - num_chunk_types, - num_tag_types, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single); - GetSegments(label, - length, - label_segments, - num_chunk_types, - num_tag_types, - other_chunk_type, - tag_begin, - tag_inside, - tag_end, - tag_single); - size_t i = 0, j = 0; - while (i < output_segments->size() && j < label_segments->size()) { - if (output_segments->at(i) == label_segments->at(j) && - excluded_chunk_types.count(output_segments->at(i).type) != 1) { - ++(*num_correct); - } - if (output_segments->at(i).end < label_segments->at(j).end) { - ++i; - } else if (output_segments->at(i).end > label_segments->at(j).end) { - ++j; - } else { - ++i; - ++j; - } - } - for (auto& segment : (*label_segments)) { - if (excluded_chunk_types.count(segment.type) != 1) { - ++(*num_label_segments); - } - } - for (auto& segment : (*output_segments)) { - if (excluded_chunk_types.count(segment.type) != 1) { - ++(*num_output_segments); - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc deleted file mode 100644 index 5d4e3ae331596..0000000000000 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
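Before moving on to the CINN operators, it is worth pinning down the label encoding used by the deleted chunk_eval kernel above: each label id packs a tag type and a chunk type via the modulo/division equations quoted in its DOC. A small hypothetical helper makes the decoding explicit; DecodeChunkLabel is not a Paddle function, just a sketch of the arithmetic.

#include <cstdint>
#include <utility>

// Decode a chunk_eval label id into (tag_type, chunk_type):
// tag_type = label % num_tag_types, chunk_type = label / num_tag_types.
// For the IOB scheme, num_tag_types == 2 (tag 0 = Begin, tag 1 = Inside).
std::pair<int, int> DecodeChunkLabel(int64_t label, int num_tag_types) {
  const int tag_type = static_cast<int>(label % num_tag_types);
  const int chunk_type = static_cast<int>(label / num_tag_types);
  return {tag_type, chunk_type};
}

// Example from the DOC's label map: I-LOC has id 5, so under IOB
// DecodeChunkLabel(5, 2) yields tag_type 1 (Inside) and chunk_type 2 (LOC).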
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/cinn/cinn_instruction_run_op.h" - -#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" - -#include "paddle/fluid/operators/cinn/cinn_launch_context.h" -#include "paddle/fluid/platform/enforce.h" - -namespace paddle::operators { - -class CinnInstructionRunOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - // Since CINN now supports fill_constant, the CINN graph may have no - // fetch-fed inputs; all of its inputs may be generated by fill_constant. - // OP_INOUT_CHECK(ctx->HasInputs(kX), "Input", kX, "CinnInstructionRun"); - OP_INOUT_CHECK( - ctx->HasOutputs(kOutputs), "Output", kOutputs, "CinnInstructionRun"); - const CinnCompiledObject& compiled_object = - CinnCompiler::GetInstance()->GetCompiledObject( - ctx->Attrs().Get<int64_t>(kCachedIndex)); - - details::CinnLaunchContext* launch_context = - compiled_object.launch_context.get(); - std::vector<std::string> output_args = ctx->Outputs(kOutputs); - std::vector<framework::DDim> output_dims(output_args.size()); - std::transform(output_args.begin(), - output_args.end(), - output_dims.begin(), - [launch_context](const std::string& var_name) { - cinn_buffer_t* buffer = - launch_context->GetCinnBufferOfVar(var_name); - return framework::DDim(buffer->dims, buffer->dimensions); - }); - ctx->SetOutputsDim(kOutputs, output_dims); - } - - protected: - /* [Why use single type kernel]: - * - * Whether the kernel data type is int, float or any other type - * has no effect on its execution logic, so a data type is - * directly specified here. - * - */ - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, ctx.GetPlace()); - } -}; - -class CinnInstructionRunOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput(kX, - "(vector<phi::DenseTensor>) " - "which are the input arguments of this cinn instruction") - .AsDuplicable(); - AddOutput(kOutputs, - "(vector<phi::DenseTensor>) " - "which are the output arguments of this cinn instruction") - .AsDuplicable(); - AddAttr<int64_t>( - kCachedIndex, - "(int64_t) " - "the stored index of the cached compilation result in CinnCompiler, " - "which is used to fetch the CinnCompiledObject where this cinn " - "instruction is included"); - AddAttr<int64_t>( - kInstructionIndex, - "(int64_t) " - "the index of this instruction in the cinn runtime program"); - AddComment(R"DOC( -CinnInstructionRun Operator. - -This operator is used to launch the execution of a -CINN(https://github.com/PaddlePaddle/CINN/blob/develop/README.md) instruction. - -Both the input and output of this operator are a set of variables -which are the input and output arguments of the bound cinn instruction respectively. -In addition, an attribute named 'cached_index' must be set to get the -CinnCompiledObject where the instruction is included, -and 'instruction_index' is used to fetch the instruction object from the compiled runtime program.
- -It accomplishes the execution of the instruction according to the following steps: - 0. Set the shapes of the output variables in the InferShape function with the - compilation result. - 1. Fetch the cinn instruction bound to this operator by 'cached_index' - and 'instruction_index' from CinnCompiler. - 2. Prepare the input and output variables of the instruction in Paddle and share - their buffers to CINN by setting 'memory' of the corresponding cinn_buffer_t. - 3. Launch the CINN runtime to execute the instruction. - -)DOC"); - } -}; - -} // namespace paddle::operators - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - cinn_instruction_run, - ops::CinnInstructionRunOp, - ops::CinnInstructionRunOpMaker, - paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, - paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); - -PD_REGISTER_STRUCT_KERNEL(cinn_instruction_run, - CPU, - ALL_LAYOUT, - ops::CinnInstructionRunOpKernel, - float) {} diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cu.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cu.cc deleted file mode 100644 index d5d236dbd6529..0000000000000 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cu.cc +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/cinn/cinn_instruction_run_op.h" - -#include "paddle/fluid/framework/op_registry.h" - -namespace ops = paddle::operators; -/* see [Why use single type kernel] */ -PD_REGISTER_STRUCT_KERNEL(cinn_instruction_run, - GPU, - ALL_LAYOUT, - ops::CinnInstructionRunOpKernel, - float) {} diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.h b/paddle/fluid/operators/cinn/cinn_instruction_run_op.h deleted file mode 100644 index 3a7779ae83338..0000000000000 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
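The header deleted next contains the kernel that consumes the 'cached_index' and 'instruction_index' attributes documented above. The lookup it performs reduces to a bounds-checked fetch into the compiled runtime program's instruction list; a simplified model follows, where Instruction is a stand-in struct rather than the real ::cinn::hlir::framework::Instruction, and the exception replaces the PADDLE_ENFORCE_LT check.

#include <cstdint>
#include <memory>
#include <stdexcept>
#include <vector>

struct Instruction { /* opaque stand-in for the CINN instruction type */ };

// Fetch one instruction of a compiled runtime program by index, mirroring
// the cached_index -> compiled object -> instruction_index chain above.
const Instruction& FetchInstruction(
    const std::vector<std::unique_ptr<Instruction>>& instructions,
    int64_t instruction_index) {
  if (instruction_index < 0 ||
      static_cast<size_t>(instruction_index) >= instructions.size()) {
    throw std::out_of_range("instruction_index out of range");
  }
  return *instructions[instruction_index];
}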
- -#pragma once - -#include -#include -#include -#include - -#include "paddle/cinn/hlir/framework/graph_compiler.h" -#include "paddle/cinn/hlir/framework/instruction.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" -#include "paddle/fluid/framework/paddle2cinn/transform_type.h" -#include "paddle/fluid/operators/cinn/cinn_launch_context.h" -#include "paddle/fluid/operators/cinn/cinn_op_helper.h" - -namespace paddle::operators { - -using CinnInstruction = ::cinn::hlir::framework::Instruction; -using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject; -using CinnCompiler = framework::paddle2cinn::CinnCompiler; - -template -class CinnInstructionRunOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - // step 1: fetch the cinn instruction bound to this operator - auto cached_index = ctx.template Attr(kCachedIndex); - auto ins_index = ctx.template Attr(kInstructionIndex); - const CinnCompiledObject& compiled_object = - CinnCompiler::GetInstance()->GetCompiledObject(cached_index); - const std::vector>& instructions = - compiled_object.runtime_program->GetRunInstructions(); - PADDLE_ENFORCE_LT(ins_index, - instructions.size(), - platform::errors::InvalidArgument( - "Index(%ld) > instructions.size(%ld).", - ins_index, - instructions.size())); - auto&& instruction = instructions.at(ins_index); - - // step 2: prepare the input and output arguments of the instruction - details::CinnLaunchContext* launch_context = - compiled_object.launch_context.get(); - auto share_argument_buffer_fn = [launch_context, - &ctx](const std::string& var_name) { - cinn_buffer_t* buffer = launch_context->GetCinnBufferOfVar(var_name); - std::string revise_var_name = launch_context->RedirectVarName(var_name); - framework::Variable* var = ctx.scope().GetVar(revise_var_name); - auto* tensor = var->template GetMutable(); - buffer->memory = reinterpret_cast(tensor->mutable_data( - ctx.GetPlace(), - framework::paddle2cinn::TransToPaddleDataType(buffer->type))); - }; - std::vector in_args = ctx.InputNames(kX); - std::for_each(in_args.begin(), in_args.end(), share_argument_buffer_fn); - std::vector out_args = ctx.OutputNames(kOutputs); - std::for_each(out_args.begin(), out_args.end(), share_argument_buffer_fn); - - // step 3: launch CINN runtime to execute the instruction - // TODO(CtfGo): simplify format of arguments package as a vector in CINN - // and update this usage call - instruction->Run(&launch_context->FinalizeArguments(), - false, - details::GetStream(ctx)); - } -}; - -} // namespace paddle::operators diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index 734987ce92235..aefc3f8111e54 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -179,7 +179,7 @@ void CinnLaunchContext::BuildVarNameMap( PADDLE_ENFORCE_EQ( res.second, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Cinn variable(%s) maps to more than one paddle variable(%s,%s)", x.second, res.first->second, @@ -198,7 +198,7 @@ void CinnLaunchContext::BuildVarNameMap( PADDLE_ENFORCE_EQ( paddle2cinn_varmap_.size(), cinn2paddle_varmap_.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Size of variables is not equal, paddle[%ld] vs cinn[%ld]", paddle2cinn_varmap_.size(), 
cinn2paddle_varmap_.size())); @@ -236,7 +236,7 @@ CinnTensor CinnLaunchContext::GetCinnTensorOfVar(const std::string& var_name) { PADDLE_ENFORCE_EQ( IsVariableUsed(var_name), true, - platform::errors::NotFound("Variable(%s) not applied in CINN", var_name)); + phi::errors::NotFound("Variable(%s) not applied in CINN", var_name)); const auto& arg_name = paddle2cinn_varmap_.at(var_name); return cinn_scope_->GetTensor(arg_name); } @@ -276,7 +276,7 @@ void CinnLaunchContext::CheckTensorEquivalent( const std::string& var_name, const phi::DenseTensor& paddle_tensor) { PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable(%s) not applied in cinn", var_name)); // check dimension auto cinn_tensor = GetCinnTensorOfVar(var_name); @@ -309,7 +309,7 @@ void CinnLaunchContext::CheckTensorEquivalent( framework::paddle2cinn::TransToPaddleDataType(cinn_tensor->type()); PADDLE_ENFORCE_EQ(paddle_tensor.dtype(), cinn_dtype, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Tensors' dtype in variable(%s) are not equivalent, " "paddle is = [%s], but cinn is = [%s].", var_name, @@ -345,7 +345,7 @@ void CinnLaunchContext::InitializeArguments() { void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) { PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable(%s) not applied in cinn", var_name)); auto* cinn_buffer = GetCinnBufferOfVar(var_name); std::string revise_var_name = RedirectVarName(var_name); @@ -372,7 +372,7 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) { void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) { PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable(%s) not applied in cinn", var_name)); auto* cinn_buffer = GetCinnBufferOfVar(var_name); std::string revise_var_name = RedirectVarName(var_name); @@ -458,7 +458,7 @@ std::unique_ptr CinnLaunchContext::BuildCompiledProgram( PADDLE_ENFORCE_NE( res, cinn2paddle_varmap_.end(), - platform::errors::NotFound("Argument(%s) not found", arg)); + phi::errors::NotFound("Argument(%s) not found", arg)); var_names.emplace_back(res->second); } } @@ -592,8 +592,8 @@ cinn_buffer_t* CinnLaunchContext::GetCinnBufferOfVar( PADDLE_ENFORCE_NE( res, paddle2argument_.end(), - platform::errors::NotFound("Variable(%s) not found in compilation result", - var_name)); + phi::errors::NotFound("Variable(%s) not found in compilation result", + var_name)); return static_cast(res->second); } diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.cc b/paddle/fluid/operators/cinn/cinn_launch_op.cc deleted file mode 100644 index 9edb7348b125c..0000000000000 --- a/paddle/fluid/operators/cinn/cinn_launch_op.cc +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
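The cinn_launch_context.cc hunks above are purely mechanical: only the error-factory namespace changes from platform::errors to phi::errors, while the macro shape and the messages stay the same. As a rough model under that assumption, a PADDLE_ENFORCE_EQ site behaves like the following hypothetical helper; EnforceEq is not the real macro, which additionally captures file and line context.

#include <stdexcept>
#include <string>

// Simplified stand-in for PADDLE_ENFORCE_EQ(a, b,
// phi::errors::InvalidArgument(...)): compare, and throw on mismatch.
template <typename T>
void EnforceEq(const T& a, const T& b, const std::string& message) {
  if (!(a == b)) {
    // analogue of the phi::errors::InvalidArgument payload
    throw std::invalid_argument(message);
  }
}

// Usage, mirroring the varmap size check in cinn_launch_context.cc:
//   EnforceEq(paddle2cinn_varmap.size(), cinn2paddle_varmap.size(),
//             "Size of variables is not equal");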
- -#include "paddle/fluid/operators/cinn/cinn_launch_op.h" - -#include -#include - -#include "paddle/cinn/common/target.h" -#include "paddle/cinn/hlir/framework/graph_compiler.h" -#include "paddle/cinn/runtime/cinn_runtime.h" -#include "paddle/cinn/runtime/flags.h" -#include "paddle/common/flags.h" -#include "paddle/phi/core/generator.h" -#include "paddle/utils/string/string_helper.h" - -#if defined(PADDLE_WITH_CUDA) -COMMON_DECLARE_bool(cudnn_deterministic); -#endif - -namespace paddle { -namespace operators { - -namespace details { - -const ::cinn::common::Target& PlaceToCinnTarget(const platform::Place& place) { - if (platform::is_cpu_place(place)) { - return ::cinn::common::DefaultHostTarget(); - } else if (platform::is_gpu_place(place)) { - return ::cinn::common::DefaultNVGPUTarget(); - } - - PADDLE_THROW(platform::errors::InvalidArgument( - "CINN is not supported on current place:%s", place)); - return ::cinn::common::UnkTarget(); -} - -void DebugCinnCompiledResult(const CinnCompiledObject& result) { - if (!VLOG_IS_ON(4)) { - return; - } - const auto& cinn_runtime_program = result.runtime_program; - const auto& cinn_scope = *(result.scope); - const auto& paddle2cinn_varmap = result.paddle2cinn_varmap; - - VLOG(4) << "Compiled runtime_program instruction size:[" - << cinn_runtime_program->size() << "]"; - - std::vector infos; - auto cinn_var_names = cinn_scope.var_names(); - infos.reserve(cinn_var_names.size()); - std::transform(cinn_var_names.begin(), - cinn_var_names.end(), - std::back_inserter(infos), - [](const auto& name_view) { return name_view.data(); }); - VLOG(4) << "Compiled scope variable names:[" - << string::join_strings(infos, ',') << "]"; - - infos.clear(); - infos.reserve(paddle2cinn_varmap.size()); - std::transform(paddle2cinn_varmap.begin(), - paddle2cinn_varmap.end(), - std::back_inserter(infos), - [](const auto& paddle2cinn) { - return paddle2cinn.first + "->" + paddle2cinn.second; - }); - VLOG(4) << "Compiled paddle2cinn_varmap:[" << string::join_strings(infos, ',') - << "]"; -} - -void LaunchCinnExecution(const CinnCompiledObject& compiled_obj, - const CinnLaunchContext& context, - void* stream) { - compiled_obj.runtime_program->Execute(&context.FinalizeArguments(), stream); -} - -void SetCinnRuntimeFlags() { -#if defined(PADDLE_WITH_CUDA) - VLOG(4) << "Set FLAGS_cinn_cudnn_deterministic to " - << FLAGS_cudnn_deterministic; - ::cinn::runtime::SetCinnCudnnDeterministic(FLAGS_cudnn_deterministic); -#endif -} - -template <> -void SetCinnRandomSeed() { - auto seed = phi::DefaultCPUGenerator()->GetCurrentSeed(); - ::cinn::runtime::RandomSeed::GetOrSet(seed); -} - -void SetCinnTarget(const ::cinn::common::Target& target) { - VLOG(4) << "Set CINN compile target to " << target; - ::cinn::runtime::CurrentTarget::SetCurrentTarget(target); -} - -} // namespace details - -class CinnLaunchOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - // The cinn-graph may hasn't input for CINN now support fill_constant, - // and its all inputs may generated by fill_constant instead of by fetch. 
- // OP_INOUT_CHECK(ctx->HasInputs(kX) || ctx->HasInputs(kNoNeedBufferX), - // "Input", string::format_string("%s|%s", kX, - // kNoNeedBufferX), - // "CinnLaunchOp"); - OP_INOUT_CHECK( - ctx->HasOutputs(kOutputs), "Output", kOutputs, "CinnLaunchOp"); - } - - protected: - /* [Why use single type kernel]: - * - * This op is similar to a control flow op; it does not need - * an op kernel, but in order to make it executable under dynamic - * graph mode, we implement it with an op kernel. - * - * So whether the kernel data type is int, float or any other type - * has no effect on its execution logic, and a data type is - * directly specified here. - * - * Of course, the data type here is also not important. - */ - - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, ctx.GetPlace()); - } -}; - -class CinnLaunchOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput(kX, - "(vector<phi::DenseTensor>) " - "which are the inputs of the graph inside the CinnLaunchOp, " - "excluding kNoNeedBufferX.") - .AsDuplicable(); - AddInput(kNoNeedBufferX, - "(vector<phi::DenseTensor>) " - "which are the inputs of the graph inside the CinnLaunchOp, but " - "their buffers are not needed.") - .AsDuplicable() - .AsDispensable(); - AddOutput(kOutputs, - "(vector<phi::DenseTensor>) " - "which are the outputs of the graph inside the CinnLaunchOp.") - .AsDuplicable(); - AddAttr<std::string>( - kCompilationKey, - "(string) " - "a hash key used to get the graph object or its computation result."); - AddComment(R"DOC( -CinnLaunch Operator. - -This operator is used to launch CINN(https://github.com/PaddlePaddle/CINN/blob/develop/README.md) -to compile a graph and execute the compiled object. - -Both the input and output of this operator are a set of variables -which are the input and output of the graph, respectively, that will be -compiled and executed in this operator. -In addition, an attribute named 'compilation_key' must be -set to get the corresponding ir::Graph object of the graph -or its computation result. - -It accomplishes the computation of the graph in the following steps: - 1. Fetch the ir::Graph object from CinnCompiler using kCompilationKey. - 2. Compile the graph to a compiled object, and insert it into the - global cache so that we can directly query it from this cache next time - when the shapes of the input variables have not changed. - 3. Create and instantiate all variables used to execute the compiled runtime program, - if necessary, according to the info (type, shape) included in the returned scope. - 4. Pack each tensor buffer of all the above variables as execution arguments. - 5. Launch execution of the runtime program with the above arguments; - the results are output by writing values to the underlying buffer addresses.
- -)DOC"); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(CinnLaunchOpNoBufVarsInferer, - kNoNeedBufferX); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - cinn_launch, - ops::CinnLaunchOp, - ops::CinnLaunchOpMaker, - ops::CinnLaunchOpNoBufVarsInferer, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -/* see [Why use single type kernel] */ -PD_REGISTER_STRUCT_KERNEL( - cinn_launch, CPU, ALL_LAYOUT, ops::CinnLaunchOpKernel, float) {} diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc b/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc deleted file mode 100644 index a7ff605dca9b9..0000000000000 --- a/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/cinn/cinn_launch_op.h" - -#include "paddle/cinn/runtime/flags.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/phi/core/generator.h" - -namespace paddle { -namespace operators { - -namespace details { - -template <> -void SetCinnRandomSeed() { - auto seed = phi::DefaultCUDAGenerator(0)->GetCurrentSeed(); - ::cinn::runtime::RandomSeed::GetOrSet(seed); -} - -} // namespace details -} // namespace operators -} // namespace paddle - -/* see [Why use single type kernel] */ -PD_REGISTER_STRUCT_KERNEL(cinn_launch, - GPU, - ALL_LAYOUT, - paddle::operators::CinnLaunchOpKernel, - float) {} diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h deleted file mode 100644 index 2ce23dc965b31..0000000000000 --- a/paddle/fluid/operators/cinn/cinn_launch_op.h +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include -#include -#include -#include - -#include "paddle/cinn/common/target.h" -#include "paddle/common/flags.h" -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/new_executor/interpretercore.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" -#include "paddle/fluid/operators/cinn/cinn_launch_context.h" -#include "paddle/fluid/operators/cinn/cinn_op_helper.h" -#include "paddle/fluid/platform/profiler.h" -#include "paddle/pir/include/core/program.h" -#include "paddle/pir/include/core/value.h" - -COMMON_DECLARE_bool(enable_pe_launch_cinn); -COMMON_DECLARE_bool(enable_interpretercore_launch_cinn); -namespace paddle { -namespace operators { - -using CinnCompiler = framework::paddle2cinn::CinnCompiler; -using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject; - -namespace details { - -// Transform Paddle place to CINN target -const ::cinn::common::Target& PlaceToCinnTarget(const platform::Place& place); - -// Print detailed compilation result of graph for debug -void DebugCinnCompiledResult(const CinnCompiledObject& result); - -// Launch cinn to execute compiled executable program and wait done -void LaunchCinnExecution(const CinnCompiledObject& compiled_obj, - const CinnLaunchContext& context, - void* stream); - -// Set cinn FLAGS (such as FLAGS_cinn_cudnn_deterministic) with paddle's FLAGS. -void SetCinnRuntimeFlags(); - -// set CINN global random seed -template -void SetCinnRandomSeed(); - -// set CINN compile target -void SetCinnTarget(const ::cinn::common::Target& target); - -} // namespace details - -template -class CinnLaunchOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const auto& scope = ctx.scope(); - const auto& place = ctx.GetPlace(); - void* stream = details::GetStream(ctx); - platform::RecordEvent record_event_1( - "Step 1. Find graph object and prepare input"); - // Step 1. 
Find graph object and prepare input - PADDLE_ENFORCE_EQ(ctx.HasAttr(kCompilationKey), - true, - platform::errors::NotFound( - "No Attribute(%s) found for CinnLaunchOp operator.", - kCompilationKey)); - const auto& compilation_key = ctx.template Attr(kCompilationKey); - VLOG(4) << "CinnLaunchOp attribute(" << kCompilationKey << ") " - << "value:\n" - << CinnCompiler::GetInstance()->ReadableKey(compilation_key); - - std::map inputs_name2tensor; - std::vector input_x_variable_names; - std::vector input_no_need_buffer_variable_names; - auto add_name2tensor_fn = - [&inputs_name2tensor]( - const std::vector& variable_names, - const std::vector& tensors) { - std::transform( - variable_names.begin(), - variable_names.end(), - tensors.begin(), - std::inserter(inputs_name2tensor, inputs_name2tensor.end()), - [](const std::string& name, const phi::DenseTensor* tensor) { - return std::make_pair(name, tensor); - }); - }; - - auto input_x_tensors = ctx.MultiInput(kX); - if (!input_x_tensors.empty()) { - input_x_variable_names = std::move(ctx.InputNames(kX)); - add_name2tensor_fn(input_x_variable_names, input_x_tensors); - } - auto input_no_need_buffer_tensors = - ctx.MultiInput(kNoNeedBufferX); - if (!input_no_need_buffer_tensors.empty()) { - input_no_need_buffer_variable_names = - std::move(ctx.InputNames(kNoNeedBufferX)); - add_name2tensor_fn(input_no_need_buffer_variable_names, - input_no_need_buffer_tensors); - } - - platform::RecordEvent record_event_2( - "Step 2. Get compilation result of the graph"); - // Step 2. Get compilation result of the graph - auto target = details::PlaceToCinnTarget(place); - details::SetCinnTarget(target); - using ClockType = std::chrono::steady_clock; - std::chrono::time_point start_t, end_t; - if (VLOG_IS_ON(1)) { - VLOG(1) << "Starts to compile at thread " << std::this_thread::get_id(); - start_t = ClockType::now(); - } - const auto& cinn_compiled_object = CinnCompiler::GetInstance()->Compile( - compilation_key, inputs_name2tensor, target, stream); - if (VLOG_IS_ON(1)) { - end_t = ClockType::now(); - auto time_sec = std::chrono::duration_cast( - end_t - start_t); - VLOG(1) << "Ends to compile at thread " << std::this_thread::get_id() - << " , time cost : " << time_sec.count() << " ms"; - - const auto& visible_names = - cinn_compiled_object.launch_context->GetVisibleVarNames(); - VLOG(1) << "These CINN variable can visible by Paddle: " - << string::join_strings(visible_names, ", "); - } - details::DebugCinnCompiledResult(cinn_compiled_object); - auto* launch_context = cinn_compiled_object.launch_context.get(); - - platform::RecordEvent record_event_3("Step 3. Set CINN runtime FLAGS."); - // Step 3. Set CINN runtime FLAGS, such as FLAGS_cinn_cudnn_deterministic. - details::SetCinnRuntimeFlags(); - - // set CINN global random seed - details::SetCinnRandomSeed(); - - // Step 4. Execute the compiled CINN instructions by a PE or - // by the CINN compiled program in sequential order - if (FLAGS_enable_pe_launch_cinn) { - if (FLAGS_enable_interpretercore_launch_cinn) { - platform::RecordEvent record_event_4( - "Step 4. Execute the runtime program by InterpreterCore."); - VLOG(4) << "Execute the runtime program by InterpreterCore"; - auto* interpreter_core = launch_context->InitializeInterpreterCore( - place, const_cast(&scope)); - interpreter_core->Run({}, false); - } else { - platform::RecordEvent record_event_4( - "Step 4. 
Execute the runtime graph by PE."); - VLOG(4) << "Execute the runtime graph by PE"; - framework::Scope& exec_scope = scope.NewScope(); - auto* pe = launch_context->InitializePE(place, &exec_scope); - pe->RunWithoutFetch(launch_context->GetSkipEagerVars()); - } - } else { - platform::RecordEvent record_event_4( - "Step 4. Execute the compiled executable program."); - VLOG(4) << "Execute the compiled executable program"; - launch_context->UpdateCapturedEnv(scope, place); - LaunchCinnExecution(cinn_compiled_object, *launch_context, stream); - } - VLOG(4) << "CinnLaunchOp launch execution done."; - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h index 3895bc09a08a0..4a61792c5b647 100644 --- a/paddle/fluid/operators/clip_by_norm_op.h +++ b/paddle/fluid/operators/clip_by_norm_op.h @@ -37,21 +37,21 @@ class ClipByNormOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of ClipByNormOp should not be null. Please " "check if it is created correctly.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ClipByNormOp should not be null. " "Please check if it is created correctly.")); auto max_norm = ctx->Attrs().Get("max_norm"); PADDLE_ENFORCE_GT( max_norm, 0, - platform::errors::InvalidArgument("max_norm should be greater than 0. " - "Received max_norm is %f.", - max_norm)); + phi::errors::InvalidArgument("max_norm should be greater than 0. " + "Received max_norm is %f.", + max_norm)); auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", x_dims); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/collective/alltoall_op.cc b/paddle/fluid/operators/collective/alltoall_op.cc index a8e3b2808092c..bd99fdde2f2c2 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cc @@ -28,7 +28,7 @@ class AllToAllBaseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for alltoall op must be non-negative.", ring_id)); framework::DDim dim = ctx->GetInputDim("X"); if (dim[0] < 0) dim[0] = -1; @@ -79,4 +79,4 @@ PD_REGISTER_STRUCT_KERNEL(alltoall, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc index 8f942013435eb..93a44776851d4 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cu.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -45,7 +45,7 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for alltoall op must be non-negative.", ring_id)); auto place = ctx.GetPlace(); @@ -59,7 +59,7 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -70,7 +70,7 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -93,7 +93,7 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims[0] % nranks, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension size (%d) of the input tensor must be " "divisible by the number of ranks (%d).", x_dims[0], @@ -126,12 +126,11 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { VLOG(3) << "old NCCLCommContext has rid " << ring_id; } #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -153,5 +152,5 @@ PD_REGISTER_STRUCT_KERNEL(alltoall, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/alltoall_op.h b/paddle/fluid/operators/collective/alltoall_op.h index 61456c268d5d5..187d4965cdcc8 100644 --- a/paddle/fluid/operators/collective/alltoall_op.h +++ b/paddle/fluid/operators/collective/alltoall_op.h @@ -33,7 +33,7 @@ template class AllToAllOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support alltoall for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/barrier_op.cu.cc b/paddle/fluid/operators/collective/barrier_op.cu.cc index 04d409e82b4d5..dc6b701afee00 100644 --- a/paddle/fluid/operators/collective/barrier_op.cu.cc +++ b/paddle/fluid/operators/collective/barrier_op.cu.cc @@ -47,7 +47,7 @@ class BarrierOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -58,7 +58,7 @@ class BarrierOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); auto stream = comm_ctx->GetStream(); @@ -82,8 +82,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel { VLOG(3) << "old NCCLCommContext has rid " << rid; } #else - PADDLE_THROW(platform::errors::Unavailable( - "PaddlePaddle should compile with NCCL.")); + PADDLE_THROW( + phi::errors::Unavailable("PaddlePaddle should compile with NCCL.")); #endif } }; diff --git a/paddle/fluid/operators/collective/barrier_op.h b/paddle/fluid/operators/collective/barrier_op.h index b05f2de53a073..6bbd5c38a2f76 100644 --- a/paddle/fluid/operators/collective/barrier_op.h +++ b/paddle/fluid/operators/collective/barrier_op.h @@ -41,12 +41,12 @@ class BarrierOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::BarrierOptions opts(gloo->GetContext()); gloo::barrier(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } diff --git a/paddle/fluid/operators/collective/c_allgather_op.cc b/paddle/fluid/operators/collective/c_allgather_op.cc index 2a0087cd8aa72..e67a2cccc16e9 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cc @@ -26,10 +26,10 @@ class CAllGatherOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "AllGather"); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Input", "Out", "AllGather"); int nranks = ctx->Attrs().Get("nranks"); - PADDLE_ENFORCE_GE(nranks, - 2, - platform::errors::InvalidArgument( - "The value of nranks should be >=2.")); + PADDLE_ENFORCE_GE( + nranks, + 2, + phi::errors::InvalidArgument("The value of nranks should be >=2.")); framework::DDim dim = ctx->GetInputDim("X"); // 0D use stack/unstack while others use concat/split if (dim.size() == 0) { @@ -85,4 +85,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, int64_t, uint8_t, bool, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index dcd88f4a311ee..7b57e7af25f9b 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -67,7 +67,7 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -78,7 +78,7 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -88,7 +88,7 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->nranks())); stream = comm->stream(); VLOG(3) << "old NCCLCommContext has rid " << rid; @@ -112,7 +112,7 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -138,5 +138,5 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, int8_t, int64_t, bool, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_allgather_op.h b/paddle/fluid/operators/collective/c_allgather_op.h index b4aff2c2363ec..c5e2088da9889 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.h +++ b/paddle/fluid/operators/collective/c_allgather_op.h @@ -49,14 +49,14 @@ class CAllGatherOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::AllgatherOptions opts(gloo->GetContext()); opts.setInput(const_cast(send_buff), send_numel); opts.setOutput(recv_buff, send_numel * nranks); gloo::allgather(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } diff --git a/paddle/fluid/operators/collective/c_allgather_op_xpu.cc b/paddle/fluid/operators/collective/c_allgather_op_xpu.cc index d31c120cf9ede..48e965894a294 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_xpu.cc @@ -65,7 +65,7 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -76,7 +76,7 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -86,7 +86,7 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->nranks())); stream = comm->stream(); VLOG(3) << "old BKCLCommContext has rid " << rid; @@ -106,7 +106,7 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { comm->comm(), sendbuff, numel, recvbuff, dtype, stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU and bkcl.")); #endif } @@ -124,7 +124,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, ops::CAllGatherOpXPUKernel, float, double, - plat::float16, + phi::dtype::float16, int, int64_t, uint8_t, diff --git a/paddle/fluid/operators/collective/c_allreduce_avg_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_avg_op.cu.cc index d3f0b45f64432..e859145df8b73 100644 --- a/paddle/fluid/operators/collective/c_allreduce_avg_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_avg_op.cu.cc @@ -31,5 +31,5 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_avg, double, int, int64_t, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cc index c47bf7025e1fd..d659be0f3d141 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cc @@ -55,4 +55,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc index 277988b56916f..012b280a9ab15 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc @@ -34,5 +34,5 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, double, int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc index 8c648b4ae4a37..943df02ad93e2 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, ops::CAllReduceMaxXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cc index c21337a27202e..2a9dd023cf162 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cc @@ -56,4 +56,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_min, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc index 4475abdef281b..a3eec10051c52 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc 
+++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_min, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op_xpu.cc b/paddle/fluid/operators/collective/c_allreduce_min_op_xpu.cc index f9be16781af70..fb19a2924d1eb 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_min, ops::CAllReduceMinXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 55ca03c0bc626..db9d6d5361462 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -96,7 +96,7 @@ class CAllReduceOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::AllreduceOptions opts(gloo->GetContext()); opts.setInput(const_cast(send_buff), send_numel); @@ -123,14 +123,14 @@ class CAllReduceOpCPUKernel : public framework::OpKernel { &gloo::product)); break; default: - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::InvalidArgument( - "Invalid reduce type: %d.", red_type)); + PADDLE_ENFORCE_EQ( + true, + false, + phi::errors::InvalidArgument("Invalid reduce type: %d.", red_type)); } gloo::allreduce(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } @@ -150,11 +150,11 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` tensor should be on cpu place")); PADDLE_ENFORCE_EQ(cond->numel(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` should be shape [1]")); if (!cond->data()[0]) { VLOG(4) << "Skip all reduce Op since cond is 0"; @@ -197,8 +197,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW(phi::errors::InvalidArgument("Invalid reduce type: %d", + red_type)); } auto task = pg->AllReduce(out, *in, opts, false, true); @@ -215,7 +215,7 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -226,7 +226,7 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -262,8 +262,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW( + phi::errors::InvalidArgument("Invalid reduce type: %d", red_type)); } if (comm_ctx) { @@ -278,7 +278,7 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU.")); #endif } @@ -297,11 +297,11 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` tensor should be on cpu place")); PADDLE_ENFORCE_EQ(cond->numel(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` should be shape [1]")); if (!cond->data()[0]) { VLOG(4) << "Skip all reduce Op since cond is 0"; @@ -345,8 +345,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW(phi::errors::InvalidArgument("Invalid reduce type: %d", + red_type)); } auto task = pg->AllReduce(out, *in, opts, false, true); @@ -363,7 +363,7 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -374,7 +374,7 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -420,8 +420,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { #endif default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW( + phi::errors::InvalidArgument("Invalid reduce type: %d", red_type)); } if (comm_ctx) { @@ -436,7 +436,7 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc index ee40f29d789e1..181b78b545e7c 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc @@ -56,4 +56,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_prod, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc index c63a1d2182678..e2c0a71a9ced4 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_prod, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op_xpu.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op_xpu.cc index 5558b1722093a..d3696c2c5dfc1 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_prod, ops::CAllReduceProdXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cc index 79e70757fbcfd..80b97b2bc70cb 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cc @@ -77,4 +77,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc index 76d809cd234f0..909bd23db2413 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc @@ -34,5 +34,5 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, double, int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op_xpu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op_xpu.cc index 1d4c5f63b5850..21bedcff8774b 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, ops::CAllReduceSumXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git 
a/paddle/fluid/operators/collective/c_broadcast_op.cc b/paddle/fluid/operators/collective/c_broadcast_op.cc index 670b69c05701c..27f3a1bcdc29f 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cc @@ -73,4 +73,4 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index 4d49bc4990c6e..98f9102f2d8f0 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -80,7 +80,7 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel { out->set_lod(x->lod()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -103,5 +103,5 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, #if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_broadcast_op.h b/paddle/fluid/operators/collective/c_broadcast_op.h index e0d6158f19db7..c02b8f8a9a4fe 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.h +++ b/paddle/fluid/operators/collective/c_broadcast_op.h @@ -59,7 +59,7 @@ class CBroadcastOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::BroadcastOptions opts(gloo->GetContext()); opts.setOutput(recv_buff, send_numel); @@ -67,7 +67,7 @@ class CBroadcastOpCPUKernel : public framework::OpKernel { gloo::broadcast(opts); } #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } diff --git a/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc b/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc index 6bf9d956a342e..ac7d9623e3241 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc @@ -50,7 +50,7 @@ class CBroadcastOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -61,7 +61,7 @@ class CBroadcastOpXPUKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -119,7 +119,7 @@ class CBroadcastOpXPUKernel : public framework::OpKernel { out->Resize(x->dims()); out->set_lod(x->lod()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU and BKCL.")); #endif } @@ -137,6 +137,6 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, ops::CBroadcastOpXPUKernel, float, double, - plat::float16, + phi::dtype::float16, int, int64_t) {} diff --git a/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc b/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc index 14059c3d91027..ca0a45c8ae79c 100644 --- a/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc @@ -54,7 +54,7 @@ class CCommInitMultiTrainerOp : public framework::OperatorBase { const platform::Place& place) const override { auto var = scope.FindVar(Input("X")); PADDLE_ENFORCE_NOT_NULL( - var, platform::errors::InvalidArgument("Input X must be provided.")); + var, phi::errors::InvalidArgument("Input X must be provided.")); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) ncclUniqueId* nccl_id = var->GetMutable(); @@ -70,8 +70,8 @@ class CCommInitMultiTrainerOp : public framework::OperatorBase { platform::NCCLCommContext::Instance().CreateNCCLCommMultiTrainer( devices, nccl_id, ntrainers, train_id, rid); #else - PADDLE_THROW(platform::errors::Unimplemented( - "PaddlePaddle should compile with GPU.")); + PADDLE_THROW( + phi::errors::Unimplemented("PaddlePaddle should compile with GPU.")); #endif } }; diff --git a/paddle/fluid/operators/collective/c_comm_init_op.cc b/paddle/fluid/operators/collective/c_comm_init_op.cc index 172e330675033..5c6613a0e9ca3 100644 --- a/paddle/fluid/operators/collective/c_comm_init_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_op.cc @@ -65,7 +65,7 @@ class CCommInitOp : public framework::OperatorBase { #if defined(PADDLE_WITH_CUSTOM_DEVICE) auto var = scope.FindVar(Input("X")); PADDLE_ENFORCE_NOT_NULL( - var, platform::errors::InvalidArgument("Input con not be empty.")); + var, phi::errors::InvalidArgument("Input con not be empty.")); int nranks = Attr("nranks"); int rid = Attr("ring_id"); @@ -87,7 +87,7 @@ class CCommInitOp : public framework::OperatorBase { "c_comm_init_op"); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with custom device.")); #endif } else { @@ -99,21 +99,21 @@ class CCommInitOp : public framework::OperatorBase { using UniqueId = BKCLUniqueId; using CommContext = platform::BKCLCommContext; #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with GPU or XPU.")); #endif PADDLE_ENFORCE_EQ( platform::is_gpu_place(place) || platform::is_xpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "CCommInitOp can run on gpu or xpu place only.")); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ defined(PADDLE_WITH_XPU_BKCL) auto var = scope.FindVar(Input("X")); PADDLE_ENFORCE_NOT_NULL( - 
var, platform::errors::InvalidArgument("Input con not be empty.")); + var, phi::errors::InvalidArgument("Input con not be empty.")); int nranks = Attr("nranks"); int rid = Attr("ring_id"); diff --git a/paddle/fluid/operators/collective/c_concat_op.cc b/paddle/fluid/operators/collective/c_concat_op.cc index 27c1141f8b67f..75db7e9fad427 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cc @@ -27,29 +27,29 @@ class CConcatOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); int ring_id = ctx->Attrs().Get("ring_id"); - PADDLE_ENFORCE_GE(nranks, - 2, - platform::errors::InvalidArgument( - "The number of ranks (%d) for c_concat " - "must be greater than 1.", - nranks)); + PADDLE_ENFORCE_GE( + nranks, + 2, + phi::errors::InvalidArgument("The number of ranks (%d) for c_concat " + "must be greater than 1.", + nranks)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_concat must be non-negative.", ring_id)); PADDLE_ENFORCE_GE( rank, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank (%d) for c_concat must be non-negative.", rank)); - PADDLE_ENFORCE_LT(rank, - nranks, - platform::errors::InvalidArgument( - "The value of rank (%d) for c_concat must " - "be less than that of nranks.", - rank, - nranks)); + PADDLE_ENFORCE_LT( + rank, + nranks, + phi::errors::InvalidArgument("The value of rank (%d) for c_concat must " + "be less than that of nranks.", + rank, + nranks)); framework::DDim dim = ctx->GetInputDim("X"); dim[dim.size() - 1] = dim[dim.size() - 1] * nranks; @@ -121,4 +121,4 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index b75b2d4b0f687..9ed68c7c6809b 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -47,19 +47,19 @@ class CConcatOpCUDAKernel : public framework::OpKernel { auto place = ctx.GetPlace(); PADDLE_ENFORCE_GE(rank, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "greater than or equal to 0.", rank)); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of nranks (%d) for c_concat must be " "greater than or equal to 2.", nranks)); PADDLE_ENFORCE_LT(rank, nranks, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "less than that of nranks (%d).", rank, @@ -95,7 +95,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm " @@ -107,7 +107,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -117,7 +117,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( 
nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->nranks())); stream = comm->stream(); VLOG(3) << "old NCCLCommContext has rid " << rid; @@ -156,7 +156,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { auto& dev_ctx2 = ctx.template device_context(); functor(dev_ctx2, inputs, axis, out); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -178,5 +178,5 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, #if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_concat_op.h b/paddle/fluid/operators/collective/c_concat_op.h index 39bdc4c2740de..84edccffc6fa3 100644 --- a/paddle/fluid/operators/collective/c_concat_op.h +++ b/paddle/fluid/operators/collective/c_concat_op.h @@ -29,7 +29,7 @@ template class CConcatOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support c_concat for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/c_concat_op_xpu.cc b/paddle/fluid/operators/collective/c_concat_op_xpu.cc index 10a2624ae83a4..fcd3c8b33f8b9 100644 --- a/paddle/fluid/operators/collective/c_concat_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_concat_op_xpu.cc @@ -46,19 +46,19 @@ class CConcatOpXPUKernel : public framework::OpKernel { int rid = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE(rank, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "greater than or equal to 0.", rank)); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of nranks (%d) for c_concat must be " "greater than or equal to 2.", nranks)); PADDLE_ENFORCE_LT(rank, nranks, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "less than that of nranks (%d).", rank, @@ -95,7 +95,7 @@ class CConcatOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm " @@ -107,7 +107,7 @@ class CConcatOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -118,7 +118,7 @@ class CConcatOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->nranks())); stream = comm->stream(); VLOG(3) << "old BKCLCommContext has rid " << rid; @@ -151,7 +151,7 @@ class CConcatOpXPUKernel : public framework::OpKernel { dev_ctx.template Alloc(out, x->dtype()); functor(dev_ctx, inputs, axis, out); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with XPU.")); #endif } 
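The hunks on both sides of this point repeat one mechanical pattern: at each PADDLE_ENFORCE_* and PADDLE_THROW call site, the error-factory namespace moves from platform::errors to phi::errors while the enforced condition and the message text stay the same (they are only re-wrapped by clang-format). A minimal self-contained sketch of why that substitution is safe, using toy names (TOY_ENFORCE_GE and a single-argument stand-in for phi::errors::InvalidArgument, not Paddle's real headers): the macro treats the error object as an opaque argument that is only constructed when the check fails, so swapping the factory namespace cannot change control flow.

// toy_enforce.cc -- illustrative only; mirrors the PADDLE_ENFORCE pattern,
// not PaddlePaddle's actual implementation.
#include <cstdio>
#include <stdexcept>

namespace phi {
namespace errors {
// Stand-in factory: Paddle's version is variadic and returns a rich error
// object; this toy formats one value and returns std::runtime_error.
inline std::runtime_error InvalidArgument(const char* fmt, long v) {
  char buf[256];
  std::snprintf(buf, sizeof(buf), fmt, v);
  return std::runtime_error(buf);
}
}  // namespace errors
}  // namespace phi

// Toy macro: evaluates the condition; the error expression is constructed
// only on failure, exactly like the third argument of PADDLE_ENFORCE_GE.
#define TOY_ENFORCE_GE(a, b, err)   \
  do {                              \
    if (!((a) >= (b))) throw (err); \
  } while (0)

int main() {
  int ring_id = -1;
  try {
    TOY_ENFORCE_GE(ring_id, 0,
                   phi::errors::InvalidArgument(
                       "The ring_id (%ld) must be non-negative.",
                       (long)ring_id));
  } catch (const std::exception& e) {
    std::printf("enforce failed: %s\n", e.what());
  }
  return 0;
}

Because the condition and the error expression are independent macro arguments, the whole migration can be done as a call-site substitution across hundreds of operators without touching any kernel logic, which is exactly what these hunks show.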
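The other recurring hunk in these collective kernels is the communicator selection: when FLAGS_dynamic_static_unified_comm is set, the kernel requires the ring id to be registered with the new comm-context manager and takes the stream from the resulting NCCLCommContext/BKCLCommContext; otherwise it falls back to the legacy per-ring platform::NCCLCommContext singleton. A hedged, self-contained sketch of that branch follows; Stream, NewCommContext, LegacyComm, and PickStream are toy names invented for illustration, while the real classes live in phi::distributed and paddle::platform.

// comm_select.cc -- simplified control-flow sketch of the
// FLAGS_dynamic_static_unified_comm branch seen in these kernels.
#include <cstdio>
#include <map>
#include <string>

struct Stream { const char* tag; };

struct NewCommContext {  // stand-in for phi::distributed::NCCLCommContext
  Stream s{"new-comm-stream"};
  Stream* GetStream() { return &s; }
};

struct LegacyComm {  // stand-in for a platform::NCCLCommContext entry
  Stream s{"legacy-stream"};
  Stream* stream() { return &s; }
};

bool FLAGS_dynamic_static_unified_comm = true;  // env-driven in Paddle
std::map<std::string, NewCommContext> comm_context_manager;  // keyed by ring id
std::map<int, LegacyComm> legacy_comms;

Stream* PickStream(int ring_id) {
  if (FLAGS_dynamic_static_unified_comm) {
    // New path: the ring id must already be registered; the real kernels
    // enforce this with PADDLE_ENFORCE_EQ(comm_context_manager.Has(...),
    // true, ...) and PADDLE_ENFORCE_NE(comm_ctx, nullptr, ...).
    auto it = comm_context_manager.find(std::to_string(ring_id));
    if (it == comm_context_manager.end()) return nullptr;
    return it->second.GetStream();
  }
  // Legacy path: per-ring singleton lookup, then comm->stream().
  return legacy_comms[ring_id].stream();
}

int main() {
  comm_context_manager[std::to_string(0)] = NewCommContext{};
  Stream* s = PickStream(0);
  std::printf("selected stream: %s\n", s ? s->tag : "none");
  return 0;
}

In the real kernels the failure paths raise phi::errors::InvalidArgument or phi::errors::Unavailable through the enforce macros rather than returning nullptr; this PR changes only which namespace those factories come from, not the selection logic itself.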
@@ -169,4 +169,5 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, float, int, int64_t, - plat::float16) {} + plat::float16, + plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_embedding_op.cc b/paddle/fluid/operators/collective/c_embedding_op.cc index 86e882b1c6cc8..0bbd64abb10d5 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.cc +++ b/paddle/fluid/operators/collective/c_embedding_op.cc @@ -33,7 +33,7 @@ class CEmbeddingOp : public framework::OperatorWithKernel { VLOG(5) << "ids rank is " << ids_rank << std::endl; PADDLE_ENFORCE_EQ(table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of the 'c_embedding' must be 2. " "But received c_embedding's dimensions = %d, " "c_embedding's shape = [%s].", @@ -57,7 +57,7 @@ class CEmbeddingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( (height > 0 && width > 0 && start_idx >= 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "height:%ld width:%ld start_idx:%ld must not have negative values", height, width, @@ -133,10 +133,10 @@ class CEmbeddingOpGrad : public framework::OperatorWithKernel { ctx->SetOutputDim(framework::GradVarName("W"), table_dims); // check valid - PADDLE_ENFORCE_EQ(table_dims.size(), - 2, - platform::errors::InvalidArgument( - "Only accept the dims of table_t == 2")); + PADDLE_ENFORCE_EQ( + table_dims.size(), + 2, + phi::errors::InvalidArgument("Only accept the dims of table_t == 2")); const int64_t start_idx = ctx->Attrs().Get("start_index"); const int64_t height = table_dims[0]; @@ -145,7 +145,7 @@ class CEmbeddingOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( (height > 0 && width > 0 && start_idx >= 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "height:%ld width:%ld start_idx:%ld must not have negative values", height, width, diff --git a/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc b/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc index a7a234f5792ef..3d469b81609f8 100644 --- a/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc @@ -31,10 +31,10 @@ namespace operators { static void GenBKCLID(std::vector* bkcl_ids) { for (size_t i = 0; i < bkcl_ids->size(); ++i) { BKCLResult_t ret = bkcl_get_unique_id(&(*bkcl_ids)[i]); - PADDLE_ENFORCE_EQ(BKCL_SUCCESS, - ret, - platform::errors::PreconditionNotMet( - "bkcl get unique id failed [%d]", ret)); + PADDLE_ENFORCE_EQ( + BKCL_SUCCESS, + ret, + phi::errors::PreconditionNotMet("bkcl get unique id failed [%d]", ret)); } } @@ -46,8 +46,8 @@ static void CopyBKCLIDToVar(const std::vector& bkcl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto bkcl_id = var->GetMutable(); memcpy(bkcl_id, &bkcl_ids[i], sizeof(BKCLUniqueId)); } diff --git a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc index c66aedd3b3923..f7f92a0a574df 100644 --- a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc @@ -42,8 +42,8 @@ static void CopyNCCLIDToVar(const std::vector& nccl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + 
phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto nccl_id = var->GetMutable(); memcpy(nccl_id, &nccl_ids[i], sizeof(ncclUniqueId)); } diff --git a/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc index e404a1357ee75..c24fb4964b336 100644 --- a/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_xccl_id_op.cc @@ -36,8 +36,8 @@ static void CopyXCCLIDToVar(const std::vector& xccl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto xccl_id = var->GetMutable(); *xccl_id = xccl_ids[i]; } diff --git a/paddle/fluid/operators/collective/c_identity_op.cc b/paddle/fluid/operators/collective/c_identity_op.cc index c067c061b8613..78d4a27f822b4 100644 --- a/paddle/fluid/operators/collective/c_identity_op.cc +++ b/paddle/fluid/operators/collective/c_identity_op.cc @@ -31,7 +31,7 @@ class CIdentityOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_identity must be non-negative.", ring_id)); framework::DDim dim = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", dim); diff --git a/paddle/fluid/operators/collective/c_reduce_avg_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_avg_op.cu.cc index 07d2cc748900e..1dcd5a2c6489c 100644 --- a/paddle/fluid/operators/collective/c_reduce_avg_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_avg_op.cu.cc @@ -31,5 +31,5 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_avg, double, int, int64_t, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cc index a1509a89eb3b3..a0181c9f0e7af 100644 --- a/paddle/fluid/operators/collective/c_reduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cc @@ -53,4 +53,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_max, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc index 8973de0a19675..24f3dffd0517e 100644 --- a/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_max, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_min_op.cc b/paddle/fluid/operators/collective/c_reduce_min_op.cc index 9b53d80e01607..621272895fe4c 100644 --- a/paddle/fluid/operators/collective/c_reduce_min_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_min_op.cc @@ -52,4 +52,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_min, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc index e3239cb812cd9..c7d979bd932b6 100644 --- a/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_min, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git 
a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h index d90fb88fe8f3f..0ea4187ffc4f2 100644 --- a/paddle/fluid/operators/collective/c_reduce_op.h +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -85,7 +85,7 @@ class CReduceOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); gloo::ReduceOptions opts(gloo->GetContext()); opts.setInput(const_cast(send_buff), send_numel); @@ -113,14 +113,14 @@ class CReduceOpCPUKernel : public framework::OpKernel { &gloo::product)); break; default: - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::InvalidArgument( - "Invalid reduce type: %d.", red_type)); + PADDLE_ENFORCE_EQ( + true, + false, + phi::errors::InvalidArgument("Invalid reduce type: %d.", red_type)); } gloo::reduce(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } @@ -158,7 +158,7 @@ class CReduceOpXPUKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -169,7 +169,7 @@ class CReduceOpXPUKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -205,8 +205,8 @@ class CReduceOpXPUKernel : public framework::OpKernel { break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW( + phi::errors::InvalidArgument("Invalid reduce type: %d", red_type)); } if (comm_ctx) { @@ -222,7 +222,7 @@ class CReduceOpXPUKernel : public framework::OpKernel { stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU.")); #endif } @@ -260,7 +260,7 @@ class CReduceOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -271,7 +271,7 @@ class CReduceOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -311,11 +311,11 @@ class CReduceOpCUDAKernel : public framework::OpKernel { #endif default: - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::InvalidArgument( - "red_type must be one of kRedSum, " - "kRedMax, kRedMin, kRedProd.")); + PADDLE_ENFORCE_EQ( + true, + false, + phi::errors::InvalidArgument("red_type must be one of kRedSum, " + "kRedMax, kRedMin, kRedProd.")); } if (comm_ctx) { @@ -331,10 +331,10 @@ class CReduceOpCUDAKernel : public framework::OpKernel { stream)); } #else - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::Unavailable( - "PaddlePaddle should compile with GPU..")); + PADDLE_ENFORCE_EQ( + true, + false, + phi::errors::Unavailable("PaddlePaddle should compile with GPU..")); #endif } }; diff --git a/paddle/fluid/operators/collective/c_reduce_prod_op.cc b/paddle/fluid/operators/collective/c_reduce_prod_op.cc index 20dacd19b382b..c34e799f5d8e1 100644 --- a/paddle/fluid/operators/collective/c_reduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_prod_op.cc @@ -53,4 +53,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_prod, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc index 675c274eb0638..b8b562031bc4e 100644 --- a/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc @@ -31,4 +31,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_prod, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op.cc b/paddle/fluid/operators/collective/c_reduce_sum_op.cc index 72be5c391fca2..5bf5c1c2f8b9f 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op.cc @@ -53,4 +53,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_sum, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc index dfae966a35eb0..56fd0e1293389 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc @@ -31,5 +31,5 @@ PD_REGISTER_STRUCT_KERNEL(c_reduce_sum, double, int, int64_t, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cc index 11c0094340f08..7726c3bf5ca41 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cc @@ -32,7 +32,7 @@ class CReduceScatterOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dim[0] % nranks, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dim[0] (%d) is not divisible by nranks(%d)", dim[0], nranks)); dim[0] /= nranks; } @@ -81,4 +81,4 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc 
b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 617a8f7b7f941..e00433ad7b4d6 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -46,7 +46,7 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -57,12 +57,12 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); PADDLE_ENFORCE_EQ(out_dims[0] % comm_ctx->GetSize(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor X's " "dim[0] (%d) should be divisible by nranks(%d)", out_dims[0], @@ -74,7 +74,7 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { comm = platform::NCCLCommContext::Instance().Get(rid, place); PADDLE_ENFORCE_EQ(out_dims[0] % comm->nranks(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor X's " "dim[0] (%d) should be divisible by nranks(%d)", out_dims[0], @@ -90,7 +90,7 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { int nranks = comm_ctx ? comm_ctx->GetSize() : comm->nranks(); PADDLE_ENFORCE_EQ(out_dims[0] % nranks, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor X's " "dim[0] (%d) should be divisible by nranks(%d)", out_dims[0], @@ -117,7 +117,7 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { stream)); } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -140,5 +140,5 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.h b/paddle/fluid/operators/collective/c_reducescatter_op.h index 52af0b9c43541..9f978f3f94bf3 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.h +++ b/paddle/fluid/operators/collective/c_reducescatter_op.h @@ -31,7 +31,7 @@ template class CReduceScatterOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unimplemented cpu kernel for CReduceScatterOp.")); } }; diff --git a/paddle/fluid/operators/collective/c_scatter_op.cc b/paddle/fluid/operators/collective/c_scatter_op.cc index 40b6eeacf8030..d3caf13485036 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.cc +++ b/paddle/fluid/operators/collective/c_scatter_op.cc @@ -29,20 +29,20 @@ class CScatterOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of ranks (%d) must be greater than 1 " "to use collective op (c_scatter op).", nranks)); PADDLE_ENFORCE_GE( root_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The root_id (%d) for c_scatter_op must 
be non-negative.", root_id)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_scatter_op must be non-negative.", root_id)); framework::DDim dim = ctx->GetInputDim("X"); @@ -96,4 +96,4 @@ PD_REGISTER_STRUCT_KERNEL(c_scatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_scatter_op.cu.cc b/paddle/fluid/operators/collective/c_scatter_op.cu.cc index fc7a83ca638ee..7cfe5b6785b5a 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_scatter_op.cu.cc @@ -47,13 +47,13 @@ class CScatterOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( root_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The root_id (%d) for c_scatter_op must be non-negative.", root_id)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_scatter_op must be non-negative.", ring_id)); @@ -62,7 +62,7 @@ class CScatterOpCUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -73,12 +73,12 @@ class CScatterOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); PADDLE_ENFORCE_EQ(nranks, comm_ctx->GetSize(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of ranks (%d) you set of must " "be equal to comm_ctx->GetSize() (%d).", nranks, @@ -90,7 +90,7 @@ class CScatterOpCUDAKernel : public framework::OpKernel { comm = platform::NCCLCommContext::Instance().Get(ring_id, place); PADDLE_ENFORCE_EQ(nranks, comm->nranks(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of ranks (%d) you set of must " "be equal to comm->nranks (%d).", nranks, @@ -158,7 +158,7 @@ class CScatterOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( true, false, - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -177,4 +177,4 @@ PD_REGISTER_STRUCT_KERNEL(c_scatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_scatter_op.h b/paddle/fluid/operators/collective/c_scatter_op.h index 76f3350a64c05..164b7f156de0a 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.h +++ b/paddle/fluid/operators/collective/c_scatter_op.h @@ -44,7 +44,7 @@ class CScatterOpCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gloo->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "You must initialize the gloo environment first to use it.")); int64_t send_numel = out->numel(); @@ -66,7 +66,7 @@ class CScatterOpCPUKernel : public framework::OpKernel { gloo::scatter(opts); #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON")); #endif } diff --git 
a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cc b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cc index e4de0ceb136c1..496733759adb3 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cc +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cc @@ -46,7 +46,7 @@ class CSoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { if (ctx->IsRuntime() || (logits_dims[i] > 0 && labels_dims[i] > 0)) { PADDLE_ENFORCE_EQ(logits_dims[i], labels_dims[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Logits) and Input(Label) should in " "same shape in dimensions except axis.")); } @@ -56,7 +56,7 @@ class CSoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( labels_dims[logits_rank - 1], 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the last dimension of Input(Label) should be 1." "But received: the last dimension of Input(Label) is [%d]," "the last dimension is [%d]", @@ -130,22 +130,22 @@ class CSoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Loss")), - true, - platform::errors::InvalidArgument( - "Input(Loss@Grad) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Softmax"), - true, - platform::errors::InvalidArgument( - "Input(Softmax) should be not null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Loss")), + true, + phi::errors::InvalidArgument("Input(Loss@Grad) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("Softmax"), + true, + phi::errors::InvalidArgument("Input(Softmax) should be not null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Label"), true, - platform::errors::InvalidArgument("Input(Label) should be not null.")); + phi::errors::InvalidArgument("Input(Label) should be not null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Logits")), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Logits@Grad) should be not null.")); ctx->SetOutputDim(framework::GradVarName("Logits"), @@ -209,4 +209,4 @@ PD_REGISTER_STRUCT_KERNEL(c_softmax_with_cross_entropy, ops::CSoftmaxWithCrossEntropyOpCPUKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu index e65ebafad7235..80ce7ce50c4a0 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu @@ -155,7 +155,7 @@ struct CSoftmaxWithCrossEntropyFunctor { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -166,7 +166,7 @@ struct CSoftmaxWithCrossEntropyFunctor { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -551,11 +551,11 @@ PD_REGISTER_STRUCT_KERNEL(c_softmax_with_cross_entropy, ops::CSoftmaxWithCrossEntropyOpCUDAKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(c_softmax_with_cross_entropy_grad, GPU, ALL_LAYOUT, ops::CSoftmaxWithCrossEntropyGradCUDAKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h index 9b6a2c86897cb..3689cbcefd9bd 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.h @@ -33,7 +33,7 @@ template class CSoftmaxWithCrossEntropyOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support c_embedding for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc index 499b25e65974b..65329ccd8b269 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc @@ -278,7 +278,7 @@ struct CSoftmaxWithCrossEntropyFunctor { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -289,7 +289,7 @@ struct CSoftmaxWithCrossEntropyFunctor { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "BKCLCommContext is nullptr, collective op should " "has ring_id attr.")); diff --git a/paddle/fluid/operators/collective/c_split_op.cc b/paddle/fluid/operators/collective/c_split_op.cc index dd65b99e3b7ee..f684c6fe35cf9 100644 --- a/paddle/fluid/operators/collective/c_split_op.cc +++ b/paddle/fluid/operators/collective/c_split_op.cc @@ -27,38 +27,38 @@ class CSplitOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); int ring_id = ctx->Attrs().Get("ring_id"); - PADDLE_ENFORCE_GE(nranks, - 2, - platform::errors::InvalidArgument( - "The number of ranks (%d) for c_split " - "must be greater than 1.", - nranks)); + PADDLE_ENFORCE_GE( + nranks, + 2, + phi::errors::InvalidArgument("The number of ranks (%d) for c_split " + "must be greater than 1.", + nranks)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_split must be non-negative.", ring_id)); PADDLE_ENFORCE_GE( rank, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank (%d) for c_split must be non-negative.", rank)); - PADDLE_ENFORCE_LT(rank, - nranks, - platform::errors::InvalidArgument( - "The value of rank (%d) for c_split must " - "be less than that of nranks.", - rank, - nranks)); + PADDLE_ENFORCE_LT( + rank, + nranks, + phi::errors::InvalidArgument("The value of rank (%d) for c_split must " + "be less than that of nranks.", + rank, + nranks)); framework::DDim dim = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ( dim[dim.size() - 1] % nranks, 0, - platform::errors::InvalidArgument("The last dimension (%d) of the X " - "should be divisible by nranks (%d)", - dim[dim.size() - 1], - nranks)); + phi::errors::InvalidArgument("The last dimension (%d) of the X " + "should be divisible by nranks (%d)", + dim[dim.size() - 1], + nranks)); dim[dim.size() - 1] = dim[dim.size() - 1] / nranks; if (dim[0] < 0) dim[0] = -1; diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc index 526726ae3c772..8d1134be70de1 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cu.cc @@ -24,5 +24,5 @@ PD_REGISTER_STRUCT_KERNEL(c_sync_calc_stream, double, int, int64_t, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.h b/paddle/fluid/operators/collective/c_sync_calc_stream_op.h index e100397924af5..a0e2d858ebd38 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op.h +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op.h @@ -51,14 +51,14 @@ class CSyncCalcStreamKernel : public framework::OpKernel { auto place = ctx.GetPlace(); PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Sync stream op can run on xpu place only for now.")); auto dev_ctx = static_cast( platform::DeviceContextPool::Instance().Get(place)); dev_ctx->Wait(); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } diff --git 
a/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc index 24157f1c64a6c..1448e1e3745ec 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op_xpu.cc @@ -25,4 +25,4 @@ PD_REGISTER_STRUCT_KERNEL(c_sync_calc_stream, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.h b/paddle/fluid/operators/collective/c_sync_comm_stream_op.h index d5fdad8f04f86..d67ef6820d021 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.h +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.h @@ -50,7 +50,7 @@ class CSyncCommStreamKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -73,7 +73,7 @@ class CSyncCommStreamKernel : public framework::OpKernel { auto place = ctx.GetPlace(); PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Sync stream op can run on xpu place only for now.")); int ring_id = ctx.Attr("ring_id"); XPUStream stream = nullptr; @@ -82,7 +82,7 @@ class CSyncCommStreamKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -102,7 +102,7 @@ class CSyncCommStreamKernel : public framework::OpKernel { platform::XPUStreamSync(stream); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU or XPU.")); #endif } diff --git a/paddle/fluid/operators/collective/c_wait_comm_op.cc b/paddle/fluid/operators/collective/c_wait_comm_op.cc index fbb9c0d1ca7ce..10f4d9726f21b 100644 --- a/paddle/fluid/operators/collective/c_wait_comm_op.cc +++ b/paddle/fluid/operators/collective/c_wait_comm_op.cc @@ -43,7 +43,7 @@ class CWaitCommOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( platform::is_gpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "wait_comm op can run on gpu place only for now, but got %s", place.DebugString())); @@ -62,7 +62,7 @@ class CWaitCommOp : public framework::OperatorBase { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
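The companion change in the registration hunks swaps the plat::float16 alias for the fully qualified phi::dtype::float16 in each kernel's dtype list. A sketch of the shape of such a registration; the op name and kernel class here are placeholders, not taken from this patch:

    PD_REGISTER_STRUCT_KERNEL(example_op,            // placeholder op name
                              CPU,                   // backend
                              ALL_LAYOUT,            // layout
                              ops::ExampleOpKernel,  // placeholder kernel class
                              float,
                              double,
                              phi::dtype::float16) {}  // previously plat::float16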
" @@ -94,7 +94,7 @@ class CWaitCommOp : public framework::OperatorBase { PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamWaitEvent(compute_stream, event, 0)); #endif #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } diff --git a/paddle/fluid/operators/collective/c_wait_compute_op.cc b/paddle/fluid/operators/collective/c_wait_compute_op.cc index 040e86c46b9ec..a548998ce757d 100644 --- a/paddle/fluid/operators/collective/c_wait_compute_op.cc +++ b/paddle/fluid/operators/collective/c_wait_compute_op.cc @@ -43,7 +43,7 @@ class CWaitComputeOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( platform::is_gpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "wait_compute op can run on gpu place only for now, but got %s", place.DebugString())); @@ -62,7 +62,7 @@ class CWaitComputeOp : public framework::OperatorBase { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -94,7 +94,7 @@ class CWaitComputeOp : public framework::OperatorBase { PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamWaitEvent(comm_stream, event, 0)); #endif #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } diff --git a/paddle/fluid/operators/collective/gen_bkcl_id_op.cc b/paddle/fluid/operators/collective/gen_bkcl_id_op.cc index fc765e3bde983..f7aa3baea0d60 100644 --- a/paddle/fluid/operators/collective/gen_bkcl_id_op.cc +++ b/paddle/fluid/operators/collective/gen_bkcl_id_op.cc @@ -34,10 +34,10 @@ namespace operators { static void GenBKCLID(std::vector* bkcl_ids) { for (size_t i = 0; i < bkcl_ids->size(); ++i) { BKCLResult_t ret = bkcl_get_unique_id(&(*bkcl_ids)[i]); - PADDLE_ENFORCE_EQ(BKCL_SUCCESS, - ret, - platform::errors::PreconditionNotMet( - "bkcl get unique id failed [%d]", ret)); + PADDLE_ENFORCE_EQ( + BKCL_SUCCESS, + ret, + phi::errors::PreconditionNotMet("bkcl get unique id failed [%d]", ret)); } } @@ -49,8 +49,8 @@ static void CopyBKCLIDToVar(const std::vector& bkcl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto bkcl_id = var->GetMutable(); memcpy(bkcl_id, &bkcl_ids[i], sizeof(BKCLUniqueId)); } @@ -74,14 +74,14 @@ class GenBKCLIdOp : public framework::OperatorBase { PADDLE_ENFORCE_GE( trainer_id, 0, - platform::errors::InvalidArgument("trainer_id %d is less than 0. Its " - "valid range is [0, trainer_size)")); + phi::errors::InvalidArgument("trainer_id %d is less than 0. Its " + "valid range is [0, trainer_size)")); PADDLE_ENFORCE_LT( trainer_id, static_cast(trainers.size()), - platform::errors::OutOfRange("trainer_id %d is out of range. Its valid " - "range is [0, trainer_size)", - trainer_id)); + phi::errors::OutOfRange("trainer_id %d is out of range. 
Its valid " + "range is [0, trainer_size)", + trainer_id)); int bkcl_comm_num = Attr("bkcl_comm_num"); int use_hierarchical_allreduce = Attr("use_hierarchical_allreduce"); @@ -93,18 +93,18 @@ class GenBKCLIdOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( trainers.size(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of collective trainers %llu <= 1", trainers.size())); PADDLE_ENFORCE_GT( inter_nranks, 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "inter_nranks %d <= 1 while in hierarchical allreduce mode", inter_nranks)); PADDLE_ENFORCE_EQ( trainers.size() % inter_nranks, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of trainers %llu mod inter_nranks %d is not equal 0", trainers.size(), inter_nranks)); diff --git a/paddle/fluid/operators/collective/gen_nccl_id_op.cc b/paddle/fluid/operators/collective/gen_nccl_id_op.cc index 1d03cb151e4a0..37406b2918d7f 100644 --- a/paddle/fluid/operators/collective/gen_nccl_id_op.cc +++ b/paddle/fluid/operators/collective/gen_nccl_id_op.cc @@ -49,8 +49,8 @@ static void CopyNCCLIDToVar(const std::vector& nccl_ids, auto var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); + phi::errors::NotFound("Variable with name %s is not found", + var_name.c_str())); auto nccl_id = var->GetMutable(); memcpy(nccl_id, &nccl_ids[i], sizeof(ncclUniqueId)); } @@ -74,14 +74,14 @@ class GenNCCLIdOp : public framework::OperatorBase { PADDLE_ENFORCE_GE( trainer_id, 0, - platform::errors::InvalidArgument("trainer_id %d is less than 0. Its " - "valid range is [0, trainer_size)")); + phi::errors::InvalidArgument("trainer_id %d is less than 0. Its " + "valid range is [0, trainer_size)")); PADDLE_ENFORCE_LT( trainer_id, static_cast(trainers.size()), - platform::errors::OutOfRange("trainer_id %d is out of range. Its valid " - "range is [0, trainer_size)", - trainer_id)); + phi::errors::OutOfRange("trainer_id %d is out of range. 
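One detail visible in both Gen*IdOp hunks: the InvalidArgument message for the lower-bound check carries a %d placeholder but, unlike the OutOfRange check beside it, passes no trainer_id argument; the patch only renames the namespace and leaves that pre-existing quirk intact. The pair of checks as presumably intended, with the missing argument supplied in this sketch only:

    PADDLE_ENFORCE_GE(
        trainer_id,
        0,
        phi::errors::InvalidArgument("trainer_id %d is less than 0. Its "
                                     "valid range is [0, trainer_size)",
                                     trainer_id));  // argument added in this sketch
    PADDLE_ENFORCE_LT(
        trainer_id,
        static_cast<int>(trainers.size()),
        phi::errors::OutOfRange("trainer_id %d is out of range. Its valid "
                                "range is [0, trainer_size)",
                                trainer_id));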
Its valid " + "range is [0, trainer_size)", + trainer_id)); int nccl_comm_num = Attr("nccl_comm_num"); int use_hierarchical_allreduce = Attr("use_hierarchical_allreduce"); @@ -93,18 +93,18 @@ class GenNCCLIdOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( trainers.size(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of collective trainers %llu <= 1", trainers.size())); PADDLE_ENFORCE_GT( inter_nranks, 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "inter_nranks %d <= 1 while in hierarchical allreduce mode", inter_nranks)); PADDLE_ENFORCE_EQ( trainers.size() % inter_nranks, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of trainers %llu mod inter_nranks %d is not equal 0", trainers.size(), inter_nranks)); diff --git a/paddle/fluid/operators/collective/global_gather_op.cc b/paddle/fluid/operators/collective/global_gather_op.cc index de93ca747b4e9..1b74fc6bde5f7 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cc @@ -32,18 +32,18 @@ class GlobalGatherOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global gather op must be non-negative.", ring_id)); auto input_dims = ctx->GetInputDim("X"); auto ndim_input = input_dims.size(); // dim check - PADDLE_ENFORCE_EQ(ndim_input, - 2, - platform::errors::InvalidArgument( - "The input tensor's dimension must be 2. " - "But received input's dimension = %d.", - ndim_input)); + PADDLE_ENFORCE_EQ( + ndim_input, + 2, + phi::errors::InvalidArgument("The input tensor's dimension must be 2. " + "But received input's dimension = %d.", + ndim_input)); framework::DDim out_dims = common::make_ddim({-1, -1}); ctx->SetOutputDim("Out", out_dims); } @@ -119,4 +119,4 @@ PD_REGISTER_STRUCT_KERNEL(global_gather, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc index b3dcc2aac9423..8c0285cba049d 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cu.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cu.cc @@ -41,11 +41,11 @@ struct GlobalGatherFunctor { auto global_count_type = framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in local_count.")); } if (global_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in global_count.")); } auto out = ctx.Output("Out"); @@ -80,7 +80,7 @@ struct GlobalGatherFunctor { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global gather op must be non-negative.", ring_id)); auto place = ctx.GetPlace(); @@ -94,7 +94,7 @@ struct GlobalGatherFunctor { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm " @@ -105,7 +105,7 @@ struct GlobalGatherFunctor { 
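Many of the collective kernels in this section also share the communicator-lookup flow whose messages are being retagged: when FLAGS_dynamic_static_unified_comm is on, the kernel resolves its NCCL context from the per-ring CommContextManager and requires it to exist. A condensed sketch of that control flow, with the long error messages abbreviated:

    const auto& mgr = phi::distributed::CommContextManager::GetInstance();
    if (FLAGS_dynamic_static_unified_comm) {
      PADDLE_ENFORCE_EQ(
          mgr.Has(std::to_string(ring_id)),
          true,
          phi::errors::InvalidArgument(
              "ring_id %d not found in CommContextManager.", ring_id));
      auto* comm_ctx = static_cast<phi::distributed::NCCLCommContext*>(
          mgr.Get(std::to_string(ring_id)));
      PADDLE_ENFORCE_NE(comm_ctx,
                        nullptr,
                        phi::errors::Unavailable(
                            "NCCLCommContext is nullptr; the collective op "
                            "requires a ring_id attribute."));
      stream = comm_ctx->GetStream();  // run the collective on the comm stream
    }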
comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); @@ -192,12 +192,11 @@ struct GlobalGatherFunctor { } } #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -215,11 +214,11 @@ struct GlobalGatherProcessGroupFunctor { auto global_count_type = framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in local_count.")); } if (global_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in global_count.")); } auto out = ctx.Output("Out"); @@ -251,7 +250,7 @@ struct GlobalGatherProcessGroupFunctor { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global gather op must be non-negative.", ring_id)); auto place = ctx.GetPlace(); @@ -309,12 +308,11 @@ struct GlobalGatherProcessGroupFunctor { #endif #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -349,4 +347,4 @@ PD_REGISTER_STRUCT_KERNEL(global_gather, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/global_gather_op.h b/paddle/fluid/operators/collective/global_gather_op.h index 723c5e48a5ae4..0ab3dd5da985f 100644 --- a/paddle/fluid/operators/collective/global_gather_op.h +++ b/paddle/fluid/operators/collective/global_gather_op.h @@ -29,7 +29,7 @@ template class GlobalGatherOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support global gather op for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/global_scatter_op.cc b/paddle/fluid/operators/collective/global_scatter_op.cc index 095f968306bdc..e6b1bb8295bde 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cc @@ -34,18 +34,18 @@ class GlobalScatterOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global scatter op must be non-negative.", ring_id)); auto input_dims = ctx->GetInputDim("X"); auto ndim_input = input_dims.size(); // dim check - PADDLE_ENFORCE_EQ(ndim_input, - 2, - platform::errors::InvalidArgument( - "The input tensor's dimension must be 2. 
" - "But received input's dimension = %d.", - ndim_input)); + PADDLE_ENFORCE_EQ( + ndim_input, + 2, + phi::errors::InvalidArgument("The input tensor's dimension must be 2. " + "But received input's dimension = %d.", + ndim_input)); framework::DDim out_dims = common::make_ddim({-1, -1}); ctx->SetOutputDim("Out", out_dims); @@ -123,4 +123,4 @@ PD_REGISTER_STRUCT_KERNEL(global_scatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc index 096c33c3ef3cc..1eeb23fa602e2 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cu.cc @@ -42,11 +42,11 @@ struct GlobalScatterFunctor { auto global_count_type = framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in local_count.")); } if (global_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in global_count.")); } auto out = ctx.Output("Out"); @@ -79,7 +79,7 @@ struct GlobalScatterFunctor { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global scatter op must be non-negative.", ring_id)); @@ -95,7 +95,7 @@ struct GlobalScatterFunctor { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -106,7 +106,7 @@ struct GlobalScatterFunctor { comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -201,12 +201,11 @@ struct GlobalScatterFunctor { } #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -224,11 +223,11 @@ struct GlobalScatterProcessGroupFunctor { auto global_count_type = framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in local_count.")); } if (global_count_type != framework::proto::VarType::INT64) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Please use int64 type in global_count.")); } auto out = ctx.Output("Out"); @@ -258,7 +257,7 @@ struct GlobalScatterProcessGroupFunctor { PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for global scatter op must be non-negative.", ring_id)); @@ -316,12 +315,11 @@ struct GlobalScatterProcessGroupFunctor { #endif #else - PADDLE_THROW( - platform::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW(phi::errors::Unavailable("NCCL version >= 2.7.3 is needed.")); #endif #else PADDLE_THROW( - platform::errors::Unavailable("PaddlePaddle should compile with GPU.")); + phi::errors::Unavailable("PaddlePaddle should compile with GPU.")); #endif } }; @@ -356,4 +354,4 @@ PD_REGISTER_STRUCT_KERNEL(global_scatter, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/global_scatter_op.h b/paddle/fluid/operators/collective/global_scatter_op.h index fc4b48500c071..36ea0b151dc4b 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.h +++ b/paddle/fluid/operators/collective/global_scatter_op.h @@ -29,7 +29,7 @@ template class GlobalScatterOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support global scatter op for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc index f680818da2d94..d30d52821e74e 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cc @@ -96,4 +96,4 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc index b4773a8eb5456..fc856ea04e6f2 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc @@ -34,5 +34,5 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, #if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif - 
plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op_xpu.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op_xpu.cc index 9638bf68d1717..323d39f62092e 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op_xpu.cc @@ -28,4 +28,4 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, ops::CAllReduceSumXPUKernel, float, int, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cc b/paddle/fluid/operators/collective/partial_allgather_op.cc index 75220ea5b30a5..3ae33ecd9eeba 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cc @@ -26,14 +26,14 @@ class PartialAllGatherOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); - PADDLE_ENFORCE_GE(nranks, - 2, - platform::errors::InvalidArgument( - "The value of nranks should be >=2.")); + PADDLE_ENFORCE_GE( + nranks, + 2, + phi::errors::InvalidArgument("The value of nranks should be >=2.")); PADDLE_ENFORCE_EQ( (rank >= 0 && rank < nranks), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank (%d) for partial_allgather op must >=0 and { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -67,7 +67,7 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -87,17 +87,17 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, real_nranks, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, real_nranks)); PADDLE_ENFORCE_EQ(rank, real_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "rank: %s should equal to %s", rank, real_rank)); PADDLE_ENFORCE_EQ( (numel % nranks), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input numel (%d) must be divisible by nranks(%d)", numel, nranks)); @@ -137,7 +137,7 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { } } #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU.")); #endif } @@ -160,5 +160,5 @@ PD_REGISTER_STRUCT_KERNEL(partial_allgather, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/partial_allgather_op.h b/paddle/fluid/operators/collective/partial_allgather_op.h index 178545f4dd2d3..4b410154712e2 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.h +++ b/paddle/fluid/operators/collective/partial_allgather_op.h @@ -30,7 +30,7 @@ template class PartialAllGatherOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support partial_allgather for cpu 
kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/partial_recv_op.cc b/paddle/fluid/operators/collective/partial_recv_op.cc index 5d8a1276a630e..2a512260a792d 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cc @@ -34,26 +34,26 @@ class PartialRecvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for partial_recv op must be non-negative.", peer)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for partial_recv op must be non-negative.", ring_id)); PADDLE_ENFORCE_GE(num, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num (%d) for partial_send op must >=1", num)); PADDLE_ENFORCE_EQ( (id >= 0 && id < num), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id (%d) for partial_send op must >=0 and SetOutputDim("Out", common::make_ddim(out_shape)); @@ -137,4 +137,4 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/partial_recv_op.cu.cc b/paddle/fluid/operators/collective/partial_recv_op.cu.cc index 912de046b63af..7e623706b2037 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cu.cc @@ -49,26 +49,26 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for partial_recv op must be non-negative.", rid)); PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for partial_recv op must be non-negative.", peer)); PADDLE_ENFORCE_GE(num, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num (%d) for partial_recv op must >=1", num)); PADDLE_ENFORCE_EQ( (id >= 0 && id < num), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id (%d) for partial_recv op must >=0 and { PADDLE_ENFORCE_EQ( comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
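The partial_recv checks above (and the partial_send ones that follow) repeat one small invariant set: peer and ring_id must be non-negative, num must be at least 1, and id must fall in [0, num). Condensed into a sketch with shortened messages:

    PADDLE_ENFORCE_GE(
        peer, 0,
        phi::errors::InvalidArgument(
            "The peer (%d) must be non-negative.", peer));
    PADDLE_ENFORCE_GE(
        num, 1,
        phi::errors::InvalidArgument("The num (%d) must be >= 1.", num));
    PADDLE_ENFORCE_EQ(
        (id >= 0 && id < num), true,
        phi::errors::InvalidArgument(
            "The id (%d) must be in the range [0, num).", id));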
" @@ -110,7 +110,7 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -134,13 +134,13 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { stream = ctx.cuda_device_context().stream(); } - PADDLE_ENFORCE_LT(peer, - nranks, - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than nranks (%d).", - peer, - nranks)); + PADDLE_ENFORCE_LT( + peer, + nranks, + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than nranks (%d).", + peer, + nranks)); ncclDataType_t dtype = platform::ToNCCLDataType(type); @@ -161,7 +161,7 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { << offset << "] from " << peer; } #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should be compiled with NCCL and " "NCCL version >= 2.7.3 is needed.")); #endif @@ -185,5 +185,5 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/partial_recv_op.h b/paddle/fluid/operators/collective/partial_recv_op.h index baf47ef9dff8d..0840b85e504b4 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.h +++ b/paddle/fluid/operators/collective/partial_recv_op.h @@ -28,7 +28,7 @@ template class PartialRecvOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support partial_recv for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/partial_send_op.cc b/paddle/fluid/operators/collective/partial_send_op.cc index a655479d3d8af..388ece7f4ba12 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cc @@ -31,22 +31,22 @@ class PartialSendOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for partial_send op must be non-negative.", peer)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for partial_send op must be non-negative.", ring_id)); PADDLE_ENFORCE_GE(num, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num (%d) for partial_send op must >=1", num)); PADDLE_ENFORCE_EQ( (id >= 0 && id < num), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id (%d) for partial_send op must >=0 and { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for partial_send op must be non-negative.", rid)); PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for partial_send op must be non-negative.", peer)); PADDLE_ENFORCE_GE(num, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num (%d) for partial_send op must >=1", num)); PADDLE_ENFORCE_EQ( (id >= 0 && id < num), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id (%d) for partial_send op must >=0 and { PADDLE_ENFORCE_EQ( comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + 
phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -104,7 +104,7 @@ class PartialSendCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); @@ -128,13 +128,13 @@ class PartialSendCUDAKernel : public framework::OpKernel { stream = ctx.cuda_device_context().stream(); } - PADDLE_ENFORCE_LT(peer, - nranks, - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than ranks (%d).", - peer, - nranks)); + PADDLE_ENFORCE_LT( + peer, + nranks, + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than ranks (%d).", + peer, + nranks)); ncclDataType_t dtype = platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); @@ -157,9 +157,9 @@ class PartialSendCUDAKernel : public framework::OpKernel { << offset << "] to " << peer; } #else - PADDLE_THROW(platform::errors::Unavailable( - "PaddlePaddle should be compiled with NCCL " - "and NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW( + phi::errors::Unavailable("PaddlePaddle should be compiled with NCCL " + "and NCCL version >= 2.7.3 is needed.")); #endif } }; @@ -181,5 +181,5 @@ PD_REGISTER_STRUCT_KERNEL(partial_send, #endif int, int64_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/partial_send_op.h b/paddle/fluid/operators/collective/partial_send_op.h index b7b72789b87ff..9076ce014fcab 100644 --- a/paddle/fluid/operators/collective/partial_send_op.h +++ b/paddle/fluid/operators/collective/partial_send_op.h @@ -29,7 +29,7 @@ template class PartialSendOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Do not support partial_send for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/recv_v2_op.cc b/paddle/fluid/operators/collective/recv_v2_op.cc index 40757ca89daa8..1448aad5f9bfa 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cc @@ -30,12 +30,12 @@ class RecvOpV2 : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for recv_v2 op must be non-negative.", peer)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for recv_v2 op must be non-negative.", ring_id)); if (ctx->GetOutputsVarType("Out").front() == @@ -44,7 +44,7 @@ class RecvOpV2 : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( out_shape.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of the output shape must be greater than 0 " "but the value given is %d.", out_shape.size())); @@ -55,7 +55,7 @@ class RecvOpV2 : public framework::OperatorWithKernel { for (size_t i = 0; i < out_shape.size(); ++i) { PADDLE_ENFORCE_GE(out_shape[i], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape attribute for recv_v2 must be set " "explicitly, but the %dth element is %d which " "is less than 1. 
Or dynamic_shape should be " @@ -122,4 +122,4 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index 37cbf9dffdd3d..be849d7e6c53b 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -41,7 +41,7 @@ framework::DDim recv_shape_info(const platform::Place &place, PADDLE_ENFORCE_EQ( ((stream != nullptr && comm != nullptr) || comm_ctx != nullptr), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "NCCLComm and Stream should be provided if use NCCL " "to send the shape info.")); } @@ -131,14 +131,14 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for recv_v2 op must be non-negative.", rid)); int peer = ctx.Attr("peer"); PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for recv_v2 op must be non-negative.", peer)); gpuStream_t stream = nullptr; @@ -180,7 +180,7 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -191,20 +191,20 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); VLOG(3) << "new comm_context_manager has rid " << rid; } else { comm = platform::NCCLCommContext::Instance().Get(rid, place); - PADDLE_ENFORCE_LT(peer, - comm->nranks(), - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than comm->nranks (%d).", - peer, - comm->nranks())); + PADDLE_ENFORCE_LT( + peer, + comm->nranks(), + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than comm->nranks (%d).", + peer, + comm->nranks())); stream = comm->stream(); VLOG(3) << "old NCCLCommContext has rid " << rid; } @@ -223,8 +223,8 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dynamic_shape, false, - platform::errors::InvalidArgument("Dynamic shape for send/recv not " - "support LoDTensorArray for now.")); + phi::errors::InvalidArgument("Dynamic shape for send/recv not " + "support LoDTensorArray for now.")); auto out_array = out_var->GetMutable(); for (size_t idx = 0; idx < out_array->size(); ++idx) { VLOG(3) << "LodTensorArray: idx(" << idx << ")"; @@ -267,20 +267,20 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { comm_ctx->Recv(out, numel, peer, stream); } else { comm = platform::NCCLCommContext::Instance().Get(rid, place); - PADDLE_ENFORCE_LT(peer, - comm->nranks(), - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than comm->nranks (%d).", - peer, - comm->nranks())); + PADDLE_ENFORCE_LT( + peer, + comm->nranks(), + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than comm->nranks (%d).", + peer, + comm->nranks())); 
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclRecv( out->data(), numel, dtype, peer, comm->comm(), stream)); VLOG(3) << "rank " << comm->rank() << " recv " << common::product(out->dims()) << " from " << peer; } #else - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "PaddlePaddle should be compiled with NCCL and " "NCCL version >= 2.7.3 is needed.")); #endif @@ -305,5 +305,5 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2, int, int64_t, int8_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/recv_v2_op.h b/paddle/fluid/operators/collective/recv_v2_op.h index e76e4a7b55197..47b1941a73442 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.h +++ b/paddle/fluid/operators/collective/recv_v2_op.h @@ -28,8 +28,8 @@ template class RecvOpV2CPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( - "Do not support recv for cpu kernel now.")); + PADDLE_THROW( + phi::errors::Unavailable("Do not support recv for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/collective/send_v2_op.cc b/paddle/fluid/operators/collective/send_v2_op.cc index 862a6a67813c1..c1763a5cd6478 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cc @@ -28,12 +28,12 @@ class SendOpV2 : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for send_v2 op must be non-negative.", peer)); PADDLE_ENFORCE_GE( ring_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for send_v2 op must be non-negative.", ring_id)); } @@ -94,4 +94,4 @@ PD_REGISTER_STRUCT_KERNEL(send_v2, double, int, int64_t, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index 8c72a7ccd384c..6938f413b0548 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -41,7 +41,7 @@ void send_shape_info(const phi::DenseTensor& x, PADDLE_ENFORCE_EQ( ((stream != nullptr && comm != nullptr) || comm_ctx != nullptr), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "NCCLComm and Stream should be provided if use NCCL " "to send the shape info.")); } @@ -129,14 +129,14 @@ class SendOpV2CUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for send_v2 op must be non-negative.", rid)); int peer = ctx.Attr("peer"); PADDLE_ENFORCE_GE( peer, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The peer (%d) for send_v2 op must be non-negative.", peer)); auto map = distributed::ProcessGroupMapFromGid::getInstance(); if (map->has(rid)) { @@ -171,7 +171,7 @@ class SendOpV2CUDAKernel : public framework::OpKernel { if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. 
" @@ -182,20 +182,20 @@ class SendOpV2CUDAKernel : public framework::OpKernel { comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); stream = comm_ctx->GetStream(); VLOG(3) << "new comm_context_manager has rid " << rid; } else { comm = platform::NCCLCommContext::Instance().Get(rid, place); - PADDLE_ENFORCE_LT(peer, - comm->nranks(), - platform::errors::InvalidArgument( - "The value of peer (%d) you set must " - "be less than comm->nranks (%d).", - peer, - comm->nranks())); + PADDLE_ENFORCE_LT( + peer, + comm->nranks(), + phi::errors::InvalidArgument("The value of peer (%d) you set must " + "be less than comm->nranks (%d).", + peer, + comm->nranks())); stream = comm->stream(); VLOG(3) << "old NCCLCommContext has rid " << rid; } @@ -210,8 +210,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dynamic_shape, false, - platform::errors::InvalidArgument("Dynamic shape for send/recv not " - "support LoDTensorArray for now.")); + phi::errors::InvalidArgument("Dynamic shape for send/recv not " + "support LoDTensorArray for now.")); auto& x_array = x_var->Get(); for (size_t idx = 0; idx < x_array.size(); idx++) { VLOG(3) << "LodTensorArray: idx(" << idx << ")"; @@ -255,9 +255,9 @@ class SendOpV2CUDAKernel : public framework::OpKernel { << common::product(x->dims()) << " to " << peer; } #else - PADDLE_THROW(platform::errors::Unavailable( - "PaddlePaddle should be compiled with NCCL " - "and NCCL version >= 2.7.3 is needed.")); + PADDLE_THROW( + phi::errors::Unavailable("PaddlePaddle should be compiled with NCCL " + "and NCCL version >= 2.7.3 is needed.")); #endif } }; @@ -280,5 +280,5 @@ PD_REGISTER_STRUCT_KERNEL(send_v2, int, int64_t, int8_t, - plat::float16) { + phi::dtype::float16) { } diff --git a/paddle/fluid/operators/collective/send_v2_op.h b/paddle/fluid/operators/collective/send_v2_op.h index 7f51861008942..196e2941e9315 100644 --- a/paddle/fluid/operators/collective/send_v2_op.h +++ b/paddle/fluid/operators/collective/send_v2_op.h @@ -29,8 +29,8 @@ template class SendOpV2CPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx UNUSED) const override { - PADDLE_THROW(platform::errors::Unavailable( - "Do not support send for cpu kernel now.")); + PADDLE_THROW( + phi::errors::Unavailable("Do not support send for cpu kernel now.")); } }; diff --git a/paddle/fluid/operators/common_infer_shape_functions.cc b/paddle/fluid/operators/common_infer_shape_functions.cc index 1c13f873818f4..0c83eeb6da92e 100644 --- a/paddle/fluid/operators/common_infer_shape_functions.cc +++ b/paddle/fluid/operators/common_infer_shape_functions.cc @@ -37,13 +37,13 @@ inline void GetBroadcastDimsArrays(const framework::DDim &x_dims, PADDLE_ENFORCE_GE( axis, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Axis should be great than or equal to 0, but received axis is %d.", axis)); PADDLE_ENFORCE_LE( axis, max_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Axis should be less than or equal to %d, but received axis is %d.", max_dim, axis)); @@ -68,7 +68,7 @@ inline void GetBroadcastDimsArrays(const framework::DDim &x_dims, x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 || y_dims_array[i] <= 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Broadcast dimension mismatch. 
Operands could " "not be broadcast together with the shape of X = [%s] and " "the shape of Y = [%s]. Received [%d] in X is not equal to " @@ -126,7 +126,7 @@ void UnaryOpUnchangedInferShapeCheckAxis(framework::InferShapeContext *ctx) { PADDLE_ENFORCE_GE( axis, -x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attr(axis) value should be in range [-R, R-1], " "R is the rank of Input(X). But received axis: %d, R: %d.", axis, @@ -134,7 +134,7 @@ void UnaryOpUnchangedInferShapeCheckAxis(framework::InferShapeContext *ctx) { PADDLE_ENFORCE_LT( axis, x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attr(axis) value should be in range [-R, R-1], " "R is the rank of Input(X). But received axis: %d, R: %d.", axis, @@ -153,7 +153,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { PADDLE_ENFORCE_EQ( ctx->GetInputsVarType(y_name).front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The var type of input %s should be phi::DenseTensor, but got %s.", ctx->Inputs(y_name).front(), ctx->GetInputsVarType(y_name).front())); @@ -162,7 +162,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { framework::proto::VarType::SELECTED_ROWS) { PADDLE_ENFORCE_EQ(y_dims.size(), 1u, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For binary broadcastable operator, if X is " "Sparse(VarType.SELECTED_ROWS" "), Y must be scalar, and the size of Y should be 1. " @@ -171,7 +171,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { PADDLE_ENFORCE_EQ( y_dims[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "For binary broadcastable operator, if X is " "Sparse(VarType.SELECTED_ROWS" "), Y must be scalar, the first dimension of Y should be 1. " @@ -179,7 +179,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { y_dims[0])); } else if (ctx->GetInputsVarType(x_name).front() != framework::proto::VarType::LOD_TENSOR) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "For binary broadcastable operator, the var type of input X should " "be LOD_TENSOR, but got %s", ctx->GetInputsVarType(x_name).front())); diff --git a/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc b/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc index e684efe12c598..9f3034179fdd7 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc @@ -15,7 +15,7 @@ limitations under the License. 
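For reference, the rule GetBroadcastDimsArrays enforces above: axis must lie in [0, max_dim], and once the shorter shape is aligned at that offset, every dimension pair must either match or have a side that is 1 (or the -1 runtime placeholder). A standalone illustration of the per-dimension test, assuming the two shapes have already been padded to equal rank; this helper is not the Paddle function itself:

    bool DimsBroadcastable(const std::vector<int64_t>& x,
                           const std::vector<int64_t>& y) {
      for (size_t i = 0; i < x.size(); ++i) {
        // Mirrors the hunk's condition:
        // x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 ||
        // y_dims_array[i] <= 1
        if (!(x[i] == y[i] || x[i] <= 1 || y[i] <= 1)) return false;
      }
      return true;
    }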
*/ #include "paddle/fluid/operators/controlflow/conditional_block_op.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/common/flags.h" @@ -72,7 +72,7 @@ class ConditionalBlockInferOp : public ConditionalOp { auto *scope_var = scope.FindVar(Output("Scope")); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Scope must be set in ConditionalBlockInferOp.")); auto *scopes = scope_var->GetMutable>(); scopes->resize(1); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index 981bf0f8b00f5..3b320dd3f7912 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/operators/controlflow/control_flow_op_helper.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif COMMON_DECLARE_bool(use_mkldnn); @@ -73,7 +73,7 @@ class ConditionalBlockOp : public ConditionalOp { auto *scope_var = scope.FindVar(Output(ConditionalOp::kScope)); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Expect Scope variable to be set in conditional_block_op, but " "got a null Scope variable. Please set the Scope variable.")); @@ -139,7 +139,7 @@ class ConditionalBlockInferShape : public framework::InferShapeBase { void operator()(framework::InferShapeContext *context) const override { PADDLE_ENFORCE_EQ(context->HasInputs(ConditionalOp::kCondition), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "conditional_block_op must have condition input.")); } }; @@ -180,14 +180,14 @@ class ConditionalBlockGradOp : public ConditionalOp { auto *scope_var = scope.FindVar(Input(ConditionalOp::kScope)); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Expect Scope variable to be set in conditional_block_op, but " "got a null Scope variable. 
Please set the Scope variable.")); auto &scopes = scope_var->Get>(); PADDLE_ENFORCE_GT( scopes.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expect Scope variable contains at least 1 scope, but got: %d", scopes.size())); framework::Scope &cur_scope = *(scopes[0]); @@ -272,7 +272,7 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( context->HasInputs(ConditionalOp::kCondition), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Condition must be set in conditional_block_grad_op.")); if (context->HasInputs(ConditionalOp::kInputs) && context->HasOutputs(framework::GradVarName(ConditionalOp::kInputs))) { @@ -294,7 +294,7 @@ class ConditionalBlockGradInferVarType : public framework::VarTypeInference { ctx->OutputSize(framework::GradVarName(ConditionalOp::kInputs)); PADDLE_ENFORCE_EQ(input_size, output_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input_size and output_size should be equal for " "conditional_block_grad_op.")); for (size_t i = 0; i < output_size; ++i) { diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.h b/paddle/fluid/operators/controlflow/conditional_block_op.h index 0f04a295ed263..7b24ec5629a48 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.h +++ b/paddle/fluid/operators/controlflow/conditional_block_op.h @@ -53,7 +53,7 @@ class ConditionalOp : public framework::OperatorBase { [&scope](const std::string &var_name) -> const phi::DenseTensor * { auto *var = scope.FindVar(var_name); PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Cannot find variable %s", var_name)); return &var->Get(); }); @@ -64,14 +64,14 @@ class ConditionalOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( ips.size() == 1UL && ips[0]->IsInitialized(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "condition should have one initialized input as condition")); PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(ips[0]->dtype()) == framework::proto::VarType::BOOL && ips[0]->numel() == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "condition input's data type should be bool, " "numel should be 1, actual numel is %d", ips[0]->numel())); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc b/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc index 08569d835fd82..2908d1f5a5f81 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc @@ -38,7 +38,7 @@ static void FindAllConditionalBlockAndConditionalBlockGradOp( PADDLE_ENFORCE_GE( fwd_ops->size(), bwd_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Size of forward ops must be greater or equal to backward ops. The " "number of forward ops is %d and the number of backward ops is %d", fwd_ops->size(), @@ -59,7 +59,7 @@ static void FindAllConditionalBlockAndConditionalBlockGradOp( PADDLE_ENFORCE_GE( fwd_ops->size(), bwd_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more conditional_block_grad ops than " "conditional_block ops in the graph or program. 
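ConditionalOp's input validation above boils down to: the condition must be exactly one initialized tensor of dtype bool holding a single element. The two enforce calls, condensed:

    PADDLE_ENFORCE_EQ(
        ips.size() == 1UL && ips[0]->IsInitialized(),
        true,
        phi::errors::InvalidArgument(
            "condition should have one initialized input as condition"));
    PADDLE_ENFORCE_EQ(
        framework::TransToProtoVarType(ips[0]->dtype()) ==
                framework::proto::VarType::BOOL &&
            ips[0]->numel() == 1,
        true,
        phi::errors::InvalidArgument(
            "condition input's data type should be bool, "
            "numel should be 1, actual numel is %d",
            ips[0]->numel()));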
The number of " "forward ops is %d and the number of backward ops is %d", @@ -122,7 +122,7 @@ static void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOpImpl( bwd_op)) { PADDLE_ENFORCE_EQ(matched_fwd_op, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Found multiple matched conditional_block ops.")); matched_fwd_op = &fwd_op; } @@ -130,7 +130,7 @@ static void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOpImpl( PADDLE_ENFORCE_NOT_NULL( matched_fwd_op, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot find matched forward conditional_block op.")); SetSkipVarsForConditionalBlockOp(const_cast(matched_fwd_op), diff --git a/paddle/fluid/operators/controlflow/control_flow_op_helper.h b/paddle/fluid/operators/controlflow/control_flow_op_helper.h index 0d08ae6d68663..945bcbb4e905e 100644 --- a/paddle/fluid/operators/controlflow/control_flow_op_helper.h +++ b/paddle/fluid/operators/controlflow/control_flow_op_helper.h @@ -96,7 +96,7 @@ static void AssignZeroToParentScope( PADDLE_ENFORCE_EQ( outside_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Type of outside_var %s is NOT phi::DenseTensor, which " "doesn't match input_var %s.", outside_grad_name, @@ -108,7 +108,7 @@ static void AssignZeroToParentScope( } else if (input_var->IsType()) { PADDLE_ENFORCE_EQ(outside_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Type of outside_var %s is NOT LoDTensorArray, " "which doesn't match input_var %s.", outside_grad_name, @@ -121,7 +121,7 @@ static void AssignZeroToParentScope( } PADDLE_ENFORCE_EQ(input_tensors.size(), outside_tensors->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "LoDTensorArray outside_var %s doen't have same " "size as input_var %s.", outside_grad_name, @@ -132,7 +132,7 @@ static void AssignZeroToParentScope( } } else { // TODO(huihuangzheng): add support for SelectedRows - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Conditional block grad op doesn't support non-phi::DenseTensor " "output " "now.")); diff --git a/paddle/fluid/operators/controlflow/depend_op.cc b/paddle/fluid/operators/controlflow/depend_op.cc index 925990ba3ba5f..58ed498ad1b9e 100644 --- a/paddle/fluid/operators/controlflow/depend_op.cc +++ b/paddle/fluid/operators/controlflow/depend_op.cc @@ -50,8 +50,8 @@ class DependOp : public framework::OperatorBase { auto out_name = Output("Out"); PADDLE_ENFORCE_EQ(x_name, out_name, - platform::errors::PreconditionNotMet( - "Input(X) and Output(Out) varibale should be the " + phi::errors::PreconditionNotMet( + "Input(X) and Output(Out) variable should be the " "same, but got Input is %s and Output is %s.", x_name, out_name)); diff --git a/paddle/fluid/operators/controlflow/feed_op.cc b/paddle/fluid/operators/controlflow/feed_op.cc index 7d0d899e8b6c3..141b13a71164b 100644 --- a/paddle/fluid/operators/controlflow/feed_op.cc +++ b/paddle/fluid/operators/controlflow/feed_op.cc @@ -34,7 +34,7 @@ const framework::FeedType& CheckAndGetFeedItem(const phi::ExtendedTensor& x, int col) { PADDLE_ENFORCE_GE(col, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected the column index (the attribute 'col' of " "operator 'Feed') of current feeding variable to be " "no less than 0. 
But received column index = %d.", @@ -43,7 +43,7 @@ const framework::FeedType& CheckAndGetFeedItem(const phi::ExtendedTensor& x, PADDLE_ENFORCE_LT( static_cast(col), feed_list->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The column index of current feeding variable is expected to be " "less than the length of feeding list. But received column index = " "%d, the length of feeding list = %d", @@ -60,7 +60,7 @@ void FeedDenseTensorKernel(const Context& dev_ctx, phi::DenseTensor* out) { PADDLE_ENFORCE_NOT_NULL( out, - platform::errors::NotFound( + phi::errors::NotFound( "Output cannot be found in scope for operator 'Feed'")); const auto& feed_item = CheckAndGetFeedItem(x, col); const auto& in_tensor = paddle::get(feed_item); @@ -81,7 +81,7 @@ void FeedSparseCooTensorKernel(const Context& dev_ctx, phi::SparseCooTensor* out) { PADDLE_ENFORCE_NOT_NULL( out, - platform::errors::NotFound( + phi::errors::NotFound( "Output cannot be found in scope for operator 'Feed'")); const auto& feed_item = CheckAndGetFeedItem(x, col); const auto& in_tensor = paddle::get(feed_item); @@ -103,7 +103,7 @@ void FeedStringsKernel(const Context& dev_ctx UNUSED, phi::ExtendedTensor* out) { PADDLE_ENFORCE_NOT_NULL( out, - platform::errors::NotFound( + phi::errors::NotFound( "Output cannot be found in scope for operator 'Feed'")); const auto& feed_item = CheckAndGetFeedItem(x, col); auto strs_out = static_cast(out); diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc index d3b4b086470a0..c9ceb1f3e01b2 100644 --- a/paddle/fluid/operators/controlflow/fetch_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_op.cc @@ -76,7 +76,7 @@ class FetchOp : public framework::OperatorBase { auto *fetch_var = scope.FindVar(fetch_var_name); PADDLE_ENFORCE_NOT_NULL( fetch_var, - platform::errors::NotFound( + phi::errors::NotFound( "Input variable(%s) cannot be found in scope for operator 'Fetch'." "Confirm that you have used the fetch `Variable` format " "instead of the string literal('%s') in `fetch_list` " @@ -91,15 +91,15 @@ class FetchOp : public framework::OperatorBase { auto *out_var = scope.FindVar(out_name); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound("Output variable(%s) cannot be found " - "in scope for operator 'Fetch'.", - out_name)); + phi::errors::NotFound("Output variable(%s) cannot be found " + "in scope for operator 'Fetch'.", + out_name)); int col = Attr("col"); PADDLE_ENFORCE_GE( col, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected the column index (the attribute 'col' of " "operator 'Fetch') of current fetching variable to be " "no less than 0. But received column index = %d.", diff --git a/paddle/fluid/operators/controlflow/fetch_v2_op.cc b/paddle/fluid/operators/controlflow/fetch_v2_op.cc index 8e811c20b28ff..591d3bed324d3 100644 --- a/paddle/fluid/operators/controlflow/fetch_v2_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_v2_op.cc @@ -128,14 +128,14 @@ class FetchV2Kernel { PADDLE_ENFORCE_EQ( ctx.HasOutput("Out"), true, - platform::errors::NotFound("Output(Out) of fetch_v2_op is not found.")); + phi::errors::NotFound("Output(Out) of fetch_v2_op is not found.")); auto *out_var = ctx.OutputVar("Out"); int col = ctx.Attr("col"); PADDLE_ENFORCE_GE( col, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expected the column index (the attribute 'col' of " "operator 'Fetch') of current fetching variable to be " "no less than 0. 
But received column index = %d.", @@ -163,8 +163,8 @@ class FetchV2Kernel { PADDLE_ENFORCE_EQ( check_place, true, - platform::errors::InvalidArgument("Tensor's place of input(X) must " - "be CPUPlace or CUDAPinnedPlace.")); + phi::errors::InvalidArgument("Tensor's place of input(X) must " + "be CPUPlace or CUDAPinnedPlace.")); if (deepcopy) { DeepCopy(src_item, fetch_var_name, dst_item); } else { @@ -186,7 +186,7 @@ class FetchV2Kernel { for (size_t i = 0; i < src_item.size(); ++i) { PADDLE_ENFORCE_EQ(platform::is_cpu_place(src_item[i].place()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Tensor's place of input(X) must be CPUPlace.")); if (deepcopy) { DeepCopy(src_item[i], fetch_var_name, &dst_item[i]); @@ -244,7 +244,7 @@ PD_REGISTER_STRUCT_KERNEL(fetch_v2, int64_t, uint8_t, bool, - plat::float16, + phi::dtype::float16, plat::bfloat16, plat::complex, plat::complex) {} diff --git a/paddle/fluid/operators/controlflow/get_places_op.cc b/paddle/fluid/operators/controlflow/get_places_op.cc deleted file mode 100644 index 9262ca59af970..0000000000000 --- a/paddle/fluid/operators/controlflow/get_places_op.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class OpDesc; -class Scope; -template -class EmptyGradOpMaker; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#endif - -namespace paddle { -namespace operators { - -static size_t CUDADevCount() { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - return platform::GetGPUDeviceCount(); -#else - return 0UL; -#endif -} - -class GetPlacesOp : public framework::OperatorBase { - public: - GetPlacesOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope &scope, - const platform::Place &place) const override { - bool is_gpu = false; - if (Attr("device_type") == "AUTO") { - is_gpu = platform::is_gpu_place(place); - } else { - is_gpu = Attr("device_type") == "CUDA"; - } - auto device_count = static_cast(Attr("device_count")); - if (device_count == 0) { - device_count = - is_gpu ? CUDADevCount() : std::thread::hardware_concurrency(); - } - PADDLE_ENFORCE_NE( - device_count, - 0UL, - platform::errors::InvalidArgument("Cannot indicate %s device count", - is_gpu ? 
"GPU" : "CPU")); - - auto out_var_name = Output("Out"); - auto &places = - *(GET_DATA_SAFELY( - scope.FindVar(out_var_name), "Output", "Out", "GetPlaces") - .GetMutable()); - places.reserve(device_count); - if (is_gpu) { - PADDLE_ENFORCE_LE(device_count, - CUDADevCount(), - platform::errors::InvalidArgument( - "Only %d CUDA devices found, cannot set to %d", - CUDADevCount(), - device_count)); - for (size_t i = 0; i < device_count; ++i) { - places.emplace_back(platform::CUDAPlace(static_cast(i))); - } - } else { - for (size_t i = 0; i < device_count; ++i) { - places.emplace_back(platform::CPUPlace()); - } - } - } -}; - -class GetPlacesOpProtoMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddOutput("Out", "vector of Place"); - AddAttr("device_count", "device count").SetDefault(0); - AddAttr("device_type", "device type") - .InEnum({"CUDA", "CPU", "AUTO"}) - .SetDefault("AUTO"); - AddComment(R"DOC( -Returns a list of places based on arguments. The list will be used for parallel -execution. -)DOC"); - } -}; - -class GetPlacesInferVarType : public framework::VarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - ctx->SetOutputType( - "Out", framework::proto::VarType::PLACE_LIST, framework::ALL_ELEMENTS); - } -}; - -class GetPlacesInferShape : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *context) const override { - // Do nothing - } -}; - -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - get_places, - ops::GetPlacesOp, - ops::GetPlacesOpProtoMaker, - ops::GetPlacesInferVarType, - ops::GetPlacesInferShape, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/controlflow/logical_op_xpu.h b/paddle/fluid/operators/controlflow/logical_op_xpu.h index 614db61558f79..8fde735d99936 100644 --- a/paddle/fluid/operators/controlflow/logical_op_xpu.h +++ b/paddle/fluid/operators/controlflow/logical_op_xpu.h @@ -82,7 +82,7 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel { bcast_ydims); PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU broadcast kernel return wrong value[%d %s]", ret, XPUAPIErrorMsg[ret])); @@ -118,7 +118,7 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel { bcast_ydims); PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU broadcast kernel return wrong value[%d %s]", ret, XPUAPIErrorMsg[ret])); @@ -144,11 +144,11 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ret, XPU_SUCCESS, - platform::errors::External("XPU API return wrong value[%d %s] in " - "op_name[%s].", - ret, - XPUAPIErrorMsg[ret], - XpuLogicalType2Str(xpu_type))); + phi::errors::External("XPU API return wrong value[%d %s] in " + "op_name[%s].", + ret, + XPUAPIErrorMsg[ret], + XpuLogicalType2Str(xpu_type))); if (need_broad_cast && dev_ctx.x_context()->xpu_stream != nullptr) { dev_ctx.Wait(); @@ -178,7 +178,7 @@ class UnaryLogicalOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ret, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU API return wrong value[%d %s].", ret, XPUAPIErrorMsg[ret])); } }; diff --git a/paddle/fluid/operators/controlflow/op_variant.cc b/paddle/fluid/operators/controlflow/op_variant.cc index 8d43a21e66437..0976ff36e63b2 100644 --- 
a/paddle/fluid/operators/controlflow/op_variant.cc +++ b/paddle/fluid/operators/controlflow/op_variant.cc @@ -70,11 +70,10 @@ void AppendOpVariantByOpName(const std::vector &op_descs, std::vector *result_ops) { PADDLE_ENFORCE_NOT_NULL( result_ops, - platform::errors::Unavailable("result_ops should not be a null_ptr.")); + phi::errors::Unavailable("result_ops should not be a null_ptr.")); for (auto *op_desc : op_descs) { PADDLE_ENFORCE_NOT_NULL( - op_desc, - platform::errors::Unavailable("op_desc should not be a null_ptr.")); + op_desc, phi::errors::Unavailable("op_desc should not be a null_ptr.")); if (op_desc->Type() == candidate_op_name) { result_ops->emplace_back(op_desc); } @@ -87,11 +86,10 @@ void AppendOpVariantByOpName( std::unordered_set *result_ops) { PADDLE_ENFORCE_NOT_NULL( result_ops, - platform::errors::Unavailable("result_ops should not be a null_ptr.")); + phi::errors::Unavailable("result_ops should not be a null_ptr.")); for (auto *op_desc : op_descs) { PADDLE_ENFORCE_NOT_NULL( - op_desc, - platform::errors::Unavailable("op_desc should not be a null_ptr.")); + op_desc, phi::errors::Unavailable("op_desc should not be a null_ptr.")); if (op_desc->Type() == candidate_op_name) { result_ops->emplace(op_desc); } diff --git a/paddle/fluid/operators/controlflow/op_variant.h b/paddle/fluid/operators/controlflow/op_variant.h index ad7cc6b741eb9..ed13a0285c375 100644 --- a/paddle/fluid/operators/controlflow/op_variant.h +++ b/paddle/fluid/operators/controlflow/op_variant.h @@ -49,10 +49,9 @@ class OpVariant { const AttrType &Attr(const std::string &name) const { auto &attrs = Attrs(); auto it = attrs.find(name); - PADDLE_ENFORCE_NE( - it, - attrs.end(), - platform::errors::NotFound("Cannot find attribute %s.", name)); + PADDLE_ENFORCE_NE(it, + attrs.end(), + phi::errors::NotFound("Cannot find attribute %s.", name)); return PADDLE_GET_CONST(AttrType, it->second); } diff --git a/paddle/fluid/operators/controlflow/pylayer_op.cc b/paddle/fluid/operators/controlflow/pylayer_op.cc index bd83c99a0c62d..57bce4224770a 100644 --- a/paddle/fluid/operators/controlflow/pylayer_op.cc +++ b/paddle/fluid/operators/controlflow/pylayer_op.cc @@ -95,7 +95,7 @@ class PyLayerForwardOp : public PyLayerOp { auto *scope_var = scope.FindVar(Output(kScope)); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Expect Scope variable to be set in pylayer_op, but " "got a null Scope variable. 
Please set the Scope variable.")); @@ -109,7 +109,7 @@ class PyLayerForwardOp : public PyLayerOp { PADDLE_ENFORCE_GT( blocks.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expect blocks contains at least 1 block, but got: %d", blocks.size())); @@ -123,7 +123,7 @@ class PyLayerForwardOp : public PyLayerOp { LOG_FIRST_N(INFO, 1) << "[ControlFlow][PyLayer] New Executor is Running."; CreateInterpreter(dev_place, *forward_block, &cur_scope, skip_vars); - PADDLE_ENFORCE_NOT_NULL(core_, platform::errors::Fatal("core_ is nullptr")); + PADDLE_ENFORCE_NOT_NULL(core_, phi::errors::Fatal("core_ is nullptr")); core_->Run({}, false); } }; @@ -156,7 +156,7 @@ class PyLayerBackwardMaker : public framework::SingleGradOpMaker { PADDLE_ENFORCE_GT( blocks.size(), static_cast(PyLayerBlockIndex::kBACKWARD), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expect blocks contains at least 2 block, but got: %d", blocks.size())); grad_op->SetBlockAttr( @@ -188,7 +188,7 @@ class PyLayerBackwardOp : public PyLayerOp { PADDLE_ENFORCE_EQ( inside_grads.size(), outside_grads.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mismatch inside_grads.size(): %d, and outside_grads.size(): %d", inside_grads.size(), outside_grads.size())); @@ -196,14 +196,14 @@ class PyLayerBackwardOp : public PyLayerOp { auto *scope_var = scope.FindVar(Input(PyLayerOp::kScope)); PADDLE_ENFORCE_NOT_NULL( scope_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Expect Scope variable to be set in pylayer_op, but " "got a null Scope variable. Please set the Scope variable.")); auto &scopes = scope_var->Get>(); PADDLE_ENFORCE_GT( scopes.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Expect Scope variable contains at least 1 scope, but got: %d", scopes.size())); framework::Scope &cur_scope = *(scopes[0]); @@ -216,7 +216,7 @@ class PyLayerBackwardOp : public PyLayerOp { << "[ControlFlow][PyLayerBackwardOp] New Executor is Running."; CreateInterpreter(dev_place, *backward_block, &cur_scope, inside_grads); - PADDLE_ENFORCE_NOT_NULL(core_, platform::errors::Fatal("core_ is nullptr")); + PADDLE_ENFORCE_NOT_NULL(core_, phi::errors::Fatal("core_ is nullptr")); core_->Run({}, false); @@ -252,7 +252,7 @@ class PyLayerBackwardInferVarType : public framework::VarTypeInference { ctx->OutputSize(framework::GradVarName(PyLayerOp::kInputs)); PADDLE_ENFORCE_EQ(forward_input_size, backward_output_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input_size and output_size should be equal for " "pylayer_grad op.")); for (size_t i = 0; i < backward_output_size; ++i) { diff --git a/paddle/fluid/operators/controlflow/pylayer_op_helper.cc b/paddle/fluid/operators/controlflow/pylayer_op_helper.cc index 9dc53d428ef1d..bdd669c644e6e 100644 --- a/paddle/fluid/operators/controlflow/pylayer_op_helper.cc +++ b/paddle/fluid/operators/controlflow/pylayer_op_helper.cc @@ -38,7 +38,7 @@ static void FindAllPyLayerOpAndPyLayerGradOp( PADDLE_ENFORCE_GE( fwd_ops->size(), bwd_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Size of forward ops must be greater or equal to backward ops. 
The " "number of forward ops is %d and the number of backward ops is %d", fwd_ops->size(), @@ -59,7 +59,7 @@ static void FindAllPyLayerOpAndPyLayerGradOp( PADDLE_ENFORCE_GE( fwd_ops->size(), bwd_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more pylayer_grad ops than " "pylayer ops in the graph or program. The number of " "forward ops is %d and the number of backward ops is %d", @@ -119,14 +119,14 @@ static void PrepareSafeEagerDeletionOnPyLayerOpAndPyLayerGradOp( if (IsMatchedPyLayerOpAndPyLayerGradOp(fwd_op, bwd_op)) { PADDLE_ENFORCE_EQ(matched_fwd_op, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Found multiple matched pylayer ops.")); matched_fwd_op = &fwd_op; } } PADDLE_ENFORCE_NOT_NULL(matched_fwd_op, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot find matched forward pylayer op.")); SetSkipVarsForPyLayerOp(const_cast(matched_fwd_op), &bwd_op); diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc index 2851757dccc4d..e290fa3e016bd 100644 --- a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc +++ b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc @@ -74,7 +74,7 @@ static void FindAllOpAndGradOp(const framework::ProgramDesc &program, PADDLE_ENFORCE_GE( ops.size(), grad_ops.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more grad ops than forward ops in the graph or program, " "the number of ops is %d and the number of grad_ops is %d.", ops.size(), @@ -95,7 +95,7 @@ static void FindAllOpAndGradOp(const framework::ProgramDesc &program, PADDLE_ENFORCE_GE( ops.size(), grad_ops.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more grad ops than forward ops in the graph or program, " "the number of ops is %d and the number of grad_ops is %d.", ops.size(), @@ -183,7 +183,7 @@ static void SetRecurrentOpAndRecurrentGradOpSkipVarAttr( PADDLE_ENFORCE_EQ( fwd_input.size(), in_grads.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Backward input gradient number does not match forward " "input number. The number of forward input number is %d and the " "number of backward input gradient number is %d.", @@ -203,7 +203,7 @@ static void SetRecurrentOpAndRecurrentGradOpSkipVarAttr( PADDLE_ENFORCE_EQ( fwd_param.size(), param_grads.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Backward parameter gradient number does not match " "forward parameter number. 
The number of forward parameter number is " "%d and the number of backward parameter gradient is %d.", @@ -269,15 +269,15 @@ void PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( if (IsMatchedRecurrentOpAndRecurrentGradOp(fwd_op, bwd_op)) { PADDLE_ENFORCE_EQ(matched_fwd_op, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Found multiple recurrent forward op matches " "recurrent grad op.")); matched_fwd_op = &fwd_op; } } - PADDLE_ENFORCE_NOT_NULL(matched_fwd_op, - platform::errors::PreconditionNotMet( - "Cannot find matched forward op.")); + PADDLE_ENFORCE_NOT_NULL( + matched_fwd_op, + phi::errors::PreconditionNotMet("Cannot find matched forward op.")); SetRecurrentOpAndRecurrentGradOpSkipVarAttr(*matched_fwd_op, bwd_op); recurrent_ops.erase(*matched_fwd_op); } diff --git a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc index c04e897aa6366..52006166c8fc8 100644 --- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc +++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc @@ -94,7 +94,7 @@ class WriteToArrayInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( context->HasInput("I"), true, - platform::errors::NotFound("Input(I) of WriteToArrayOp is not found.")); + phi::errors::NotFound("Input(I) of WriteToArrayOp is not found.")); // TODO(wangchaochaohu) control flow Op do not support runtime infer shape // Later we add [ontext->GetInputDim("I")) == 1] check when it's supported @@ -103,10 +103,10 @@ class WriteToArrayInferShape : public framework::InferShapeBase { return; } - PADDLE_ENFORCE_EQ(context->HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of WriteToArrayOp is not found.")); + PADDLE_ENFORCE_EQ( + context->HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of WriteToArrayOp is not found.")); context->SetOutputDim("Out", context->GetInputDim("X")); // When compile time, we need to: @@ -148,15 +148,13 @@ class ReadFromArrayOp : public ArrayOp { void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { auto *x = scope.FindVar(Input("X")); - PADDLE_ENFORCE_NOT_NULL(x, - platform::errors::NotFound( - "Input(X) of ReadFromArrayOp is not found.")); + PADDLE_ENFORCE_NOT_NULL( + x, phi::errors::NotFound("Input(X) of ReadFromArrayOp is not found.")); auto &x_array = x->Get(); auto *out = scope.FindVar(Output("Out")); PADDLE_ENFORCE_NOT_NULL( out, - platform::errors::NotFound( - "Output(Out) of ReadFromArrayOp is not found.")); + phi::errors::NotFound("Output(Out) of ReadFromArrayOp is not found.")); size_t offset = GetOffset(scope, place); if (offset < x_array.size()) { auto *out_tensor = out->GetMutable(); diff --git a/paddle/fluid/operators/controlflow/unity_build_rule.cmake b/paddle/fluid/operators/controlflow/unity_build_rule.cmake index 594ae3a36cf1d..4b88de66fd2f9 100644 --- a/paddle/fluid/operators/controlflow/unity_build_rule.cmake +++ b/paddle/fluid/operators/controlflow/unity_build_rule.cmake @@ -6,15 +6,11 @@ # in combination rule, you can remove the source file from the following rules. 
register_unity_group( cc - compare_all_op.cc - compare_op.cc conditional_block_infer_op.cc feed_op.cc fetch_op.cc fetch_v2_op.cc get_places_op.cc - logical_op.cc - bitwise_op.cc tensor_array_read_write_op.cc while_op.cc) register_unity_group(cu logical_op.cu bitwise_op.cu compare_op.cu diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 5c758bbf7ff42..65f9145dbd89d 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -20,7 +20,7 @@ #include "paddle/fluid/operators/controlflow/while_op_helper.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #include "paddle/fluid/platform/flags.h" @@ -64,15 +64,15 @@ class WhileOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &dev_place) const override { - PADDLE_ENFORCE_NOT_NULL(scope.FindVar(Input(kCondition)), - platform::errors::NotFound( - "Input(Condition) of WhileOp is not found.")); + PADDLE_ENFORCE_NOT_NULL( + scope.FindVar(Input(kCondition)), + phi::errors::NotFound("Input(Condition) of WhileOp is not found.")); auto &cond = scope.FindVar(Input(kCondition))->Get(); PADDLE_ENFORCE_EQ( cond.numel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The numel of Input(Condition) of WhileOp must be 1. But now " "the Condition's numel is ", cond.numel(), @@ -136,7 +136,7 @@ class WhileOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(step_scopes->size(), 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The Output(StepScope) of WhileOp should be empty.")); bool cond_data = GetCondData(cond); @@ -329,7 +329,7 @@ class WhileGradOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( Attr("is_test"), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "WhileGradOp is only callable when is_test is false.")); // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); @@ -350,7 +350,7 @@ class WhileGradOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(outside_og_names.size(), inside_og_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of original output gradient names " "does not match the number of backward input " "gradient names. 
The number of Backward input " @@ -397,7 +397,7 @@ class WhileGradOp : public framework::OperatorBase { !og_outside.GetMutable()->IsInitialized()) { auto *var_desc = parent_block->FindVarRecursive(outside_og_name); PADDLE_ENFORCE_NOT_NULL(var_desc, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Var `%s` is not found in parent " "block, can't fill constant.", outside_og_name)); @@ -448,7 +448,7 @@ class WhileGradOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( inside_array[j].numel(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The numel of %d-th element of var %s (LoDTensorArray) " "in while block must be 0, but received its numel is %d.", j, @@ -457,7 +457,7 @@ class WhileGradOp : public framework::OperatorBase { } } } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Currently only support phi::DenseTensor and " "phi::DenseTensorArray in " "WhileGradOp.")); @@ -474,7 +474,7 @@ class WhileGradOp : public framework::OperatorBase { auto &p_names = Inputs(kX); PADDLE_ENFORCE_EQ(pg_ig_names.size(), p_names.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of names in Outputs(X@GRAD) does not " "match the number of names in Inputs(X). The " "number of names in Outputs(X@GRAD) is %d and " @@ -493,8 +493,8 @@ class WhileGradOp : public framework::OperatorBase { auto pg_ig_var = cur_scope.FindVar(inside_grad_name); PADDLE_ENFORCE_NOT_NULL( pg_ig_var, - platform::errors::NotFound("Variable %s is not found.", - inside_grad_name)); + phi::errors::NotFound("Variable %s is not found.", + inside_grad_name)); if (pg_ig_var->IsType()) { auto pg_ig_lod_t_arr = pg_ig_var->GetMutable(); @@ -531,13 +531,13 @@ class WhileGradOp : public framework::OperatorBase { auto *var = (*cur_scope_iter)->FindVar(inside_grad_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("Variable %s is not found.", - inside_grad_name)); + phi::errors::NotFound("Variable %s is not found.", + inside_grad_name)); PADDLE_ENFORCE_EQ( var->IsType() || var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Currently the type of var only can be LoDTensorArray, " "or phi::DenseTensor, but the received var[%s] is %s.", inside_grad_name, @@ -721,7 +721,7 @@ class WhileGradOpShapeInference : public framework::InferShapeBase { auto out_var_ptrs = ctx->GetOutputVarPtrs(kXGRAD); PADDLE_ENFORCE_EQ(in_var_ptrs.size(), out_var_ptrs.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of Inputs(X) must be the same as " "the size of Outputs(X@GRAD).")); diff --git a/paddle/fluid/operators/controlflow/while_op_helper.cc b/paddle/fluid/operators/controlflow/while_op_helper.cc index 80b4abe763123..638f2fbae740a 100644 --- a/paddle/fluid/operators/controlflow/while_op_helper.cc +++ b/paddle/fluid/operators/controlflow/while_op_helper.cc @@ -86,7 +86,7 @@ static void ModifyWhileOpAndWhileGradOpAttr(const OpVariant &fwd_op, PADDLE_ENFORCE_EQ( fwd_input.size(), in_grads.size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Backward output gradient number does not match forward input number." 
"The number of forward input number is %d and the number of backward " "output gradient number is %d.", @@ -116,7 +116,7 @@ static void FindAllWhileAndWhileGradOp(const framework::ProgramDesc &program, PADDLE_ENFORCE_GE( while_ops->size(), while_grad_ops->size(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "There are more while_grad_ops than forward while_ops in the graph " "or program, the number of while_ops is %d and the number of " "while_grad_ops is %d.", @@ -137,7 +137,7 @@ static void FindAllWhileAndWhileGradOp(const framework::ProgramDesc &program, PADDLE_ENFORCE_GE( while_ops->size(), while_grad_ops->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "There are more while_grad_ops than forward while_ops in the graph " "or program, the number of while_ops is %d and the number of " "while_grad_ops is %d.", @@ -167,14 +167,14 @@ static void PrepareSafeEagerDeletionOnWhileOpAndWhileGradOpImpl( if (IsMatchedWhileOpAndWhileGradOp(fwd_op, bwd_op)) { PADDLE_ENFORCE_EQ(matched_fwd_op, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Found multiple while forward ops match while " "grad ops.")); matched_fwd_op = &fwd_op; } } PADDLE_ENFORCE_NOT_NULL(matched_fwd_op, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot find matched forward while op.")); ModifyWhileOpAndWhileGradOpAttr(*matched_fwd_op, bwd_op); while_op_set.erase(*matched_fwd_op); @@ -231,7 +231,7 @@ bool GetCondData(const phi::DenseTensor &cond) { defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_CUSTOM_DEVICE) framework::TensorCopySync(cond, platform::CPUPlace(), cpu_cond.get()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "This version of PaddlePaddle does NOT support GPU/XPU but got " "GPU/XPU tensor Cond in WhileOp. Please compile WITH_GPU or " "WITH_XPU option.")); diff --git a/paddle/fluid/operators/copy_cross_scope_op.cc b/paddle/fluid/operators/copy_cross_scope_op.cc deleted file mode 100644 index ed433518068b4..0000000000000 --- a/paddle/fluid/operators/copy_cross_scope_op.cc +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include - -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/var_type_traits.h" - -namespace paddle { -namespace framework { -class OpDesc; -template -class EmptyGradOpMaker; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -namespace paddle { -namespace operators { - -class CopyCrossScopeOp : public framework::OperatorBase { - public: - CopyCrossScopeOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext* ctx) const {} - - private: - void RunImpl(const framework::Scope& scope, - const platform::Place& dev_place) const override { - int num_micro_scopes = static_cast(scope.kids().size()); - int num_micro_batches = Attr("num_micro_batches"); - bool ToM = Attr("to_main_scope"); - PADDLE_ENFORCE_EQ(num_micro_scopes, - num_micro_batches, - platform::errors::InvalidArgument( - "For pipeline, number of micro scopes (%d) should " - "be equal to number of micro batches (%d).", - num_micro_scopes, - num_micro_batches)); - const std::string& id_name = Input("Id"); - auto* id_var = scope.FindVar(id_name); - PADDLE_ENFORCE_NOT_NULL( - id_var, - platform::errors::NotFound("No variable with name %s found.", id_name)); - auto id_tensor = id_var->GetMutable(); - auto it = scope.kids().begin(); - phi::DenseTensor cpu_id_tensor; - paddle::framework::TensorCopySync( - *id_tensor, platform::CPUPlace(), &cpu_id_tensor); - auto id_value = cpu_id_tensor.data(); - for (auto i = 0; i < *id_value; i++) { - it++; - } - if (it == scope.kids().end()) { - if (ToM) { - auto dst_scope = *it; - const std::string& x_name = Input("X"); - auto* dst_var = dst_scope->FindVar(x_name); - PADDLE_ENFORCE_NOT_NULL( - dst_var, - platform::errors::NotFound( - "No variable with name %s found in source scope.", x_name)); - auto* main_var = scope.FindVar(x_name); - PADDLE_ENFORCE_NOT_NULL( - main_var, - platform::errors::NotFound( - "No variable with name %s found in destination scope.", - x_name)); - auto dst_tensor = dst_var->GetMutable(); - auto main_tensor = main_var->GetMutable(); - paddle::framework::TensorCopySync( - *dst_tensor, main_tensor->place(), main_tensor); - } - return; - } - auto source_scope = *it; - it++; - auto dst_scope = *it; - const std::string& x_name = Input("X"); - auto* source_var = source_scope->FindVar(x_name); - PADDLE_ENFORCE_NOT_NULL( - source_var, - platform::errors::NotFound( - "No variable with name %s found in source scope.", x_name)); - auto* dst_var = dst_scope->FindVar(x_name); - PADDLE_ENFORCE_NOT_NULL( - dst_var, - platform::errors::NotFound( - "No variable with name %s found in destination scope.", x_name)); - auto src_tensor = source_var->GetMutable(); - auto dst_tensor = dst_var->GetMutable(); - paddle::framework::TensorCopySync( - *src_tensor, dst_tensor->place(), dst_tensor); - - if (ToM) { - auto* main_var = scope.FindVar(x_name); - PADDLE_ENFORCE_NOT_NULL( - main_var, - platform::errors::NotFound( - "No variable with name %s found in destination scope.", x_name)); - auto main_tensor = main_var->GetMutable(); - paddle::framework::TensorCopySync( - *dst_tensor, main_tensor->place(), main_tensor); - } - } -}; - -class CopyCrossScopeOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void 
Make() override { - AddInput("X", - "(Tensor), The first input tensor of copy_cross_scope op, which " - "is copying micro scope."); - AddInput("Id", - "(Tensor), The second input tensor of copy_cross_scope op, which " - "is a id of the current micro scope."); - AddAttr("to_main_scope", "Return current scope to main scope.") - .SetDefault(false); - AddAttr("num_micro_batches", "Number of micro batches for pipeline."); - AddComment(R"DOC( - This op is used by pipeline to copy tensors across micro batch scopes. - Copy the variable value of the giving Id's micro scope to the micro scope of Id + 1 position. - If need to copy back to the main scope, using to_main_scope option to copy the variable value of - the current micro scope to the main scope. - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(copy_cross_scope, - ops::CopyCrossScopeOp, - ops::CopyCrossScopeOpMaker); diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc index 427f9a0307399..1243ae595bac4 100644 --- a/paddle/fluid/operators/correlation_op.cc +++ b/paddle/fluid/operators/correlation_op.cc @@ -84,19 +84,19 @@ class CorrelationOp : public framework::OperatorWithKernel { auto in_dims = ctx->GetInputDim("Input1"); auto in2_dims = ctx->GetInputDim("Input2"); - PADDLE_ENFORCE_EQ(in_dims.size() == 4, - true, - platform::errors::InvalidArgument( - "Input(X) of CorrelationOp must be 4 dims." - "But received dims is %d.", - in_dims.size())); - - PADDLE_ENFORCE_EQ(in2_dims.size() == 4, - true, - platform::errors::InvalidArgument( - "Input(Y) of CorrelationOp must be 4 dims." - "But received dims is %d.", - in2_dims.size())); + PADDLE_ENFORCE_EQ( + in_dims.size() == 4, + true, + phi::errors::InvalidArgument("Input(X) of CorrelationOp must be 4 dims." + "But received dims is %d.", + in_dims.size())); + + PADDLE_ENFORCE_EQ( + in2_dims.size() == 4, + true, + phi::errors::InvalidArgument("Input(Y) of CorrelationOp must be 4 dims." 
+ "But received dims is %d.", + in2_dims.size())); std::vector output_shape = CorrelationOutputSize(static_cast(in_dims[0]), static_cast(in_dims[2]), @@ -114,11 +114,11 @@ class CorrelationOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1"); - PADDLE_ENFORCE_EQ(input_data_type, - framework::TransToProtoVarType( - ctx.Input("Input2")->dtype()), - platform::errors::InvalidArgument( - "X and Y shoule have the same datatype")); + PADDLE_ENFORCE_EQ( + input_data_type, + framework::TransToProtoVarType( + ctx.Input("Input2")->dtype()), + phi::errors::InvalidArgument("X and Y shoule have the same datatype")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } }; @@ -173,7 +173,7 @@ class CorrelationKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::Unimplemented("Correlation only supports GPU now.")); + phi::errors::Unimplemented("Correlation only supports GPU now.")); } }; diff --git a/paddle/fluid/operators/correlation_op.cu b/paddle/fluid/operators/correlation_op.cu index ee6cc22c867c3..61b922e0caecc 100644 --- a/paddle/fluid/operators/correlation_op.cu +++ b/paddle/fluid/operators/correlation_op.cu @@ -179,10 +179,10 @@ template class CorrelationCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "Correlation only supports GPU now.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("Correlation only supports GPU now.")); auto *input1 = ctx.Input("Input1"); auto *input2 = ctx.Input("Input2"); @@ -447,10 +447,10 @@ template class CorrelationCUDAGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "Correlation only supports GPU now.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("Correlation only supports GPU now.")); const auto *input1 = ctx.Input("Input1"); const auto *input2 = ctx.Input("Input2"); const auto *grad_output = diff --git a/paddle/fluid/operators/crf_decoding_op.cc b/paddle/fluid/operators/crf_decoding_op.cc index e0cbcc513d6cd..62edb0ece83fc 100644 --- a/paddle/fluid/operators/crf_decoding_op.cc +++ b/paddle/fluid/operators/crf_decoding_op.cc @@ -103,7 +103,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { if (has_length) { PADDLE_ENFORCE_EQ(emission_dims.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Emission) should be a 3-D tensor. But " "received: input rank %u, input shape [%s]. ", emission_dims.size(), @@ -111,7 +111,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(emission_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Emission) should be a 2-D tensor. 
But " "received: input rank %u, input shape [%s].", emission_dims.size(), @@ -121,7 +121,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { auto transition_dims = ctx->GetInputDim("Transition"); PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Transition) should be a 2-D tensor. But " "received: input rank %u, input shape [%s].", transition_dims.size(), @@ -129,7 +129,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( transition_dims[0] - 2, transition_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "An invalid dimension for the Input(Transition), which should " "be a 2-D tensor with shape [(D + 2) x D]. But received: input " "rank %u, " @@ -140,7 +140,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { transition_dims[transition_dims.size() - 1] > 0)) { PADDLE_ENFORCE_EQ(emission_dims[emission_dims.size() - 1], transition_dims[transition_dims.size() - 1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of the Input(Emission) and the " "Input(Transition) " "should be equal to the tag number. But received " @@ -159,7 +159,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { (label_dims.size() == 3UL && label_dims[2] == 1) || label_dims.size() == 2UL, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Label) should be a 3-D tensor with last dimension " "fixed to 1 or a 2-D tensor in padding mode. But received: " "input " @@ -171,7 +171,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { (label_dims.size() == 2UL && label_dims[1] == 1) || label_dims.size() == 1UL, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Label) should be a 2-D tensor with last " "dimension fixed to 1 or a 1-D tensor. But received: " "input rank %u, input shape [%s].", @@ -182,7 +182,7 @@ class CRFDecodingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( emission_dims[0], label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Emission) and Input(Label) " "should be the same. But received Input(Emission): rank %u, " "shape [%s]; received Input(Label): rank %u, shape [%s].", diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h index 50d6eece098e3..6649043014d64 100644 --- a/paddle/fluid/operators/crf_decoding_op.h +++ b/paddle/fluid/operators/crf_decoding_op.h @@ -78,7 +78,7 @@ class CRFDecodingOpKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Emission) should be a sequence with lod " "level 1. But received: lod level %u.", emission_weights->NumLevels())); @@ -86,7 +86,7 @@ class CRFDecodingOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( lod.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Emission) must be a sequence. But received: lod level %u.", lod.size())); const size_t level = 0; @@ -105,7 +105,7 @@ class CRFDecodingOpKernel : public framework::OpKernel { if (label) { PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(label) should be a sequence with lod " "level 1. 
But received: lod level %u.", label->NumLevels())); diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc index 19164959c7ceb..80db8230e9e24 100644 --- a/paddle/fluid/operators/crop_op.cc +++ b/paddle/fluid/operators/crop_op.cc @@ -34,7 +34,7 @@ class CropOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( int64_t(shape.size()), x_dim.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of elements (%d) of CropOp's " "'shape' attribute should be equal to the number of dimensions " "(%d) of the Input(X).", @@ -49,7 +49,7 @@ class CropOp : public framework::OperatorWithKernel { auto y_dim = ctx->GetInputDim("Y"); PADDLE_ENFORCE_EQ(common::arity(x_dim), common::arity(y_dim), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions (%d) of CropOp's input(X)" " must be equal to that (%d) of input(Y).", common::arity(x_dim), diff --git a/paddle/fluid/operators/crop_op.h b/paddle/fluid/operators/crop_op.h index 7d0d4f06392fa..04b077de36e50 100644 --- a/paddle/fluid/operators/crop_op.h +++ b/paddle/fluid/operators/crop_op.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/strided_memcpy.h" namespace paddle { @@ -36,25 +36,25 @@ static std::vector GetOffsets(const framework::ExecutionContext& ctx) { if (ctx.HasInput("Offsets")) { PADDLE_ENFORCE_EQ(ctx.Attr>("offsets").empty(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input 'Offsets' and attribute 'offsets' " "should not be used at the same time for CropOp.")); const auto* offsets_tensor = ctx.Input("Offsets"); PADDLE_ENFORCE_EQ(offsets_tensor->dims().size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of input 'Offsets' for " "CropOp must be 1, but the value received is %d.", offsets_tensor->dims().size())); PADDLE_ENFORCE_EQ( rank, offsets_tensor->dims()[0], - platform::errors::InvalidArgument("The number of elements (%d) for " - "input 'Offsets' must be equal to " - "the number of dimensions (%d) " - "of the input tensor.", - offsets_tensor->dims()[0], - rank)); + phi::errors::InvalidArgument("The number of elements (%d) for " + "input 'Offsets' must be equal to " + "the number of dimensions (%d) " + "of the input tensor.", + offsets_tensor->dims()[0], + rank)); const int* offsets_data; phi::DenseTensor cpu_tmp_tensor; if (platform::is_cpu_place(offsets_tensor->place())) { @@ -70,12 +70,12 @@ static std::vector GetOffsets(const framework::ExecutionContext& ctx) { PADDLE_ENFORCE_EQ( rank, static_cast(res.size()), - platform::errors::InvalidArgument("The number of elements (%d) for " - "input 'Offsets' must be equal to " - "the number of dimensions (%d) " - "of the input tensor.", - res.size(), - rank)); + phi::errors::InvalidArgument("The number of elements (%d) for " + "input 'Offsets' must be equal to " + "the number of dimensions (%d) " + "of the input tensor.", + res.size(), + rank)); } return res; } @@ -101,7 +101,7 @@ void CropFunction(const framework::ExecutionContext& context) { } auto& place = *context.template device_context().eigen_device(); - EigenSlice, T, D>::Eval( + phi::funcs::EigenSlice, T, D>::Eval( place, out_tensor, x_tensor, e_offsets, e_shape); } @@ -113,14 +113,14 @@ class CropKernel 
: public framework::OpKernel { PADDLE_ENFORCE_GE( rank, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the Input(X) for CropOp must be " "greater than or equal to 1, but the value received is %d.", rank)); PADDLE_ENFORCE_LE( rank, 6, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the Input(X) for CropOp must be " "less than or equal to 6, but the value received is %d.", rank)); @@ -165,7 +165,7 @@ void CropGradFunction(const framework::ExecutionContext& context) { auto d_out_tensor = EigenTensor::From(*d_out); auto& place = *context.template device_context().eigen_device(); - EigenPad, T, D>::Eval( + phi::funcs::EigenPad, T, D>::Eval( place, d_x_tensor, d_out_tensor, paddings, static_cast(0)); } } @@ -181,7 +181,7 @@ class CropGradKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rank, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'Out@GRAD' for " "CropGrad must be greater than or equal " "to 1, but the value received is %d.", @@ -189,7 +189,7 @@ class CropGradKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( rank, 6, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'Out@GRAD' for " "CropGrad must be less than or equal " "to 6, but the value received is %d.", diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index cc2b4b4252835..e8baeac3b0bfa 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -42,7 +42,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( common::slice_ddim(x_dims, 0, rank - 1), common::slice_ddim(label_dims, 0, rank - 1), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) and Input(Label) shall have the same shape " "except the last dimension. But received: the shape of Input(X) " "is " @@ -55,7 +55,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( rank, label_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If Attr(soft_label) == true, Input(X) and Input(Label) " "shall have the same dimensions. But received: the dimensions of " "Input(X) is [%d]," @@ -72,7 +72,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[rank - 1], label_dims[rank - 1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If Attr(soft_label) == true, the last dimension of " "Input(X) and Input(Label) should be equal. But received: the" "last dimension of Input(X) is [%d], the shape of Input(X) is " @@ -91,7 +91,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( label_dims[rank - 1], 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the last dimension of Input(Label) should be 1." "But received: the last dimension of Input(Label) is [%d]," "the last dimension is [%d]", @@ -101,7 +101,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( rank, label_dims.size() + 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The rank of Input(X) should be equal to " "Input(Label) plus 1." 
"But received: The dimension of Input(X) is [%d], " @@ -160,7 +160,7 @@ class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dy_dims.size(), label_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y@Grad) and Input(Y) should have the same rank." "But received: Y@Grad's rank is [%d], Y's rank is [%d]", dy_dims.size(), @@ -175,7 +175,7 @@ class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( common::slice_ddim(x_dims, 0, rank - 1), common::slice_ddim(dy_dims, 0, rank - 1), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) and Input(Y@Grad) should have the same " "shape except the last dimension. but received: " "the shape of Input(X) is [%s], " diff --git a/paddle/fluid/operators/cross_entropy_op.cu b/paddle/fluid/operators/cross_entropy_op.cu index 06ac7791e6d68..e4e2420d152bc 100644 --- a/paddle/fluid/operators/cross_entropy_op.cu +++ b/paddle/fluid/operators/cross_entropy_op.cu @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cross_entropy_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace plat = paddle::platform; namespace ops = paddle::operators; @@ -24,14 +24,14 @@ PD_REGISTER_STRUCT_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(cross_entropy_grad, GPU, ALL_LAYOUT, ops::CrossEntropyGradientOpKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(cross_entropy2, GPU, @@ -39,11 +39,11 @@ PD_REGISTER_STRUCT_KERNEL(cross_entropy2, ops::CrossEntropyOpKernel2, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(cross_entropy_grad2, GPU, ALL_LAYOUT, ops::CrossEntropyGradientOpKernel2, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index 5b76cc9a65a2b..9c0d025cb0cbb 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -180,7 +180,7 @@ struct HardLabelCrossEntropyForwardFunctor { auto label = label_[idx]; if (label != ignore_index_) { // don't update to PADDLE_ENFORCE_GE and PADDLE_ENFORCE_LT cause - // can't use platform::errors::InvalidArgument in HOSTDEVICE + // can't use phi::errors::InvalidArgument in HOSTDEVICE PADDLE_ENFORCE(label >= 0 && label < feature_size_, "Variable value (label) of " "OP(fluid.layers.cross_entropy) expected >= 0 " diff --git a/paddle/fluid/operators/ctc_align_op.cc b/paddle/fluid/operators/ctc_align_op.cc deleted file mode 100644 index a40ba84610293..0000000000000 --- a/paddle/fluid/operators/ctc_align_op.cc +++ /dev/null @@ -1,133 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/ctc_align_op.h" - -namespace paddle { -namespace operators { - -class CTCAlignOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "ctc_align"); - OP_INOUT_CHECK(ctx->HasOutput("Output"), "Output", "Output", "ctc_align"); - - auto input_dims = ctx->GetInputDim("Input"); - - // TODO(wanghaoshuang): it is tricky to set the wrong dimension here. - ctx->SetOutputDim("Output", input_dims); - if (ctx->HasInput("InputLength")) { - ctx->SetOutputDim("OutputLength", {input_dims[0], 1}); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Input"), - ctx.device_context().GetPlace()); - } -}; - -class CTCAlignOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Input", - "2-D Tensor or LodTensor with shape " - "[Lp, 1], where Lp is the sum of all input sequences' length."); - AddInput("InputLength", - "2-D Tensor with shape [batch_size, 1], " - " When Input is padding mode, InputLength is length of every " - "sequence in Input.") - .AsDispensable(); - AddOutput("Output", "(Tensor, default: Tensor), The align result."); - AddOutput("OutputLength", - "2-D Tensor with shape [batch_size, 1], " - "When Input is padding mode, OutputLength is length of every " - "sequence in Output.") - .AsDispensable(); - AddAttr("blank", - "(int, default: 0), the blank label set in Connectionist " - "Temporal Classification (CTC) op.") - .SetDefault(0); - AddAttr("merge_repeated", - "(bool, default: true), whether to " - "merge repeated elements between two blanks. ") - .SetDefault(true); - // add attr padding number for tensor input - AddAttr("padding_value", - "(int, default: 0), padding number " - "use to padding tensor. ") - .SetDefault(0); - AddComment(R"DOC( -CTCAlign op is used to merge repeated elements between two blanks -and then delete all blanks in sequence. - -Given: - Input.data = [0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, - 6, 0, 0, 7, 7, 7, 0] - Input.dims = {18, 1} - Input.LoD = [[0, 11, 18]] - -And: - blank = 0 - merge_repeated = True - -Then: - Output.data = [1, 2, 4, 4, 5, 6, - 6, 7] - Output.dims = {8, 1} - Output.LoD = [[0, 6, 8]] -or Given: - Input.data = [[0, 1, 2, 2, 0, 4], - [0, 4, 5, 0, 6, 0], - [0, 7, 7, 7, 0, 0]] - InputLength.data = [[6], - [5], - [4]], - Input.dims = {3, 6}, - Input.Lod = [] -And: - blank = 0 - merge_repeated = True - padding_value = 0 - -Then: - Output.data = [[1, 2, 4, 0, 0, 0], - [4, 5, 6, 0, 0, 0], - [7, 0, 0, 0, 0, 0]], - OutputLength.data = [[3], - [3], - [1]], - Output.dims = {3, 6}, - Output.Lod = [] -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - ctc_align, - ops::CTCAlignOp, - ops::CTCAlignOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - ctc_align, CPU, ALL_LAYOUT, ops::CTCAlignKernel, int, int64_t) {} diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu deleted file mode 100644 index 3b7490b1dcff3..0000000000000 --- a/paddle/fluid/operators/ctc_align_op.cu +++ /dev/null @@ -1,171 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <stdio.h>
-#include <thrust/device_vector.h>
-#include <thrust/host_vector.h>
-
-#include <vector>
-
-#include "paddle/fluid/operators/ctc_align_op.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T>
-__global__ void MergeAndDelCudaKernel(const int64_t num_token,
-                                      const T* tokens,
-                                      const size_t num_seq,
-                                      size_t* lod0,
-                                      const int blank,
-                                      const int merge_repeated,
-                                      size_t* out_lod0,
-                                      T* output) {
-  int output_idx = 0;
-  out_lod0[0] = 0;
-
-  for (int i = 0; i < num_seq; ++i) {
-    T pre_token = -1;
-    for (int j = lod0[i]; j < lod0[i + 1]; ++j) {
-      if (tokens[j] != blank && !(merge_repeated && tokens[j] == pre_token)) {
-        output[output_idx] = tokens[j];
-        ++output_idx;
-      }
-      pre_token = tokens[j];
-    }
-    out_lod0[i + 1] = output_idx;
-  }
-}
-
-template <typename T>
-__global__ void PaddingMergeAndDelCudaKernel(const int64_t num_token,
-                                             const T* tokens,
-                                             const T* tokens_length,
-                                             const int blank,
-                                             const int merge_repeated,
-                                             const int padding_value,
-                                             const int64_t batch_size,
-                                             T* output,
-                                             T* output_length) {
-  int ind = blockIdx.x * blockDim.x + threadIdx.x;
-  if (ind >= batch_size) return;
-  int output_idx = ind * num_token;
-  T prev_token = -1;
-  for (int i = ind * num_token; i < ind * num_token + tokens_length[ind]; i++) {
-    if ((unsigned)tokens[i] != blank &&
-        !(merge_repeated && tokens[i] == prev_token)) {
-      output[output_idx] = tokens[i];
-      ++output_idx;
-    }
-    prev_token = tokens[i];
-  }
-  output_length[ind] = output_idx - ind * num_token;
-  for (int i = output_idx; i < ind * num_token + num_token; i++) {
-    output[i] = padding_value;
-  }
-}
-
-template <typename T, typename DeviceContext>
-class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()),
-                      true,
-                      platform::errors::InvalidArgument(
-                          "CTCAlign operator CUDA kernel must use CUDAPlace "
-                          "rather than CPUPlace."));
-    auto* input = ctx.Input<phi::DenseTensor>("Input");
-    auto* output = ctx.Output<phi::DenseTensor>("Output");
-    const int blank = ctx.Attr<int>("blank");
-    const int merge_repeated =
-        static_cast<int>(ctx.Attr<bool>("merge_repeated"));
-    const T* tokens = input->data<T>();
-    auto stream = ctx.cuda_device_context().stream();
-
-    // tensor input which has no lod
-    if (input->lod().empty()) {
-      const int padding_value = ctx.Attr<int>("padding_value");
-      auto input_dims = input->dims();
-      T* output_data = output->mutable_data<T>({input_dims[0], input_dims[1]},
-                                               ctx.GetPlace());
-      auto* input_length = ctx.Input<phi::DenseTensor>("InputLength");
-      const T* input_length_data = input_length->data<T>();
-      auto* output_length = ctx.Output<phi::DenseTensor>("OutputLength");
-      T* output_length_data =
-          output_length->mutable_data<T>({input_dims[0], 1}, ctx.GetPlace());
-      PaddingMergeAndDelCudaKernel<T>
-          <<<32, (input_dims[0] + 32 - 1) / 32, 0, stream>>>(
-              input_dims[1],
-              tokens,
-              input_length_data,
-              blank,
-              merge_repeated,
-              padding_value,
-              input_dims[0],
-              output_data,
-              output_length_data);
-    } else {
-      const size_t level = 0;
-      auto input_lod = framework::ToAbsOffset(input->lod());
-
-      const int64_t num_tokens = input->dims()[0];
-      const size_t num_seq = input_lod[level].size() - 1;
-
-      // prepare a lod to record lod information while merging elements
-      thrust::device_vector<size_t> dev_out_lod0(input_lod[level].size());
-      size_t* dev_out_lod0_ptr = thrust::raw_pointer_cast(dev_out_lod0.data());
-
-      // merge elements and delete blank
-      T* output_data = output->mutable_data<T>({num_tokens, 1}, ctx.GetPlace());
-
-      phi::MixVector<size_t> mixv_input_lod(&input_lod[level]);
-      MergeAndDelCudaKernel<T>
-          <<<1, 1, 0, stream>>>(num_tokens,
-                                tokens,
-                                num_seq,
-                                mixv_input_lod.CUDAMutableData(ctx.GetPlace()),
-                                blank,
-                                merge_repeated,
-                                dev_out_lod0_ptr,
-                                output_data);
-      mixv_input_lod.CopyToCPU();
-
-      // set output lod
-      std::vector<size_t> host_out_lod0(dev_out_lod0.begin(),
-                                        dev_out_lod0.end());
-      framework::LoD out_lod;
-      out_lod.push_back(host_out_lod0);
-      output->set_lod(out_lod);
-
-      // resize output dims
-      output->Resize({static_cast<int64_t>(host_out_lod0.back()), 1});
-
-      if (host_out_lod0.back() == 0) {
-        output->Resize({1, 1});
-        output->mutable_data<T>(ctx.GetPlace());
-        phi::funcs::SetConstant<phi::GPUContext, T> set_constant;
-        set_constant(
-            ctx.template device_context<phi::GPUContext>(), output, -1);
-      }
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(
-    ctc_align, GPU, ALL_LAYOUT, ops::CTCAlignOpCUDAKernel, int, int64_t) {}
diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h
deleted file mode 100644
index faa2efab772a6..0000000000000
--- a/paddle/fluid/operators/ctc_align_op.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <string.h>
-
-#include <vector>
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T, typename DeviceContext>
-class CTCAlignKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<phi::DenseTensor>("Input");
-    auto* output = ctx.Output<phi::DenseTensor>("Output");
-    size_t blank = static_cast<size_t>(ctx.Attr<int>("blank"));
-    bool merge_repeated = ctx.Attr<bool>("merge_repeated");
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
-    auto input_dims = common::vectorize(input->dims());
-    const T* input_data = input->data<T>();
-
-    // support tensor input, no lod information
-    if (input->lod().empty()) {
-      size_t padding_value =
-          static_cast<size_t>(ctx.Attr<int>("padding_value"));
-      auto* input_length = ctx.Input<phi::DenseTensor>("InputLength");
-      const T* input_length_data = input_length->data<T>();
-
-      auto* output_length = ctx.Output<phi::DenseTensor>("OutputLength");
-      T* output_length_data = output_length->mutable_data<T>(ctx.GetPlace());
-
-      for (size_t batch_id = 0; batch_id < (unsigned)input_dims[0];
-           batch_id++) {
-        T prev_token = -1;
-        size_t output_idx = 0;
-        for (size_t i = 0; i < (unsigned)input_length_data[batch_id]; i++) {
-          size_t input_ind = batch_id * input_dims[1] + i;
-          if ((unsigned)input_data[input_ind] != blank &&
-              !(merge_repeated && input_data[input_ind] == prev_token)) {
-            output_data[batch_id * input_dims[1] + output_idx] =
-                input_data[input_ind];
-            ++output_idx;
-          }
-          prev_token = input_data[input_ind];
-        }
-        output_length_data[batch_id] = output_idx;
-        for (size_t j = output_idx; j < (unsigned)input_dims[1]; j++)
-          output_data[batch_id * input_dims[1] + j] = padding_value;
-      }
-    } else {
-      const size_t level = 0;
-      auto input_lod = framework::ToAbsOffset(input->lod());
-
-      // check input dims and lod
-      PADDLE_ENFORCE_EQ(
-          input_dims[0],
-          static_cast<int64_t>(input_lod[level].back()),
-          platform::errors::InvalidArgument(
-              "The first dimension %d of CTCAlign operator Input(Input) should "
-              "be equal to "
-              "the sum of all sequences' lengths %d.",
-              input_dims[0],
-              static_cast<int64_t>(input_lod[level].back())));
-
-      const size_t num_sequences = input_lod[level].size() - 1;
-
-      // merge repeated tokens and delete blank
-      size_t output_idx = 0;
-      std::vector<size_t> output_lod0(1, 0);
-      for (size_t seq_idx = 0; seq_idx < num_sequences; ++seq_idx) {
-        T prev_token = -1;
-        for (size_t i = input_lod[level][seq_idx];
-             i < input_lod[level][seq_idx + 1];
-             ++i) {
-          if ((unsigned)input_data[i] != blank &&
-              !(merge_repeated && input_data[i] == prev_token)) {
-            output_data[output_idx] = input_data[i];
-            ++output_idx;
-          }
-          prev_token = input_data[i];
-        }
-        output_lod0.push_back(output_idx);
-      }
-
-      // set output lod
-      framework::LoD output_lod;
-      output_lod.push_back(output_lod0);
-      output->set_lod(output_lod);
-      // resize output dims
-      output->Resize({static_cast<int64_t>(output_lod0.back()), 1});
-      // for empty sequence
-      if (output_lod0.back() == 0) {
-        output->Resize({1, 1});
-        output_data = output->mutable_data<T>(ctx.GetPlace());
-        output_data[0] = -1;
-      }
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/cudnn_rnn_cache.h b/paddle/fluid/operators/cudnn_rnn_cache.h
index 9b6774af5832a..eaca6842d350c 100644
--- a/paddle/fluid/operators/cudnn_rnn_cache.h
+++ b/paddle/fluid/operators/cudnn_rnn_cache.h
@@ -267,7 +267,7 @@ class CudnnRNNCache {
     PADDLE_ENFORCE_EQ(
         weights_size_,
         cudnn_size * weight_numel,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
"The cudnn lstm and setting weight size should be same.")); int dim_w[3]; diff --git a/paddle/fluid/operators/custom_device_common_op_registry.cc b/paddle/fluid/operators/custom_device_common_op_registry.cc index d63197af754f2..d45a1a5a6a675 100644 --- a/paddle/fluid/operators/custom_device_common_op_registry.cc +++ b/paddle/fluid/operators/custom_device_common_op_registry.cc @@ -65,19 +65,19 @@ class CConcatOpCustomDeviceKernel : public framework::OpKernel { PADDLE_ENFORCE_GE(rank, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "greater than or equal to 0.", rank)); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of nranks (%d) for c_concat must be " "greater than or equal to 2.", nranks)); PADDLE_ENFORCE_LT(rank, nranks, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_concat must be " "less than that of nranks (%d).", rank, @@ -107,7 +107,7 @@ class CConcatOpCustomDeviceKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( nranks, comm->GetSize(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "nranks: %s should equal to %s", nranks, comm->GetSize())); int64_t send_numel = x->numel(); @@ -160,7 +160,7 @@ class CIdentityOpCustomDeviceKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rid, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ring_id (%d) for c_identity op must be non-negative.", rid)); ctx.device_context().Alloc(out); @@ -180,19 +180,19 @@ class CSplitOpCustomDeviceKernel : public framework::OpKernel { PADDLE_ENFORCE_GE(rank, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_split must be " "greater than or equal to 0.", rank)); PADDLE_ENFORCE_GE(nranks, 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of nranks (%d) for c_split must be " "greater than or equal to 2.", nranks)); PADDLE_ENFORCE_LT(rank, nranks, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of rank (%d) for c_split must be " "less than that of nranks (%d).", rank, @@ -259,7 +259,7 @@ class CEmbeddingOpCustomDeviceKernel : public framework::OpKernel { *reinterpret_cast(out_tensor.impl().get())) .Resize(out_dims); } else { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "CustomDevice c_embedding ids only support int32 or int64.")); } } @@ -319,7 +319,7 @@ class CEmbeddingGradOpCustomDeviceKernel : public framework::OpKernel { table_grad_t->ShareDataWith( *reinterpret_cast(table_grad_tensor.impl().get())); } else { - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "CustomDevice c_embedding ids only support int32 or int64.")); } } @@ -543,11 +543,11 @@ class CAllReduceOpCustomDeviceKernel : public framework::OpKernel { auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` tensor should be on cpu place")); PADDLE_ENFORCE_EQ(cond->numel(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The input `cond` should be shape [1]")); if (!cond->data()[0]) { VLOG(4) << "Skip all reduce Op since cond is 0"; @@ -594,8 +594,8 @@ class CAllReduceOpCustomDeviceKernel : public framework::OpKernel { break; default: - 
PADDLE_THROW(paddle::platform::errors::InvalidArgument( - "Invalid reduce type: %d", red_type)); + PADDLE_THROW(phi::errors::InvalidArgument("Invalid reduce type: %d", + red_type)); } auto task = pg->AllReduce(in_tensor, out_tensor, opts); @@ -910,14 +910,14 @@ class GlobalScatterOpCustomDeviceKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); auto place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(local_count->dtype(), - phi::DataType::INT64, - platform::errors::InvalidArgument( - "Please use int64 type in local_count.")); - PADDLE_ENFORCE_EQ(global_count->dtype(), - phi::DataType::INT64, - platform::errors::InvalidArgument( - "Please use int64 type in global_count.")); + PADDLE_ENFORCE_EQ( + local_count->dtype(), + phi::DataType::INT64, + phi::errors::InvalidArgument("Please use int64 type in local_count.")); + PADDLE_ENFORCE_EQ( + global_count->dtype(), + phi::DataType::INT64, + phi::errors::InvalidArgument("Please use int64 type in global_count.")); auto map = distributed::ProcessGroupMapFromGid::getInstance(); const int64_t* cpu_local_count_data; @@ -1124,14 +1124,14 @@ class GlobalGatherOpCustomDeviceKernel : public framework::OpKernel { auto place = ctx.GetPlace(); auto out = ctx.Output("Out"); - PADDLE_ENFORCE_EQ(local_count->dtype(), - phi::DataType::INT64, - platform::errors::InvalidArgument( - "Please use int64 type in local_count.")); - PADDLE_ENFORCE_EQ(global_count->dtype(), - phi::DataType::INT64, - platform::errors::InvalidArgument( - "Please use int64 type in global_count.")); + PADDLE_ENFORCE_EQ( + local_count->dtype(), + phi::DataType::INT64, + phi::errors::InvalidArgument("Please use int64 type in local_count.")); + PADDLE_ENFORCE_EQ( + global_count->dtype(), + phi::DataType::INT64, + phi::errors::InvalidArgument("Please use int64 type in global_count.")); const int64_t* cpu_local_count_data; const int64_t* cpu_global_count_data; @@ -1370,7 +1370,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { float>, paddle::operators::CConcatOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>); + phi::dtype::float16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_split, device_type, @@ -1382,7 +1382,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { int>, paddle::operators::CSplitOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>); + phi::dtype::float16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_embedding, device_type, @@ -1391,7 +1391,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { float>, paddle::operators::CEmbeddingOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>); + phi::dtype::float16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_embedding_grad, device_type, @@ -1400,7 +1400,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { float>, paddle::operators::CEmbeddingGradOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>); + phi::dtype::float16>); REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_softmax_with_cross_entropy, @@ -1413,7 +1413,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { double>, paddle::operators::CSoftmaxWithCrossEntropyOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_softmax_with_cross_entropy_grad, @@ -1426,7 +1426,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& 
dev_type) { double>, paddle::operators::CSoftmaxWithCrossEntropyGradCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_identity, @@ -1445,7 +1445,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { int64_t>, paddle::operators::CIdentityOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_sync_calc_stream, @@ -1467,7 +1467,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { double>, paddle::operators::CSyncCalcStreamCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( c_allreduce_sum, device_type, @@ -1481,7 +1481,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::SUM>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::SUM>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1504,7 +1504,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::SUM>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::SUM>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1527,7 +1527,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::MIN>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::MIN>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1550,7 +1550,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::MAX>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::MAX>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1573,7 +1573,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { phi::ccl::CCLReduceOp::PRODUCT>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, - paddle::platform::float16, + phi::dtype::float16, phi::ccl::CCLReduceOp::PRODUCT>, paddle::operators::CAllReduceOpCustomDeviceKernel< paddle::platform::CustomDeviceContext, @@ -1590,8 +1590,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { paddle::operators::CBroadcastOpCustomDeviceKernel, paddle::operators::CBroadcastOpCustomDeviceKernel, paddle::operators::CBroadcastOpCustomDeviceKernel, - paddle::operators::CBroadcastOpCustomDeviceKernel< - paddle::platform::float16>) {} + paddle::operators::CBroadcastOpCustomDeviceKernel) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( barrier, device_type, @@ -1614,7 +1613,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { paddle::operators::RandomRoutingOpCustomDeviceKernel, paddle::operators::RandomRoutingOpCustomDeviceKernel, paddle::operators::RandomRoutingOpCustomDeviceKernel< - paddle::platform::float16>) {} + phi::dtype::float16>) {} 
REGISTER_OP_CUSTOM_DEVICE_KERNEL( assign_pos, device_type, @@ -1628,7 +1627,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { paddle::operators::GlobalScatterOpCustomDeviceKernel, paddle::operators::GlobalScatterOpCustomDeviceKernel, paddle::operators::GlobalScatterOpCustomDeviceKernel< - paddle::platform::float16>) {} + phi::dtype::float16>) {} REGISTER_OP_CUSTOM_DEVICE_KERNEL( global_gather, device_type, @@ -1637,7 +1636,7 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { paddle::operators::GlobalGatherOpCustomDeviceKernel, paddle::operators::GlobalGatherOpCustomDeviceKernel, paddle::operators::GlobalGatherOpCustomDeviceKernel< - paddle::platform::float16>) {} + phi::dtype::float16>) {} #endif } diff --git a/paddle/fluid/operators/cvm_op.cc b/paddle/fluid/operators/cvm_op.cc index 1e414ff217c2f..a305263338769 100644 --- a/paddle/fluid/operators/cvm_op.cc +++ b/paddle/fluid/operators/cvm_op.cc @@ -33,8 +33,8 @@ class CVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2UL, - platform::errors::InvalidArgument( - "Input(X)'s rank should be 2, but got %d", x_dims.size())); + phi::errors::InvalidArgument("Input(X)'s rank should be 2, but got %d", + x_dims.size())); if (ctx->Attrs().Get("use_cvm")) { ctx->SetOutputDim("Y", {x_dims[0], x_dims[1]}); @@ -77,23 +77,23 @@ class CVMGradientOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 2, but got %d", x_dims.size())); + phi::errors::InvalidArgument("Expect Input(X)'s rank == 2, but got %d", + x_dims.size())); PADDLE_ENFORCE_EQ( dy_dims.size(), 2, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 2, but got %d", dy_dims.size())); + phi::errors::InvalidArgument("Expect Input(X)'s rank == 2, but got %d", + dy_dims.size())); PADDLE_ENFORCE_EQ( cvm_dims.size(), 2, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 2, but got %d", cvm_dims.size())); + phi::errors::InvalidArgument("Expect Input(X)'s rank == 2, but got %d", + cvm_dims.size())); PADDLE_ENFORCE_EQ( x_dims[0], dy_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 1st dimension of Input(X) and Input(Y@Grad) should " "be equal, X is %d, Y@Grad is %d", x_dims[0], @@ -102,7 +102,7 @@ class CVMGradientOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( cvm_dims[1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When Attr(soft_label) == false, the 2nd dimension of " "Input(CVM) should be 2, but got %d cvm_dims[1]")); ctx->SetOutputDim(framework::GradVarName("X"), x_dims); diff --git a/paddle/fluid/operators/cvm_op.cu b/paddle/fluid/operators/cvm_op.cu index 1fbce90e494a0..5e127a532267b 100644 --- a/paddle/fluid/operators/cvm_op.cu +++ b/paddle/fluid/operators/cvm_op.cu @@ -110,7 +110,7 @@ class CVMCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( batch_size, lod[lod.size() - 1], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Input(X)'s dim[0] must be equal to last element of lod")); CvmComputeKernel<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, @@ -164,7 +164,7 @@ class CVMGradCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( batch_size, lod[lod.size() - 1], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Output(X@GRAD)'s dim[0] must be equal to last element of lod")); phi::MixVector mixv_lod(&lod); CvmGradComputeKernel<<<(dx_numel 
+ PADDLE_CUDA_NUM_THREADS - 1) / diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc index cc3a224a7e862..750310547306d 100644 --- a/paddle/fluid/operators/data_norm_op.cc +++ b/paddle/fluid/operators/data_norm_op.cc @@ -57,11 +57,11 @@ class DataNormOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("scale_w"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(scale_w) of DataNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("bias"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(bias) of DataNormOp should not be null.")); } @@ -69,39 +69,39 @@ class DataNormOp : public framework::OperatorWithKernel { const DataLayout data_layout = common::StringToDataLayout( ctx->Attrs().Get("data_layout")); - PADDLE_ENFORCE_EQ(x_dims.size() >= 2 && x_dims.size() <= 5, - true, - platform::errors::InvalidArgument( - "Input X must have 2 to 5 dimensions.")); + PADDLE_ENFORCE_EQ( + x_dims.size() >= 2 && x_dims.size() <= 5, + true, + phi::errors::InvalidArgument("Input X must have 2 to 5 dimensions.")); const int64_t C = (data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSize").size(), - 1UL, - platform::errors::InvalidArgument( - "The input dim of BatchSize should be 1")); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSum").size(), - 1UL, - platform::errors::InvalidArgument( - "The input dim of BatchSum should be 1")); + PADDLE_ENFORCE_EQ( + ctx->GetInputDim("BatchSize").size(), + 1UL, + phi::errors::InvalidArgument("The input dim of BatchSize should be 1")); + PADDLE_ENFORCE_EQ( + ctx->GetInputDim("BatchSum").size(), + 1UL, + phi::errors::InvalidArgument("The input dim of BatchSum should be 1")); PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSquareSum").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim of BatchSquareSum should be 1")); if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSize")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim[0] of BatchSize should be C")); PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSum")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim[0] of BatchSum should be C")); PADDLE_ENFORCE_EQ(ctx->GetInputDim("BatchSquareSum")[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim[0] of BatchSquareSum should be C")); } @@ -112,21 +112,21 @@ class DataNormOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( scale_dim.size(), 1UL, - platform::errors::InvalidArgument("the dimension of scale" - "must equal to 1. But received: " - "the shape of scale is [%s], " - "the dimension of scale is [%d]", - scale_dim, - scale_dim.size())); + phi::errors::InvalidArgument("the dimension of scale" + "must equal to 1. But received: " + "the shape of scale is [%s], " + "the dimension of scale is [%d]", + scale_dim, + scale_dim.size())); PADDLE_ENFORCE_EQ( bias_dim.size(), 1UL, - platform::errors::InvalidArgument("the dimension of bias" - "must equal to 1. But received: " - "the shape of bias is [%s]," - "the dimension of bias is [%d]", - bias_dim, - bias_dim.size())); + phi::errors::InvalidArgument("the dimension of bias" + "must equal to 1. 
But received: " + "the shape of bias is [%s]," + "the dimension of bias is [%d]", + bias_dim, + bias_dim.size())); bool check = true; if ((!ctx->IsRuntime()) && @@ -137,14 +137,14 @@ class DataNormOp : public framework::OperatorWithKernel { if (check) { PADDLE_ENFORCE_EQ(scale_dim[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the shape of scale must equal to [%d]" "But received: the shape of scale is [%d]", C, scale_dim[0])); PADDLE_ENFORCE_EQ(bias_dim[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the shape of bias must equal to [%d]" "But received: the shape of bias is [%d]", C, @@ -171,28 +171,28 @@ class DataNormOp : public framework::OperatorWithKernel { } PADDLE_ENFORCE_EQ(dn_param_type, OperatorWithKernel::IndicateVarDataType(ctx, "BatchSize"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "BatchSize input should be of float type")); - PADDLE_ENFORCE_EQ(dn_param_type, - OperatorWithKernel::IndicateVarDataType(ctx, "BatchSum"), - platform::errors::InvalidArgument( - "BatchSum input should be of float type")); + PADDLE_ENFORCE_EQ( + dn_param_type, + OperatorWithKernel::IndicateVarDataType(ctx, "BatchSum"), + phi::errors::InvalidArgument("BatchSum input should be of float type")); PADDLE_ENFORCE_EQ( dn_param_type, OperatorWithKernel::IndicateVarDataType(ctx, "BatchSquareSum"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "BatchSquareSum input should be of float type")); bool enable_scale_and_shift = ctx.Attr("enable_scale_and_shift"); if (enable_scale_and_shift) { PADDLE_ENFORCE_EQ(dn_param_type, OperatorWithKernel::IndicateVarDataType(ctx, "scale_w"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "scale_w input should be of float type")); - PADDLE_ENFORCE_EQ(dn_param_type, - OperatorWithKernel::IndicateVarDataType(ctx, "bias"), - platform::errors::InvalidArgument( - "bias input should be of float type")); + PADDLE_ENFORCE_EQ( + dn_param_type, + OperatorWithKernel::IndicateVarDataType(ctx, "bias"), + phi::errors::InvalidArgument("bias input should be of float type")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -208,7 +208,7 @@ class DataNormOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' should be between 0.0 and 0.001.")); }); AddAttr("slot_dim", @@ -279,7 +279,7 @@ class DataNormKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument("The Input dim size should be 2")); + phi::errors::InvalidArgument("The Input dim size should be 2")); const int N = static_cast(x_dims[0]); const int C = static_cast(data_layout == DataLayout::kNCHW ? 
x_dims[1] @@ -287,11 +287,11 @@ class DataNormKernel : public framework::OpKernel { PADDLE_ENFORCE_LT(0, N, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims of Input(X) should be greater than 0.")); PADDLE_ENFORCE_LT(0, C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims of Input(X) should be greater than 0.")); auto *y = ctx.Output("Y"); @@ -401,7 +401,7 @@ class DataNormKernel : public framework::OpKernel { break; } default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Unknown storage order: %d, please use NCHW or NHWC", data_layout)); } } @@ -421,17 +421,17 @@ class DataNormGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasOutput("BatchSize"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BatchSize) of DataNormGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BatchSum"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BatchSum) of DataNormGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BatchSquareSum"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BatchSquareSum) of DataNormGradOp should not be null.")); OP_INOUT_CHECK(ctx->HasInput("Means"), "Input", "Means", "DataNormGrad"); OP_INOUT_CHECK(ctx->HasInput("Scales"), "Input", "Scales", "DataNormGrad"); @@ -471,7 +471,7 @@ class DataNormGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ((has_scale_grad == has_bias_grad), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Scale@GRAD) and Output(Bias@GRAD)" "must be null or not be null at same time. " "But now, has Scale@Grad=[%d], has Bias@GRAD=[%d]", @@ -489,7 +489,7 @@ class DataNormGradOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Y@GRAD can not be found for computation")); } const phi::DenseTensor *t = nullptr; @@ -497,7 +497,7 @@ class DataNormGradOp : public framework::OperatorWithKernel { t = &var->Get(); } if (t == nullptr) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Y@GRAD can not be found for computation")); } @@ -524,7 +524,7 @@ class DataNormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument("The Input dim size should be 2")); + phi::errors::InvalidArgument("The Input dim size should be 2")); const int N = static_cast(x_dims[0]); const int C = static_cast(data_layout == DataLayout::kNCHW ? 
x_dims[1] @@ -710,7 +710,7 @@ class DataNormGradKernel : public framework::OpKernel { break; } default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Unknown storage order: %s, please use NCHW or NHWC", data_layout_str)); } diff --git a/paddle/fluid/operators/data_norm_op.cu b/paddle/fluid/operators/data_norm_op.cu index 33cd6a8e6e49c..4be27b671d8a5 100644 --- a/paddle/fluid/operators/data_norm_op.cu +++ b/paddle/fluid/operators/data_norm_op.cu @@ -115,17 +115,17 @@ class DataNormKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::PreconditionNotMet("The Input dim size should be 2")); + phi::errors::PreconditionNotMet("The Input dim size should be 2")); const int N = x_dims[0]; const int C = x_dims[1]; PADDLE_ENFORCE_LT(0, N, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims of Input(X) should be greater than 0.")); PADDLE_ENFORCE_LT(0, C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims of Input(X) should be greater than 0.")); const T *batch_size_in = @@ -174,7 +174,7 @@ class DataNormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::PreconditionNotMet("The Input dim size should be 2")); + phi::errors::PreconditionNotMet("The Input dim size should be 2")); const int N = x_dims[0]; const int C = x_dims[1]; @@ -226,7 +226,7 @@ class DataNormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( comm_context_manager.Has(std::to_string(rid)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -238,7 +238,7 @@ class DataNormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( comm_ctx, nullptr, - platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); } else { @@ -305,7 +305,7 @@ class DataNormGradKernel : public framework::OpKernel { } platform::GpuStreamSync(stream); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "PaddlePaddle should compile with GPU, and need_sync_stats connot be " "supported on windows now.")); #endif diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cc b/paddle/fluid/operators/deformable_psroi_pooling_op.cc index 5b339cf96c2b1..1b6ed2ba0be62 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cc +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cc @@ -148,7 +148,7 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( rois_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(ROIs) should be a 2-D phi::DenseTensor of shape (num_rois, " "4) " "given as [[ x1, y1, x2, y2], ...]. 
The rank of Input(ROIs) should "
@@ -158,12 +158,12 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(
         trans_dims.size(),
         4,
-        platform::errors::InvalidArgument("The rank of Input(Trans) should be "
-                                          "4 and the shape of Trans should be "
-                                          "(N, 2, H, W), but received Trans "
-                                          "rank is:%d and Trans shape is:[%s].",
-                                          trans_dims.size(),
-                                          trans_dims));
+        phi::errors::InvalidArgument("The rank of Input(Trans) should be "
+                                     "4 and the shape of Trans should be "
+                                     "(N, 2, H, W), but received Trans "
+                                     "rank is:%d and Trans shape is:[%s].",
+                                     trans_dims.size(),
+                                     trans_dims));
     auto pooled_height = ctx->Attrs().Get<int>("pooled_height");
     auto pooled_width = ctx->Attrs().Get<int>("pooled_width");
     auto spatial_scale = ctx->Attrs().Get<float>("spatial_scale");
@@ -176,17 +176,17 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel {
     auto part_width = part_size[1];
     auto sample_per_part = ctx->Attrs().Get<int>("sample_per_part");
     auto trans_std = ctx->Attrs().Get<float>("trans_std");
-    PADDLE_ENFORCE_GE(trans_std,
-                      0.,
-                      platform::errors::InvalidArgument(
-                          "Input(trans_std) should not be lower "
-                          "than 0.0, but received trans_std "
-                          "is:%f",
-                          trans_std));
+    PADDLE_ENFORCE_GE(
+        trans_std,
+        0.,
+        phi::errors::InvalidArgument("Input(trans_std) should not be lower "
+                                     "than 0.0, but received trans_std "
+                                     "is:%f",
+                                     trans_std));
     PADDLE_ENFORCE_GE(
         input_dims[1],
         output_channels,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The channel of Input(Input) should not be lower than "
             "Input(output_dim), "
             "but received Input channel is:%d and output_dim is:%d.",
@@ -195,70 +195,70 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_GT(
         pooled_height,
         0,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "Input(pooled_height) should be greater than 0, but received "
             "pooled_height is:%d.",
             pooled_height));
     PADDLE_ENFORCE_GT(
         pooled_width,
         0,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "Input(pooled_width) should be greater than 0, but received "
             "pooled_width is:%d.",
             pooled_width));
     PADDLE_ENFORCE_GT(
         spatial_scale,
         0.,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "Input(spatial_scale) should be greater than 0., but received "
            "spatial_scale is:%f.",
            spatial_scale));
     PADDLE_ENFORCE_EQ(
         group_size.size(),
         2,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "The length of Input(group_size) should be 2, but received "
            "group_size length is:%d.",
            group_size.size()));
     PADDLE_ENFORCE_GT(
         group_height,
         0,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "group_height in Input(group_size) should be greater than 0, "
            "but received group_height is:%d.",
            group_height));
     PADDLE_ENFORCE_GT(
         group_width,
         0,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "group_width in Input(group_size) should be greater than 0 "
            "but received group_width is:%d.",
            group_width));
     PADDLE_ENFORCE_EQ(
         part_size.size(),
         2,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "The length of Input(part_size) should be 2, but received "
            "part_size length is:%d.",
            part_size.size()));
     PADDLE_ENFORCE_GT(
         part_height,
         0,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "part_height in Input(part_size) should be greater than 0 "
            "but received part_height is:%d.",
            part_height));
     PADDLE_ENFORCE_GT(
         part_width,
         0,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "part_width in
Input(part_size) should be greater than 0 " "but received part_width is:%d.", part_width)); PADDLE_ENFORCE_LE( part_height, trans_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "part_height in Input(part_size) should not be greater than " "the height of Input(Trans), but received part_height is:%d, " "the height of Input(Trans) is:%d.", @@ -267,7 +267,7 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE( part_width, trans_dims[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "part_width in Input(part_size) should not be greater than " "the width of Input(Trans), but received part_width is:%d, " "the width of Input(Trans) is:%d.", @@ -276,7 +276,7 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( sample_per_part, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(sample_per_part) should be greater than 0, but received " "sample_per_part is:%d.", sample_per_part)); diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cu b/paddle/fluid/operators/deformable_psroi_pooling_op.cu index a3f045fd50a5f..1dfc02943b7fb 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu @@ -213,7 +213,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( num_rois, out->dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of Input(ROIs) should be same with the number of " "Output(Output), but received ROIs number is:%d, Output number " "is:%d.", @@ -225,7 +225,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { no_trans ? output_dim : output_dim / num_classes; PADDLE_ENFORCE_GE(channels_each_class, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "channels_each_class should not be lower than 1, but " "channels_each_class is:%d.", channels_each_class)); @@ -243,7 +243,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rois_batch_size, batch, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "rois_batch_size should be equal to the batch_size, but " "rois_batch_size is:%d, batch_size is:%d.", rois_batch_size, @@ -251,7 +251,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { int rois_num_with_lod = rois_lod[rois_batch_size]; PADDLE_ENFORCE_EQ(num_rois, rois_num_with_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rois_num from input and lod must be same, but" "rois_num from input is:%d, rois_num from lod is:%d.", num_rois, @@ -555,7 +555,7 @@ class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rois_batch_size, batch, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "rois_batch_size should be equal to the batch_size, but " "rois_batch_size is:%d, batch_size is:%d.", rois_batch_size, @@ -564,7 +564,7 @@ class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { int rois_num_with_lod = rois_lod[rois_batch_size]; PADDLE_ENFORCE_EQ(num_rois, rois_num_with_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rois_num from input and lod must be same, but" "rois_num from input is:%d, rois_num from lod is:%d.", num_rois, diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.h b/paddle/fluid/operators/deformable_psroi_pooling_op.h index 
1ff1c83206f50..417e2da3468aa 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.h +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h @@ -187,7 +187,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( num_rois, out->dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of Input(ROIs) should be same with the number of " "Output(Output), but received ROIs number is:%d, Output number " "is:%d.", @@ -221,7 +221,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { auto channels_each_class = no_trans ? output_dim : output_dim / num_classes; PADDLE_ENFORCE_GE(channels_each_class, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "channels_each_class should not be lower than 1, but " "channels_each_class is:%d.", channels_each_class)); @@ -238,7 +238,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rois_batch_size, batch, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "rois_batch_size should be equal to the batch_size, but " "rois_batch_size is:%d, batch_size is:%d.", rois_batch_size, @@ -246,7 +246,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { int rois_num_with_lod = rois_lod[rois_batch_size]; PADDLE_ENFORCE_EQ(num_rois, rois_num_with_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rois_num from input and lod must be same, but" "rois_num from input is:%d, rois_num from lod is:%d.", num_rois, @@ -542,7 +542,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel { int rois_num_with_lod = rois_lod[rois_batch_size]; PADDLE_ENFORCE_EQ(num_rois, rois_num_with_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rois_num from input and lod must be same, but" "rois_num from input is:%d, rois_num from lod is:%d.", num_rois, diff --git a/paddle/fluid/operators/delete_var_op.cc b/paddle/fluid/operators/delete_var_op.cc deleted file mode 100644 index 671c29d40cfb9..0000000000000 --- a/paddle/fluid/operators/delete_var_op.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class OpDesc; -class Scope; -template -class EmptyGradOpMaker; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -namespace paddle { -namespace operators { -class DeleteVarOp : public framework::OperatorBase { - public: - DeleteVarOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &place) const override { - // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(place); - dev_ctx.Wait(); - - auto delete_var_names = Inputs("X"); - const_cast(scope).EraseVars(delete_var_names); - } -}; - -class DeleteVarOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override {} -}; - -class DeleteVarOpInfoMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The input of delete op").AsDuplicable(); - AddComment(R"DOC( -Delete Operator. -It should not be configured by users directly. -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR( - delete_var, - paddle::operators::DeleteVarOp, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::operators::DeleteVarOpInfoMaker, - paddle::operators::DeleteVarOpShapeInference); diff --git a/paddle/fluid/operators/dequantize_log_op.cc b/paddle/fluid/operators/dequantize_log_op.cc index 03ede45695148..7526bdb49eafd 100644 --- a/paddle/fluid/operators/dequantize_log_op.cc +++ b/paddle/fluid/operators/dequantize_log_op.cc @@ -62,14 +62,14 @@ class DequantizeLogOp : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of DequantizeLogOp is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of DequantizeLogOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) of DequantizeLogOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of DequantizeLogOp is not found.")); ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/dequeue_op.cc b/paddle/fluid/operators/dequeue_op.cc deleted file mode 100644 index 9e5b809e772b6..0000000000000 --- a/paddle/fluid/operators/dequeue_op.cc +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" -using LoDTensorBlockingQueueHolder = - paddle::operators::reader::LoDTensorBlockingQueueHolder; - -namespace paddle { -namespace operators { - -class DequeueOp : public framework::OperatorBase { - public: - using framework::OperatorBase::OperatorBase; - DequeueOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope& scope, - const platform::Place& dev_place) const override { - const std::string& queue_name = Attr("queue_name"); - auto* queue_holder_var = scope.FindVar(queue_name); - PADDLE_ENFORCE_NOT_NULL( - queue_holder_var, - platform::errors::NotFound( - "No LoDTensorBlockingQueueHolder variable with name %s found.", - queue_name)); - auto* queue_holder = - queue_holder_var->template GetMutable(); - auto& out_names = Outputs("Out"); - PADDLE_ENFORCE_GT(out_names.size(), - 0, - platform::errors::InvalidArgument( - "The output for Op(dequeue) must be set.")); - for (const auto& out_name : out_names) { - auto out_var = scope.FindVar(out_name); - PADDLE_ENFORCE_NOT_NULL(out_var, - platform::errors::NotFound( - "No variable with name %s found", out_name)); - auto* out_tensor = out_var->GetMutable(); - PADDLE_ENFORCE_NOT_NULL( - out_tensor, - platform::errors::InvalidArgument( - "Variable with name %s has not been initialized.", out_name)); - - paddle::framework::LoDTensorArray lod_tensor_vec; - bool success = false; - lod_tensor_vec = queue_holder->GetQueue()->Pop(&success); - PADDLE_ENFORCE_EQ(lod_tensor_vec.size(), - 1, - platform::errors::InvalidArgument( - "Expected to pop only one element per Pop call for " - "Op(dequeue), but poped %d element.", - lod_tensor_vec.size())); - for (auto& lod_tensor : lod_tensor_vec) { - paddle::framework::TensorCopySync(lod_tensor, dev_place, out_tensor); - } - } - } -}; - -class DequeueOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddAttr("queue_name", - "Name of the `LoDTensorBlockingQueueHolder` variable"); - AddOutput("Out", "A list of `lod_tensor` to dequeue and assigned.") - .AsDuplicable(); - AddComment(R"DOC( - Dequeue operator. 
- )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = ::paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(dequeue, ops::DequeueOp, ops::DequeueOpMaker); diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index c9bee1eb60705..9aa19af0ba809 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -31,31 +31,17 @@ endfunction() detection_library(density_prior_box_op SRCS density_prior_box_op.cc density_prior_box_op.cu) -if(WITH_XPU) - detection_library(iou_similarity_op SRCS iou_similarity_op.cc - iou_similarity_op_xpu.cc) -else() - detection_library(iou_similarity_op SRCS iou_similarity_op.cc - iou_similarity_op.cu) -endif() - detection_library(bipartite_match_op SRCS bipartite_match_op.cc) -detection_library(mine_hard_examples_op SRCS mine_hard_examples_op.cc) detection_library(anchor_generator_op SRCS anchor_generator_op.cc anchor_generator_op.cu) detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc polygon_box_transform_op.cu) -detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi common) -detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS phi - common) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu) -detection_library(retinanet_detection_output_op SRCS - retinanet_detection_output_op.cc) if(WITH_GPU OR WITH_ROCM) if(WITH_GPU) diff --git a/paddle/fluid/operators/detection/anchor_generator_op.cc b/paddle/fluid/operators/detection/anchor_generator_op.cc index 8c3705ba3e760..3b826d5c249e1 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.cc +++ b/paddle/fluid/operators/detection/anchor_generator_op.cc @@ -25,24 +25,24 @@ class AnchorGeneratorOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("Input"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Input) of AnchorGeneratorOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Anchors"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Anchors) of AnchorGeneratorOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Variances"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Variances) of AnchorGeneratorOp should not be null.")); auto input_dims = ctx->GetInputDim("Input"); PADDLE_ENFORCE_EQ( input_dims.size(), 4, - platform::errors::InvalidArgument("The layout of input is NCHW.")); + phi::errors::InvalidArgument("The layout of input is NCHW.")); auto anchor_sizes = ctx->Attrs().Get>("anchor_sizes"); auto aspect_ratios = ctx->Attrs().Get>("aspect_ratios"); @@ -98,12 +98,12 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const std::vector& anchor_sizes) { PADDLE_ENFORCE_GT(anchor_sizes.size(), 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Size of anchor_sizes must be at least 1.")); for (size_t i = 0; i < anchor_sizes.size(); ++i) { PADDLE_ENFORCE_GT(anchor_sizes[i], 0.0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "anchor_sizes[%d] must be positive.", i)); } }); @@ -118,14 +118,14 @@ class 
@@ -118,14 +118,14 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker {
              "(vector<float>) List of variances to be used "
              "in box regression deltas")
         .AddCustomChecker([](const std::vector<float>& variances) {
-          PADDLE_ENFORCE_EQ(variances.size(),
-                            4UL,
-                            platform::errors::InvalidArgument(
-                                "Must provide 4 variance only."));
+          PADDLE_ENFORCE_EQ(
+              variances.size(),
+              4UL,
+              phi::errors::InvalidArgument("Must provide 4 variance only."));
           for (size_t i = 0; i < variances.size(); ++i) {
             PADDLE_ENFORCE_GT(variances[i],
                               0.0,
-                              platform::errors::InvalidArgument(
+                              phi::errors::InvalidArgument(
                                   "variance[%d] must be greater than 0.", i));
           }
         });
@@ -138,12 +138,12 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker {
           PADDLE_ENFORCE_EQ(
               stride.size(),
               2UL,
-              platform::errors::InvalidArgument(
+              phi::errors::InvalidArgument(
                   "Must provide 2 stride for width and height only."));
           for (size_t i = 0; i < stride.size(); ++i) {
             PADDLE_ENFORCE_GT(stride[i],
                               0.0,
-                              platform::errors::InvalidArgument(
+                              phi::errors::InvalidArgument(
                                   "stride[%d] should be larger than 0.", i));
           }
         });
diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc
index 53c082add0fa5..32942a03f1ab4 100644
--- a/paddle/fluid/operators/detection/bipartite_match_op.cc
+++ b/paddle/fluid/operators/detection/bipartite_match_op.cc
@@ -26,24 +26,24 @@ class BipartiteMatchOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(
         ctx->HasInput("DistMat"),
         true,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "Input(DistMat) of BipartiteMatch should not be null."));
     PADDLE_ENFORCE_EQ(ctx->HasOutput("ColToRowMatchIndices"),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "Output(ColToRowMatchIndices) of BipartiteMatch "
                           "should not be null."));
     PADDLE_ENFORCE_EQ(
         ctx->HasOutput("ColToRowMatchDist"),
         true,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "Output(ColToRowMatchDist) of BipartiteMatch should not be null."));
 
     auto dims = ctx->GetInputDim("DistMat");
-    PADDLE_ENFORCE_EQ(dims.size(),
-                      2,
-                      platform::errors::InvalidArgument(
-                          "The rank of Input(DistMat) must be 2."));
+    PADDLE_ENFORCE_EQ(
+        dims.size(),
+        2,
+        phi::errors::InvalidArgument("The rank of Input(DistMat) must be 2."));
 
     ctx->SetOutputDim("ColToRowMatchIndices", dims);
     ctx->SetOutputDim("ColToRowMatchDist", dims);
@@ -75,7 +75,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(
         dist.dims().size(),
         2,
-        platform::errors::InvalidArgument("The rank of dist must be 2."));
+        phi::errors::InvalidArgument("The rank of dist must be 2."));
     int64_t row = dist.dims()[0];
     int64_t col = dist.dims()[1];
     auto* dist_data = dist.data<T>();
@@ -140,7 +140,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
         PADDLE_ENFORCE_EQ(
             match_indices[max_idx],
             -1,
-            platform::errors::InvalidArgument(
+            phi::errors::InvalidArgument(
                 "The match_indices must be initialized to -1 at [%d].",
                 max_idx));
         match_indices[max_idx] = max_row_idx;
@@ -183,7 +183,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
           PADDLE_ENFORCE_EQ(
               match_indices[j],
               -1,
-              platform::errors::InvalidArgument(
+              phi::errors::InvalidArgument(
                   "The match_indices must be initialized to -1 at [%d].", j));
           match_indices[j] = max_row_idx;
           match_dist[j] = max_dist;
@@ -208,7 +208,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
       PADDLE_ENFORCE_EQ(
           dist_mat->lod().size(),
           1UL,
-          platform::errors::InvalidArgument("Only support 1 level of LoD."));
+          phi::errors::InvalidArgument("Only support 1 level of LoD."));
     }
     match_indices->mutable_data<int>({n, col}, context.GetPlace());
     match_dist->mutable_data<T>({n, col}, context.GetPlace());
diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc
index 5af100b8f6407..8df39b759cabb 100644
--- a/paddle/fluid/operators/detection/box_clip_op.cc
+++ b/paddle/fluid/operators/detection/box_clip_op.cc
@@ -24,12 +24,12 @@ class BoxClipOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE_EQ(ctx->HasInput("Input"),
                       true,
-                      platform::errors::NotFound("Input(Input) of BoxClipOp "
-                                                 "is not found."));
+                      phi::errors::NotFound("Input(Input) of BoxClipOp "
+                                            "is not found."));
     PADDLE_ENFORCE_EQ(ctx->HasInput("ImInfo"),
                       true,
-                      platform::errors::NotFound("Input(ImInfo) of BoxClipOp "
-                                                 "is not found."));
+                      phi::errors::NotFound("Input(ImInfo) of BoxClipOp "
+                                            "is not found."));
 
     auto input_box_dims = ctx->GetInputDim("Input");
     auto im_info_dims = ctx->GetInputDim("ImInfo");
@@ -39,20 +39,20 @@ class BoxClipOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(
           input_box_dims[input_box_size - 1],
           4,
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
               "The last dimension of Input(Input) in BoxClipOp must be 4. "
               "But received last dimension = %d",
               input_box_dims[input_box_size - 1]));
       PADDLE_ENFORCE_EQ(im_info_dims.size(),
                         2,
-                        platform::errors::InvalidArgument(
+                        phi::errors::InvalidArgument(
                             "The rank of Input(Input) in BoxClipOp must be 2."
                             " But received rank = %d",
                             im_info_dims.size()));
       PADDLE_ENFORCE_EQ(
           im_info_dims[1],
           3,
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
               "The last dimension of Input(ImInfo) of BoxClipOp must be 3. "
               "But received last dimension = %d",
               im_info_dims[1]));
diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h
index c07185dec167c..18faf1e2fbbcd 100644
--- a/paddle/fluid/operators/detection/box_clip_op.h
+++ b/paddle/fluid/operators/detection/box_clip_op.h
@@ -31,7 +31,7 @@ class BoxClipKernel : public framework::OpKernel<T> {
     if (input_box->lod().size()) {
       PADDLE_ENFORCE_EQ(input_box->lod().size(),
                         1UL,
-                        platform::errors::InvalidArgument(
+                        phi::errors::InvalidArgument(
                             "Input(Input) of BoxClip only supports 1 level "
                             "of LoD. But received the "
                             "level = %d",
diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
index 552a6da3b3425..a7b9ad490b56c 100644
--- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
+++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
@@ -23,33 +23,33 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(
         ctx->HasInput("PriorBox"),
         true,
-        platform::errors::NotFound("Input(PriorBox) of BoxDecoderAndAssignOp "
-                                   "is not found."));
+        phi::errors::NotFound("Input(PriorBox) of BoxDecoderAndAssignOp "
+                              "is not found."));
     PADDLE_ENFORCE_EQ(
         ctx->HasInput("PriorBoxVar"),
         true,
-        platform::errors::NotFound("Input(PriorBoxVar) of BoxDecoderAndAssignOp"
-                                   " is not found."));
+        phi::errors::NotFound("Input(PriorBoxVar) of BoxDecoderAndAssignOp"
+                              " is not found."));
     PADDLE_ENFORCE_EQ(
         ctx->HasInput("TargetBox"),
         true,
-        platform::errors::NotFound("Input(TargetBox) of BoxDecoderAndAssignOp "
-                                   "is not found."));
+        phi::errors::NotFound("Input(TargetBox) of BoxDecoderAndAssignOp "
+                              "is not found."));
     PADDLE_ENFORCE_EQ(
         ctx->HasInput("BoxScore"),
         true,
-        platform::errors::NotFound("Input(BoxScore) of BoxDecoderAndAssignOp "
-                                   "is not found."));
+        phi::errors::NotFound("Input(BoxScore) of BoxDecoderAndAssignOp "
+                              "is not found."));
     PADDLE_ENFORCE_EQ(
         ctx->HasOutput("DecodeBox"),
         true,
-        platform::errors::NotFound("Output(DecodeBox) of BoxDecoderAndAssignOp"
-                                   " is not found."));
+        phi::errors::NotFound("Output(DecodeBox) of BoxDecoderAndAssignOp"
+                              " is not found."));
     PADDLE_ENFORCE_EQ(
         ctx->HasOutput("OutputAssignBox"),
         true,
-        platform::errors::NotFound("Output(OutputAssignBox) of "
-                                   "BoxDecoderAndAssignOp is not found."));
+        phi::errors::NotFound("Output(OutputAssignBox) of "
+                              "BoxDecoderAndAssignOp is not found."));
 
     auto prior_box_dims = ctx->GetInputDim("PriorBox");
     auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar");
@@ -59,45 +59,45 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(
         prior_box_dims.size(),
         2,
-        platform::errors::InvalidArgument("The rank of Input of PriorBox must"
-                                          " be 2. But received rank = %d",
-                                          prior_box_dims.size()));
+        phi::errors::InvalidArgument("The rank of Input of PriorBox must"
+                                     " be 2. But received rank = %d",
+                                     prior_box_dims.size()));
     PADDLE_ENFORCE_EQ(
         prior_box_dims[1],
         4,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The shape of PriorBox is [N, 4], "
             "and the second dimension must be 4. But received dimension = %d",
             prior_box_dims[1]));
     PADDLE_ENFORCE_EQ(
         prior_box_var_dims.size(),
         1,
-        platform::errors::InvalidArgument("The rank of Input of PriorBoxVar "
-                                          "must be 1. But received rank = %d",
-                                          prior_box_var_dims.size()));
+        phi::errors::InvalidArgument("The rank of Input of PriorBoxVar "
+                                     "must be 1. But received rank = %d",
+                                     prior_box_var_dims.size()));
     PADDLE_ENFORCE_EQ(
         prior_box_var_dims[0],
         4,
-        platform::errors::InvalidArgument("The shape of PriorBoxVar is [4]. "
-                                          "But received dimension = %d",
-                                          prior_box_var_dims[0]));
+        phi::errors::InvalidArgument("The shape of PriorBoxVar is [4]. "
+                                     "But received dimension = %d",
+                                     prior_box_var_dims[0]));
     PADDLE_ENFORCE_EQ(
         target_box_dims.size(),
         2,
-        platform::errors::InvalidArgument("The rank of Input of TargetBox must "
-                                          "be 2. But received rank = %d",
-                                          target_box_dims.size()));
+        phi::errors::InvalidArgument("The rank of Input of TargetBox must "
+                                     "be 2. But received rank = %d",
+                                     target_box_dims.size()));
     PADDLE_ENFORCE_EQ(
         box_score_dims.size(),
         2,
-        platform::errors::InvalidArgument("The rank of Input of BoxScore must "
-                                          "be 2. But received rank = %d",
-                                          box_score_dims.size()));
+        phi::errors::InvalidArgument("The rank of Input of BoxScore must "
+                                     "be 2. But received rank = %d",
+                                     box_score_dims.size()));
     if (ctx->IsRuntime()) {
       PADDLE_ENFORCE_EQ(
           prior_box_dims[0],
          target_box_dims[0],
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
              "The first dimension of prior_box and "
              "target_box is the number of box and should be same. But "
              "received dimension of prior_box is %d, dimension of target_box "
@@ -107,7 +107,7 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(
           prior_box_dims[0],
           box_score_dims[0],
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
              "The first dimension of prior_box and "
              "box_score is the number of box and should be same. But received "
              "dimension of prior_box is %d, dimension of box_score is %d",
@@ -116,7 +116,7 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(
           target_box_dims[1],
           box_score_dims[1] * prior_box_dims[1],
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
              "The shape of target_box is "
              "[N, classnum * 4], The shape of box_score is [N, classnum], "
              "The shape of prior_box is [N, 4]. But received second dimension "
diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
index db2f9726db56a..fd5161932ff22 100644
--- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
+++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
@@ -24,18 +24,18 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(
         context->HasInputs("MultiLevelRois"),
         true,
-        platform::errors::NotFound("Inputs(MultiLevelRois) of "
-                                   "CollectFpnProposalsOp is not found"));
+        phi::errors::NotFound("Inputs(MultiLevelRois) of "
+                              "CollectFpnProposalsOp is not found"));
     PADDLE_ENFORCE_EQ(
         context->HasInputs("MultiLevelScores"),
         true,
-        platform::errors::NotFound("Inputs(MultiLevelScores) of "
-                                   "CollectFpnProposalsOp is not found"));
+        phi::errors::NotFound("Inputs(MultiLevelScores) of "
+                              "CollectFpnProposalsOp is not found"));
     PADDLE_ENFORCE_EQ(
         context->HasOutput("FpnRois"),
         true,
-        platform::errors::NotFound("Outputs(MultiFpnRois) of "
-                                   "CollectFpnProposalsOp is not found"));
+        phi::errors::NotFound("Outputs(MultiFpnRois) of "
+                              "CollectFpnProposalsOp is not found"));
     auto roi_dims = context->GetInputsDim("MultiLevelRois");
     auto score_dims = context->GetInputsDim("MultiLevelScores");
     auto post_nms_topN = context->Attrs().Get<int>("post_nms_topN");
@@ -44,7 +44,7 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(
           roi_dim[1],
           4,
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
              "Second dimension of Input"
              "(MultiLevelRois) must be 4. But received dimension = %d",
              roi_dim[1]));
@@ -53,7 +53,7 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(
           score_dim[1],
           1,
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
              "Second dimension of Input"
              "(MultiLevelScores) must be 1.
But received dimension = %d", score_dim[1])); @@ -79,7 +79,7 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( roi_lod, score_lod, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(MultiLevelRois) and " "Inputs(MultiLevelScores) should have same lod.")); } diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h index 462b4a4584ece..81356170598bf 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h @@ -76,7 +76,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GE(post_nms_topN, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The parameter post_nms_topN must be " "a positive integer. But received post_nms_topN = %d", post_nms_topN)); @@ -85,7 +85,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( multi_layer_rois.size(), multi_layer_scores.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of RoIs and Scores should" " be the same. But received number of RoIs is %d, number of Scores " "is %d", diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cc b/paddle/fluid/operators/detection/density_prior_box_op.cc index e79de60b7690d..4a533615aab15 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op.cc +++ b/paddle/fluid/operators/detection/density_prior_box_op.cc @@ -29,7 +29,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( image_dims.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Image) of Op(density_prior_box) should be a 4-D Tensor " "and data format is NCHW. But received Image's dimensions = %d, " "shape = [%s].", @@ -38,7 +38,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_dims.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Input) of Op(density_prior_box) should be a 4-D Tensor " "and data format is NCHW. But received Input's dimensions = %d, " "shape = [%s].", @@ -49,7 +49,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( input_dims[2], image_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Input's height" "of DensityPriorBoxOp should be smaller than input tensor Image's" "height. But received Input's height = %d, Image's height = %d", @@ -59,7 +59,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( input_dims[3], image_dims[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Input's width" "of DensityPriorBoxOp should be smaller than input tensor Image's" "width. But received Input's width = %d, Image's width = %d", @@ -76,7 +76,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( fixed_sizes.size(), densities.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of fixed_sizes and densities must be equal. 
" "But received: fixed_sizes's length is %d, densities's length " "is %d", @@ -139,14 +139,14 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const std::vector& variances) { PADDLE_ENFORCE_EQ(variances.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of variance must " "be 4. But received: variances' length is %d.", variances.size())); for (size_t i = 0; i < variances.size(); ++i) { PADDLE_ENFORCE_GT(variances[i], 0.0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "variance[%d] must be greater " "than 0. But received: variance[%d] = %f", i, @@ -165,24 +165,24 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { "Density prior boxes step across width, 0.0 for auto calculation.") .SetDefault(0.0) .AddCustomChecker([](const float& step_w) { - PADDLE_ENFORCE_GE(step_w, - 0.0, - platform::errors::InvalidArgument( - "step_w should be larger " - "than 0. But received: step_w = %f.", - step_w)); + PADDLE_ENFORCE_GE( + step_w, + 0.0, + phi::errors::InvalidArgument("step_w should be larger " + "than 0. But received: step_w = %f.", + step_w)); }); AddAttr( "step_h", "Density prior boxes step across height, 0.0 for auto calculation.") .SetDefault(0.0) .AddCustomChecker([](const float& step_h) { - PADDLE_ENFORCE_GE(step_h, - 0.0, - platform::errors::InvalidArgument( - "step_h should be larger " - "than 0. But received: step_h = %f.", - step_h)); + PADDLE_ENFORCE_GE( + step_h, + 0.0, + phi::errors::InvalidArgument("step_h should be larger " + "than 0. But received: step_h = %f.", + step_h)); }); AddAttr("offset", @@ -198,7 +198,7 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GT( fixed_sizes[i], 0.0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "fixed_sizes[%d] should be " "larger than 0. But received: fixed_sizes[%d] = %f", i, @@ -216,7 +216,7 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GT( fixed_ratios[i], 0.0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "fixed_ratios[%d] should be " "larger than 0. But received: fixed_ratios[%d] = %f", i, @@ -234,7 +234,7 @@ class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_GT( densities[i], 0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "densities[%d] should be " "larger than 0. 
But received: densities[%d] = %f.", i, diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc index bf56a6f857e0d..5ee843d72387b 100644 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc @@ -44,57 +44,57 @@ class GenerateMaskLabelsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("ImInfo"), true, - platform::errors::InvalidArgument("Input(ImInfo) shouldn't be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("GtClasses"), - true, - platform::errors::InvalidArgument( - "Input(GtClasses) shouldn't be null.")); + phi::errors::InvalidArgument("Input(ImInfo) shouldn't be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("GtClasses"), + true, + phi::errors::InvalidArgument("Input(GtClasses) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("IsCrowd"), true, - platform::errors::InvalidArgument("Input(IsCrowd) shouldn't be null.")); + phi::errors::InvalidArgument("Input(IsCrowd) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("GtSegms"), true, - platform::errors::InvalidArgument("Input(GtSegms) shouldn't be null.")); + phi::errors::InvalidArgument("Input(GtSegms) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Rois"), true, - platform::errors::InvalidArgument("Input(Rois) shouldn't be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("LabelsInt32"), - true, - platform::errors::InvalidArgument( - "Input(LabelsInt32) shouldn't be null.")); + phi::errors::InvalidArgument("Input(Rois) shouldn't be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("LabelsInt32"), + true, + phi::errors::InvalidArgument("Input(LabelsInt32) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("MaskRois"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(MaskRois) of GenerateMaskLabelsOp should not be null")); PADDLE_ENFORCE_EQ(ctx->HasOutput("RoiHasMaskInt32"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(RoiHasMaskInt32) of GenerateMaskLabelsOp " "should not be null")); PADDLE_ENFORCE_EQ( ctx->HasOutput("MaskInt32"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(MaskInt32) of GenerateMaskLabelsOp should not be null")); auto im_info_dims = ctx->GetInputDim("ImInfo"); auto gt_segms_dims = ctx->GetInputDim("GtSegms"); - PADDLE_ENFORCE_EQ(im_info_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(ImInfo) must be 2.")); - PADDLE_ENFORCE_EQ(gt_segms_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(GtSegms) must be 2.")); + PADDLE_ENFORCE_EQ( + im_info_dims.size(), + 2, + phi::errors::InvalidArgument("The rank of Input(ImInfo) must be 2.")); + PADDLE_ENFORCE_EQ( + gt_segms_dims.size(), + 2, + phi::errors::InvalidArgument("The rank of Input(GtSegms) must be 2.")); PADDLE_ENFORCE_EQ(gt_segms_dims[1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of Input(GtSegms) must be 2.")); int num_classes = ctx->Attrs().Get("num_classes"); int resolution = ctx->Attrs().Get("resolution"); @@ -170,7 +170,7 @@ std::vector SampleMaskForOneImage( const int* label_int32_data = label_int32.data(); PADDLE_ENFORCE_EQ(roi_size, label_int32.dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of label [%d] is the different from " "roi_size [%d], they should be same.", label_int32.dims()[0], @@ -197,7 +197,7 @@ 
std::vector SampleMaskForOneImage( int e = static_cast(lod2[s_idx + j + 1]); PADDLE_ENFORCE_NE(s, e, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The start point and the end point in the poly " "segment [%d] should not be same, but received " "the start point [%d] and the end point [%d].", @@ -349,34 +349,34 @@ class GenerateMaskLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gt_classes->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp gt_classes needs 1 level of LoD")); PADDLE_ENFORCE_EQ( is_crowd->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp is_crowd needs 1 level of LoD")); PADDLE_ENFORCE_EQ(rois->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp rois needs 1 level of LoD")); PADDLE_ENFORCE_EQ( label_int32->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp label_int32 needs 1 level of LoD")); PADDLE_ENFORCE_EQ( gt_segms->lod().size(), 3UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateMaskLabelsOp gt_segms needs 3 level of LoD")); int64_t n = static_cast(gt_classes->lod().back().size() - 1); PADDLE_ENFORCE_EQ( gt_segms->lod()[0].size() - 1, n, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batchsize of Input(gt_segms) and Input(gt_classes) should be " "same, but received gt_segms[%d], gt_classes[%d].", gt_segms->lod()[0].size() - 1, diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index a0fb3ec799eea..ad37aa2ae682f 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -68,49 +68,49 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("RpnRois"), true, - platform::errors::NotFound("Input(RpnRois) shouldn't be null.")); + phi::errors::NotFound("Input(RpnRois) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("GtClasses"), true, - platform::errors::NotFound("Input(GtClasses) shouldn't be null.")); + phi::errors::NotFound("Input(GtClasses) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("IsCrowd"), true, - platform::errors::NotFound("Input(IsCrowd) shouldn't be null.")); + phi::errors::NotFound("Input(IsCrowd) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("GtBoxes"), true, - platform::errors::NotFound("Input(GtBoxes) shouldn't be null.")); + phi::errors::NotFound("Input(GtBoxes) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("ImInfo"), true, - platform::errors::NotFound("Input(ImInfo) shouldn't be null.")); + phi::errors::NotFound("Input(ImInfo) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Rois"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Rois) of GenerateProposalLabelsOp should not be null")); PADDLE_ENFORCE_EQ(ctx->HasOutput("LabelsInt32"), true, - platform::errors::NotFound("Output(LabelsInt32) of " - "GenerateProposalLabelsOp " - "should not be null")); + phi::errors::NotFound("Output(LabelsInt32) of " + "GenerateProposalLabelsOp " + "should not be null")); PADDLE_ENFORCE_EQ(ctx->HasOutput("BboxTargets"), true, - platform::errors::NotFound("Output(BboxTargets) of " - "GenerateProposalLabelsOp " - "should not be null")); + 
phi::errors::NotFound("Output(BboxTargets) of " + "GenerateProposalLabelsOp " + "should not be null")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BboxInsideWeights"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BboxInsideWeights) of GenerateProposalLabelsOp " "should not be null")); PADDLE_ENFORCE_EQ( ctx->HasOutput("BboxOutsideWeights"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(BboxOutsideWeights) of GenerateProposalLabelsOp " "should not be null")); @@ -120,21 +120,21 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(RpnRois) must be 2. " "But received dimensions size=[%d], dimensions=[%s].", rpn_rois_dims.size(), rpn_rois_dims)); PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(GtBoxes) must be 2. " "But received dimensions size=[%d], dimensions=[%s].", gt_boxes_dims.size(), gt_boxes_dims)); PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of Input(ImInfo) must be 2. But " "received dimensions size=[%d], dimensions=[%s].", im_info_dims.size(), @@ -146,7 +146,7 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("MaxOverlap"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(MaxOverlap) of GenerateProposalLabelsOp " "should not be null when is_cascade_rcnn is True.")); } @@ -544,7 +544,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( rpn_rois->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD. But " "received level of LoD is [%d], LoD is [%s].", rpn_rois->lod().size(), @@ -552,7 +552,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gt_classes->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateProposalLabelsOp gt_classes needs 1 level of LoD. But " "received level of LoD is [%d], LoD is [%s].", gt_classes->lod().size(), @@ -560,7 +560,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( is_crowd->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateProposalLabelsOp is_crowd needs 1 level of LoD. But " "received level of LoD is [%d], LoD is [%s].", is_crowd->lod().size(), @@ -568,7 +568,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( gt_boxes->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GenerateProposalLabelsOp gt_boxes needs 1 level of LoD. 
But " "received level of LoD is [%d], LoD is [%s].", gt_boxes->lod().size(), diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index 710db1668e237..5e961674cd774 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -35,23 +35,23 @@ class GenerateProposalsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("Scores"), true, - platform::errors::NotFound("Input(Scores) shouldn't be null.")); + phi::errors::NotFound("Input(Scores) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("BboxDeltas"), true, - platform::errors::NotFound("Input(BboxDeltas) shouldn't be null.")); + phi::errors::NotFound("Input(BboxDeltas) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("ImInfo"), true, - platform::errors::NotFound("Input(ImInfo) shouldn't be null.")); + phi::errors::NotFound("Input(ImInfo) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Anchors"), true, - platform::errors::NotFound("Input(Anchors) shouldn't be null.")); + phi::errors::NotFound("Input(Anchors) shouldn't be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Variances"), true, - platform::errors::NotFound("Input(Variances) shouldn't be null.")); + phi::errors::NotFound("Input(Variances) shouldn't be null.")); ctx->SetOutputDim("RpnRois", {-1, 4}); ctx->SetOutputDim("RpnRoiProbs", {-1, 1}); diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index d24cbcb81d019..1bb494f7fa508 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -151,7 +151,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { float eta = context.Attr("eta"); PADDLE_ENFORCE_GE(eta, 1., - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Not support adaptive NMS. The attribute 'eta' " "should not less than 1. But received eta=[%d]", eta)); diff --git a/paddle/fluid/operators/detection/iou_similarity_op.cc b/paddle/fluid/operators/detection/iou_similarity_op.cc deleted file mode 100644 index 0f2ac1c86d628..0000000000000 --- a/paddle/fluid/operators/detection/iou_similarity_op.cc +++ /dev/null @@ -1,118 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/detection/iou_similarity_op.h" - -namespace paddle { -namespace operators { - -class IOUSimilarityOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "iou_similarity"); - OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "iou_similarity"); - auto x_dims = ctx->GetInputDim("X"); - auto y_dims = ctx->GetInputDim("Y"); - - PADDLE_ENFORCE_EQ( - x_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "The rank of Input(X) must be 2, but got dimension = %d.", - x_dims.size())); - PADDLE_ENFORCE_EQ( - x_dims[1], - 4UL, - platform::errors::InvalidArgument( - "The shape of X is [N, 4], bug got dimension = %d.", x_dims[1])); - PADDLE_ENFORCE_EQ( - y_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "The rank of Input(Y) must be 2, but got dimension = %d.", - y_dims.size())); - PADDLE_ENFORCE_EQ( - y_dims[1], - 4UL, - platform::errors::InvalidArgument( - "The shape of Y is [M, 4], but got dimension = %d.", y_dims[1])); - - ctx->ShareLoD("X", /*->*/ "Out"); - ctx->SetOutputDim("Out", common::make_ddim({x_dims[0], y_dims[0]})); - } -}; - -class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "X", - "(phi::DenseTensor, default phi::DenseTensor) " - "Box list X is a 2-D phi::DenseTensor with shape [N, 4] holds N boxes, " - "each box is represented as [xmin, ymin, xmax, ymax], " - "the shape of X is [N, 4]. [xmin, ymin] is the left top " - "coordinate of the box if the input is image feature map, they " - "are close to the origin of the coordinate system. " - "[xmax, ymax] is the right bottom coordinate of the box. " - "This tensor can contain LoD information to represent a batch " - "of inputs. One instance of this batch can contain different " - "numbers of entities."); - AddInput("Y", - "(Tensor, default Tensor) " - "Box list Y holds M boxes, each box is represented as " - "[xmin, ymin, xmax, ymax], the shape of X is [N, 4]. " - "[xmin, ymin] is the left top coordinate of the box if the " - "input is image feature map, and [xmax, ymax] is the right " - "bottom coordinate of the box."); - AddAttr("box_normalized", - "(bool, default true) " - "whether treat the priorbox as a normalized box") - .SetDefault(true); - AddOutput("Out", - "(phi::DenseTensor, the lod is same as input X) The output of " - "iou_similarity op, a tensor with shape [N, M] " - "representing pairwise iou scores."); - - AddComment(R"DOC( -**IOU Similarity Operator** - -Computes intersection-over-union (IOU) between two box lists. -Box list 'X' should be a phi::DenseTensor and 'Y' is a common Tensor, -boxes in 'Y' are shared by all instance of the batched inputs of X. 
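For reference while reading this deletion, a minimal dependency-free C++ sketch of the pairwise IoU the operator computed; Box, Area, IoU, and PairwiseIoU are illustrative names rather than Paddle APIs, while the +1 pixel offset for unnormalized boxes and the 1e-10 epsilon mirror the kernel removed below.

#include <algorithm>
#include <vector>

// One axis-aligned box, [xmin, ymin, xmax, ymax], in the layout the
// deleted OpMaker documents. "normalized" follows the box_normalized
// attribute: pixel-coordinate boxes add +1 to widths and heights.
struct Box { float xmin, ymin, xmax, ymax; };

float Area(const Box& b, bool normalized) {
  float off = normalized ? 0.f : 1.f;
  return (b.xmax - b.xmin + off) * (b.ymax - b.ymin + off);
}

// IoU = inter / (area(A) + area(B) - inter), with the kernel's epsilon
// in the denominator to avoid division by zero.
float IoU(const Box& a, const Box& b, bool normalized) {
  float off = normalized ? 0.f : 1.f;
  float iw = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin) + off;
  float ih = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin) + off;
  if (iw <= 0.f || ih <= 0.f) return 0.f;
  float inter = iw * ih;
  return inter / (Area(a, normalized) + Area(b, normalized) - inter + 1e-10f);
}

// Pairwise scores: out[i][j] = IoU(x[i], y[j]), the [N, M] output above.
std::vector<std::vector<float>> PairwiseIoU(const std::vector<Box>& x,
                                            const std::vector<Box>& y,
                                            bool normalized) {
  std::vector<std::vector<float>> out(x.size(),
                                      std::vector<float>(y.size(), 0.f));
  for (size_t i = 0; i < x.size(); ++i)
    for (size_t j = 0; j < y.size(); ++j)
      out[i][j] = IoU(x[i], y[j], normalized);
  return out;
}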
-Given two boxes A and B, the calculation of IOU is as follows: - -$$ -IOU(A, B) = -\\frac{area(A\\cap B)}{area(A)+area(B)-area(A\\cap B)} -$$ - -)DOC"); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - iou_similarity, - ops::IOUSimilarityOp, - ops::IOUSimilarityOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL( - iou_similarity, CPU, ALL_LAYOUT, ops::IOUSimilarityKernel, float, double) {} diff --git a/paddle/fluid/operators/detection/iou_similarity_op.cu b/paddle/fluid/operators/detection/iou_similarity_op.cu deleted file mode 100644 index e4e001e096558..0000000000000 --- a/paddle/fluid/operators/detection/iou_similarity_op.cu +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/detection/iou_similarity_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL( - iou_similarity, GPU, ALL_LAYOUT, ops::IOUSimilarityKernel, float, double) {} diff --git a/paddle/fluid/operators/detection/iou_similarity_op.h b/paddle/fluid/operators/detection/iou_similarity_op.h deleted file mode 100644 index 75e7b9096241a..0000000000000 --- a/paddle/fluid/operators/detection/iou_similarity_op.h +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/for_range.h" - -template -inline HOSTDEVICE T IOUSimilarity(T xmin1, - T ymin1, - T xmax1, - T ymax1, - T xmin2, - T ymin2, - T xmax2, - T ymax2, - bool normalized, - T eps) { - constexpr T zero = static_cast(0); - T area1; - T area2; - if (!normalized) { - area1 = (ymax1 - ymin1 + 1) * (xmax1 - xmin1 + 1); - area2 = (ymax2 - ymin2 + 1) * (xmax2 - xmin2 + 1); - } else { - area1 = (ymax1 - ymin1) * (xmax1 - xmin1); - area2 = (ymax2 - ymin2) * (xmax2 - xmin2); - } - - T inter_xmax = xmax1 > xmax2 ? xmax2 : xmax1; - T inter_ymax = ymax1 > ymax2 ? ymax2 : ymax1; - T inter_xmin = xmin1 > xmin2 ? xmin1 : xmin2; - T inter_ymin = ymin1 > ymin2 ? ymin1 : ymin2; - T inter_height = inter_ymax - inter_ymin; - T inter_width = inter_xmax - inter_xmin; - if (!normalized) { - inter_height = inter_height + 1; - inter_width = inter_width + 1; - } - inter_height = inter_height > zero ? inter_height : zero; - inter_width = inter_width > zero ? 
inter_width : zero; - T inter_area = inter_width * inter_height; - T union_area = area1 + area2 - inter_area + eps; - T sim_score = inter_area / union_area; - return sim_score; -} - -template -struct IOUSimilarityFunctor { - IOUSimilarityFunctor( - const T* x, const T* y, T* z, int cols, bool normalized, T eps) - : x_(x), - y_(y), - z_(z), - cols_(static_cast(cols)), - normalized_(normalized), - eps_(eps) {} - - inline HOSTDEVICE void operator()(size_t tid) const { - size_t row_id = tid / cols_; - size_t col_id = tid % cols_; - - T x_min1 = x_[row_id * 4]; - T y_min1 = x_[row_id * 4 + 1]; - T x_max1 = x_[row_id * 4 + 2]; - T y_max1 = x_[row_id * 4 + 3]; - - T x_min2 = y_[col_id * 4]; - T y_min2 = y_[col_id * 4 + 1]; - T x_max2 = y_[col_id * 4 + 2]; - T y_max2 = y_[col_id * 4 + 3]; - - T sim = IOUSimilarity(x_min1, - y_min1, - x_max1, - y_max1, - x_min2, - y_min2, - x_max2, - y_max2, - normalized_, - eps_); - - z_[row_id * cols_ + col_id] = sim; - } - const T* x_; - const T* y_; - T* z_; - const size_t cols_; - bool normalized_; - T eps_; -}; - -namespace paddle { -namespace operators { - -template -class IOUSimilarityKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* in_x = ctx.Input("X"); - const phi::DenseTensor* in_y = ctx.Input("Y"); - bool normalized = ctx.Attr("box_normalized"); - phi::DenseTensor* out = ctx.Output("Out"); - - int x_n = in_x->dims()[0]; - int y_n = in_y->dims()[0]; - T eps = static_cast(1e-10); - IOUSimilarityFunctor functor(in_x->data(), - in_y->data(), - out->mutable_data(ctx.GetPlace()), - y_n, - normalized, - eps); - - platform::ForRange for_range( - static_cast(ctx.device_context()), x_n * y_n); - for_range(functor); - } -}; // namespace operators - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc b/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc deleted file mode 100644 index 27ffa64c2a892..0000000000000 --- a/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef PADDLE_WITH_XPU - -#include "paddle/fluid/operators/detection/iou_similarity_op.h" - -namespace paddle { -namespace operators { - -template -class XPUIOUSimilarityKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* in_x = ctx.Input("X"); - const phi::DenseTensor* in_y = ctx.Input("Y"); - bool normalized = ctx.Attr("box_normalized"); - phi::DenseTensor* out = ctx.Output("Out"); - - int x_n = in_x->dims()[0]; - int y_n = in_y->dims()[0]; - T eps = static_cast(1e-10); - - auto& dev_ctx = ctx.template device_context(); - int r = xpu::iou_similarity(dev_ctx.x_context(), - in_x->data(), - in_y->data(), - out->mutable_data(ctx.GetPlace()), - x_n, - y_n, - eps, - normalized); - PADDLE_ENFORCE_EQ( - r, - XPU_SUCCESS, - platform::errors::External( - "XPU iou_similarity kernel return wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -using XPU = paddle::platform::XPUDeviceContext; - -PD_REGISTER_STRUCT_KERNEL( - iou_similarity, XPU, ALL_LAYOUT, ops::XPUIOUSimilarityKernel, float) {} - -#endif diff --git a/paddle/fluid/operators/detection/locality_aware_nms_op.cc b/paddle/fluid/operators/detection/locality_aware_nms_op.cc deleted file mode 100644 index 8d3ed1a033acf..0000000000000 --- a/paddle/fluid/operators/detection/locality_aware_nms_op.cc +++ /dev/null @@ -1,524 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/detection/nms_util.h" - -namespace paddle { -namespace operators { - -class LocalityAwareNMSOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("BBoxes"), "Input", "BBoxes", "locality_aware_nms"); - OP_INOUT_CHECK( - ctx->HasInput("Scores"), "Input", "Scores", "locality_aware_nms"); - OP_INOUT_CHECK( - ctx->HasOutput("Out"), "Output", "Out", "locality_aware_nms"); - - auto box_dims = ctx->GetInputDim("BBoxes"); - auto score_dims = ctx->GetInputDim("Scores"); - auto score_size = score_dims.size(); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - score_size, - 3, - platform::errors::InvalidArgument( - "The rank of Input(Scores) must be 3. But received %d.", - score_size)); - PADDLE_ENFORCE_EQ( - box_dims.size(), - 3, - platform::errors::InvalidArgument( - "The rank of Input(BBoxes) must be 3. 
But received %d.", - box_dims.size())); - PADDLE_ENFORCE_EQ( - box_dims[2] == 4 || box_dims[2] == 8 || box_dims[2] == 16 || - box_dims[2] == 24 || box_dims[2] == 32, - true, - platform::errors::InvalidArgument( - "The last dimension of Input(BBoxes) must be 4 or 8, " - "represents the layout of coordinate " - "[xmin, ymin, xmax, ymax] or " - "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " - "8 points: [xi, yi] i= 1,2,...,8 or " - "12 points: [xi, yi] i= 1,2,...,12 or " - "16 points: [xi, yi] i= 1,2,...,16. " - "But received %d.", - box_dims[2])); - PADDLE_ENFORCE_EQ( - box_dims[1], - score_dims[2], - platform::errors::InvalidArgument( - "The 2nd dimension of Input(BBoxes) must be equal to " - "last dimension of Input(Scores), which represents the " - "predicted bboxes. But received the 2nd dimension of " - "Input(BBoxes) was %d, last dimension of Input(Scores) was %d.", - box_dims[1], - score_dims[2])); - } - // Here the box_dims[0] is not the real dimension of output. - // It will be rewritten in the computing kernel. - ctx->SetOutputDim("Out", {box_dims[1], box_dims[2] + 2}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Scores"), - platform::CPUPlace()); - } -}; - -template -void PolyWeightedMerge(const T* box1, - T* box2, - const T score1, - const T score2, - const size_t box_size) { - for (size_t i = 0; i < box_size; ++i) { - box2[i] = (box1[i] * score1 + box2[i] * score2) / (score1 + score2); - } -} - -template -void GetMaxScoreIndexWithLocalityAware( - T* scores, - T* bbox_data, - int64_t box_size, - const T threshold, - int top_k, - int64_t num_boxes, - std::vector>* sorted_indices, - const T nms_threshold, - const bool normalized) { - std::vector skip(num_boxes, true); - int index = -1; - for (int64_t i = 0; i < num_boxes; ++i) { - if (index > -1) { - T overlap = T(0.); - if (box_size == 4) { - overlap = phi::funcs::JaccardOverlap( - bbox_data + i * box_size, bbox_data + index * box_size, normalized); - } - // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32 - if (box_size == 8 || box_size == 16 || box_size == 24 || box_size == 32) { - overlap = phi::funcs::PolyIoU(bbox_data + i * box_size, - bbox_data + index * box_size, - box_size, - normalized); - } - - if (overlap > nms_threshold) { - PolyWeightedMerge(bbox_data + i * box_size, - bbox_data + index * box_size, - scores[i], - scores[index], - box_size); - scores[index] += scores[i]; - } else { - skip[index] = false; - index = static_cast(i); - } - } else { - index = static_cast(i); - } - } - - if (index > -1) { - skip[index] = false; - } - for (int64_t i = 0; i < num_boxes; ++i) { - if (scores[i] > threshold && skip[i] == false) { - sorted_indices->push_back(std::make_pair(scores[i], i)); - } - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices->begin(), - sorted_indices->end(), - phi::funcs::SortScorePairDescend); - // Keep top_k scores if needed. - if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { - sorted_indices->resize(top_k); - } -} - -template -class LocalityAwareNMSKernel : public framework::OpKernel { - public: - void LocalityAwareNMSFast(phi::DenseTensor* bbox, - phi::DenseTensor* scores, - const T score_threshold, - const T nms_threshold, - const T eta, - const int64_t top_k, - std::vector* selected_indices, - const bool normalized) const { - // The total boxes for each instance. 
- int64_t num_boxes = bbox->dims()[0]; - // 4: [xmin ymin xmax ymax] - // 8: [x1 y1 x2 y2 x3 y3 x4 y4] - // 16, 24, or 32: [x1 y1 x2 y2 ... xn yn], n = 8, 12 or 16 - int64_t box_size = bbox->dims()[1]; - - std::vector> sorted_indices; - T adaptive_threshold = nms_threshold; - T* bbox_data = bbox->data(); - T* scores_data = scores->data(); - - GetMaxScoreIndexWithLocalityAware(scores_data, - bbox_data, - box_size, - score_threshold, - top_k, - num_boxes, - &sorted_indices, - nms_threshold, - normalized); - - selected_indices->clear(); - - while (!sorted_indices.empty()) { - const int idx = sorted_indices.front().second; - bool keep = true; - for (int kept_idx : *selected_indices) { - if (keep) { - T overlap = T(0.); - // 4: [xmin ymin xmax ymax] - if (box_size == 4) { - overlap = - phi::funcs::JaccardOverlap(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, - normalized); - } - // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32 - if (box_size == 8 || box_size == 16 || box_size == 24 || - box_size == 32) { - overlap = phi::funcs::PolyIoU(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, - box_size, - normalized); - } - keep = overlap <= adaptive_threshold; - } else { - break; - } - } - if (keep) { - selected_indices->push_back(idx); - } - sorted_indices.erase(sorted_indices.begin()); - if (keep && eta < 1 && adaptive_threshold > 0.5) { - adaptive_threshold *= eta; - } - } - // delete bbox_data; - } - - void LocalityAwareNMS(const framework::ExecutionContext& ctx, - phi::DenseTensor* scores, - phi::DenseTensor* bboxes, - const int scores_size, - std::map>* indices, - int* num_nmsed_out) const { - int64_t background_label = ctx.Attr("background_label"); - int64_t nms_top_k = ctx.Attr("nms_top_k"); - int64_t keep_top_k = ctx.Attr("keep_top_k"); - bool normalized = ctx.Attr("normalized"); - T nms_threshold = static_cast(ctx.Attr("nms_threshold")); - T nms_eta = static_cast(ctx.Attr("nms_eta")); - T score_threshold = static_cast(ctx.Attr("score_threshold")); - - int num_det = 0; - - int64_t class_num = scores->dims()[0]; - phi::DenseTensor bbox_slice, score_slice; - for (int64_t c = 0; c < class_num; ++c) { - if (c == background_label) continue; - - score_slice = scores->Slice(c, c + 1); - bbox_slice = *bboxes; - - LocalityAwareNMSFast(&bbox_slice, - &score_slice, - score_threshold, - nms_threshold, - nms_eta, - nms_top_k, - &((*indices)[c]), // NOLINT - normalized); - num_det += (*indices)[c].size(); // NOLINT - } - - *num_nmsed_out = num_det; - const T* scores_data = scores->data(); - if (keep_top_k > -1 && num_det > keep_top_k) { - const T* sdata; - std::vector>> score_index_pairs; - for (const auto& it : *indices) { - int label = it.first; - - sdata = scores_data + label * scores->dims()[1]; - - const std::vector& label_indices = it.second; - for (auto idx : label_indices) { - score_index_pairs.push_back( - std::make_pair(sdata[idx], std::make_pair(label, idx))); - } - } - // Keep top k results per image. - std::stable_sort(score_index_pairs.begin(), - score_index_pairs.end(), - phi::funcs::SortScorePairDescend>); - score_index_pairs.resize(keep_top_k); - - // Store the new indices. 
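// The adaptive-threshold greedy loop of LocalityAwareNMSFast above, isolated
// under simplifying assumptions: candidates arrive sorted by descending
// score (as sorted_indices is) and `overlap` stands in for the
// JaccardOverlap / PolyIoU calls. Types and names are illustrative,
// not Paddle APIs.
#include <utility>
#include <vector>

std::vector<int> GreedyAdaptiveNMS(
    const std::vector<std::pair<float, int>>& sorted,  // (score, box id)
    const std::vector<std::vector<float>>& overlap,    // pairwise IoU
    float nms_threshold, float eta) {
  std::vector<int> kept;
  float adaptive = nms_threshold;
  for (const auto& cand : sorted) {
    bool keep = true;
    for (int k : kept) {
      if (overlap[cand.second][k] > adaptive) {
        keep = false;
        break;
      }
    }
    if (!keep) continue;
    kept.push_back(cand.second);
    // With eta < 1 the threshold tightens after each kept box once it
    // exceeds 0.5, mirroring `adaptive_threshold *= eta` above.
    if (eta < 1.f && adaptive > 0.5f) adaptive *= eta;
  }
  return kept;
}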
- std::map> new_indices; - for (auto& score_index_pair : score_index_pairs) { - int label = score_index_pair.second.first; - int idx = score_index_pair.second.second; - new_indices[label].push_back(idx); - } - - new_indices.swap(*indices); - *num_nmsed_out = keep_top_k; // NOLINT - } - } - - void LocalityAwareNMSOutput( - const platform::DeviceContext& ctx, - const phi::DenseTensor& scores, - const phi::DenseTensor& bboxes, - const std::map>& selected_indices, - const int scores_size, - phi::DenseTensor* outs, - int* oindices = nullptr, - const int offset = 0) const { - int64_t predict_dim = scores.dims()[1]; - int64_t box_size = bboxes.dims()[1]; - if (scores_size == 2) { - box_size = bboxes.dims()[2]; - } - int64_t out_dim = box_size + 2; - auto* scores_data = scores.data(); - auto* bboxes_data = bboxes.data(); - auto* odata = outs->data(); - const T* sdata; - phi::DenseTensor bbox; - bbox.Resize({scores.dims()[0], box_size}); - int count = 0; - for (const auto& it : selected_indices) { - int label = it.first; - const std::vector& indices = it.second; - sdata = scores_data + label * predict_dim; - for (auto idx : indices) { - odata[count * out_dim] = label; // label - const T* bdata; - bdata = bboxes_data + idx * box_size; - odata[count * out_dim + 1] = sdata[idx]; // score - if (oindices != nullptr) { - oindices[count] = offset + idx; - } - - // xmin, ymin, xmax, ymax or multi-points coordinates - std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T)); - count++; - } - } - } - - void Compute(const framework::ExecutionContext& ctx) const override { - auto* boxes_input = ctx.Input("BBoxes"); - auto* scores_input = ctx.Input("Scores"); - auto* outs = ctx.Output("Out"); - auto& score_dims = scores_input->dims(); - auto score_size = score_dims.size(); - auto& dev_ctx = ctx.template device_context(); - - phi::DenseTensor scores; - phi::DenseTensor boxes; - paddle::framework::TensorCopySync( - *scores_input, platform::CPUPlace(), &scores); - paddle::framework::TensorCopySync( - *boxes_input, platform::CPUPlace(), &boxes); - std::vector>> all_indices; - std::vector batch_starts = {0}; - int64_t batch_size = score_dims[0]; - int64_t box_dim = boxes.dims()[2]; - int64_t out_dim = box_dim + 2; - int num_nmsed_out = 0; - phi::DenseTensor boxes_slice, scores_slice; - int n = static_cast(batch_size); - for (int i = 0; i < n; ++i) { - scores_slice = scores.Slice(i, i + 1); - scores_slice.Resize({score_dims[1], score_dims[2]}); - boxes_slice = boxes.Slice(i, i + 1); - boxes_slice.Resize({score_dims[2], box_dim}); - - std::map> indices; - LocalityAwareNMS(ctx, - &scores_slice, - &boxes_slice, - score_size, - &indices, - &num_nmsed_out); - all_indices.push_back(indices); - batch_starts.push_back(batch_starts.back() + num_nmsed_out); - } - - int num_kept = static_cast(batch_starts.back()); - if (num_kept == 0) { - T* od = outs->mutable_data({1, 1}, ctx.GetPlace()); - od[0] = -1; - batch_starts = {0, 1}; - } else { - outs->mutable_data({num_kept, out_dim}, ctx.GetPlace()); - int offset = 0; - int* oindices = nullptr; - for (int i = 0; i < n; ++i) { - scores_slice = scores.Slice(i, i + 1); - boxes_slice = boxes.Slice(i, i + 1); - scores_slice.Resize({score_dims[1], score_dims[2]}); - boxes_slice.Resize({score_dims[2], box_dim}); - - int64_t s = static_cast(batch_starts[i]); - int64_t e = static_cast(batch_starts[i + 1]); - if (e > s) { - phi::DenseTensor out = outs->Slice(s, e); - LocalityAwareNMSOutput(dev_ctx, - scores_slice, - boxes_slice, - all_indices[i], - score_dims.size(), - &out, - 
oindices, - offset); - } - } - } - - framework::LoD lod; - lod.emplace_back(batch_starts); - outs->set_lod(lod); - } -}; - -class LocalityAwareNMSOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("BBoxes", - "Two types of bboxes are supported:" - "1. (Tensor) A 3-D Tensor with shape " - "[N, M, 4 or 8 16 24 32] represents the " - "predicted locations of M bounding bboxes, N is the batch size. " - "Each bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax], when box size equals to 4."); - AddInput("Scores", - "Two types of scores are supported:" - "1. (Tensor) A 3-D Tensor with shape [N, C, M] represents the " - "predicted confidence predictions. N is the batch size, C is the " - "class number, M is number of bounding boxes. For each category " - "there are total M scores which corresponding M bounding boxes. " - " Please note, M is equal to the 2nd dimension of BBoxes. "); - AddAttr( - "background_label", - "(int, default: -1) " - "The index of background label, the background label will be ignored. " - "If set to -1, then all categories will be considered.") - .SetDefault(-1); - AddAttr("score_threshold", - "(float) " - "Threshold to filter out bounding boxes with low " - "confidence score. If not provided, consider all boxes."); - AddAttr("nms_top_k", - "(int64_t) " - "Maximum number of detections to be kept according to the " - "confidences after the filtering detections based on " - "score_threshold"); - AddAttr("nms_threshold", - "(float, default: 0.3) " - "The threshold to be used in NMS.") - .SetDefault(0.3); - AddAttr("nms_eta", - "(float) " - "The parameter for adaptive NMS.") - .SetDefault(1.0); - AddAttr("keep_top_k", - "(int64_t) " - "Number of total bboxes to be kept per image after NMS " - "step. -1 means keeping all bboxes after NMS step."); - AddAttr("normalized", - "(bool, default true) " - "Whether detections are normalized.") - .SetDefault(true); - AddOutput("Out", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] " - "represents the " - "detections. Each row has 6 values: " - "[label, confidence, xmin, ymin, xmax, ymax] or " - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] " - "represents the " - "detections. Each row has 10 values: " - "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the " - "total number of detections in this mini-batch." - "For each instance, " - "the offsets in first dimension are called LoD, the number of " - "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " - "no detected bbox."); - AddComment(R"DOC( -This operator is to do locality-aware non maximum suppression (NMS) on a batched -of boxes and scores. -Firstly, this operator merge box and score according their IOU(intersection over union). -In the NMS step, this operator greedily selects a subset of detection bounding -boxes that have high scores larger than score_threshold, if providing this -threshold, then selects the largest nms_top_k confidences scores if nms_top_k -is larger than -1. Then this operator prunes away boxes that have high IOU -(intersection over union) overlap with already selected boxes by adaptive -threshold NMS based on parameters of nms_threshold and nms_eta. -After NMS step, at most keep_top_k number of total bboxes are to be kept -per image if keep_top_k is larger than -1. -This operator support multi-class and batched inputs. It applying NMS -independently for each class. 
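To make the keep_top_k step concrete, a hedged standalone sketch of the cross-class cap that the deleted kernel applies through score_index_pairs; the std::map and std::vector types here are illustrative stand-ins for the operator's tensors.

#include <algorithm>
#include <map>
#include <utility>
#include <vector>

// Given per-class kept detections as (score, box index) pairs, retain only
// the keep_top_k highest-scoring detections across all classes; -1 keeps
// everything, matching the attribute documented above.
std::map<int, std::vector<int>> KeepTopKAcrossClasses(
    const std::map<int, std::vector<std::pair<float, int>>>& per_class,
    int keep_top_k) {
  // Flatten to (score, (label, index)) so classes compete on score alone.
  std::vector<std::pair<float, std::pair<int, int>>> flat;
  for (const auto& kv : per_class)
    for (const auto& si : kv.second)
      flat.push_back({si.first, {kv.first, si.second}});
  // Stable descending sort preserves order on score ties, as the kernel's
  // std::stable_sort with SortScorePairDescend does.
  std::stable_sort(flat.begin(), flat.end(),
                   [](const std::pair<float, std::pair<int, int>>& a,
                      const std::pair<float, std::pair<int, int>>& b) {
                     return a.first > b.first;
                   });
  if (keep_top_k > -1 && static_cast<int>(flat.size()) > keep_top_k)
    flat.resize(keep_top_k);
  std::map<int, std::vector<int>> out;
  for (const auto& e : flat) out[e.second.first].push_back(e.second.second);
  return out;
}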
The outputs is a 2-D LoDTensor, for each -image, the offsets in first dimension of phi::DenseTensor are called LoD, the number -of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0, -means there is no detected bbox for this image. - -Please get more information from the following papers: -https://arxiv.org/abs/1704.03155. -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - locality_aware_nms, - ops::LocalityAwareNMSOp, - ops::LocalityAwareNMSOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(locality_aware_nms, - CPU, - ALL_LAYOUT, - ops::LocalityAwareNMSKernel, - float, - double) {} diff --git a/paddle/fluid/operators/detection/mine_hard_examples_op.cc b/paddle/fluid/operators/detection/mine_hard_examples_op.cc deleted file mode 100644 index 0ce9979ff2a3d..0000000000000 --- a/paddle/fluid/operators/detection/mine_hard_examples_op.cc +++ /dev/null @@ -1,412 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -enum MiningType { kNone = 0, kMaxNegative, kHardExample }; - -template -bool SortScoreDescend(const std::pair& pair1, - const std::pair& pair2) { - return pair1.first > pair2.first; -} - -inline bool IsEligibleMining(const MiningType mining_type, - const int match_idx, - const float match_dist, - const float neg_dist_threshold) { - if (mining_type == MiningType::kMaxNegative) { - return match_idx == -1 && match_dist < neg_dist_threshold; - } else if (mining_type == MiningType::kHardExample) { - return true; - } else { - return false; - } -} - -inline MiningType GetMiningType(std::string str) { - if (str == "max_negative") { - return MiningType::kMaxNegative; - } else if (str == "hard_example") { - return MiningType::kHardExample; - } else { - return MiningType::kNone; - } -} - -template -class MineHardExamplesKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in_cls_loss = ctx.Input("ClsLoss"); - auto* in_loc_loss = ctx.Input("LocLoss"); - auto* in_matched_indices = ctx.Input("MatchIndices"); - auto* in_match_dist = ctx.Input("MatchDist"); - float neg_pos_ratio = ctx.Attr("neg_pos_ratio"); - T neg_dist_threshold = - static_cast(ctx.Attr("neg_dist_threshold")); - int sample_size = ctx.Attr("sample_size"); - MiningType mining_type = - GetMiningType(ctx.Attr("mining_type")); - - auto out_neg_indices = ctx.Output("NegIndices"); - auto out_match_indices = - ctx.Output("UpdatedMatchIndices"); - - framework::TensorCopy( - *in_matched_indices, ctx.GetPlace(), out_match_indices); - - int batch_size = static_cast(in_matched_indices->dims()[0]); - int prior_num = static_cast(in_matched_indices->dims()[1]); - - auto match_indices = 
framework::EigenMatrix::From(*in_matched_indices); - - auto match_indices_et = - framework::EigenMatrix::From(*out_match_indices); - - auto match_dist = framework::EigenMatrix::From(*in_match_dist); - - const T* cls_loss = in_cls_loss->data(); - const T* loc_loss = nullptr; - if (in_loc_loss) { - loc_loss = in_loc_loss->data(); - } - - std::vector> all_neg_indices; - std::vector batch_starts = {0}; - for (int n = 0; n < batch_size; ++n) { - std::vector> loss_idx; - int neg_sel = 0; - for (int m = 0; m < prior_num; ++m) { - if (IsEligibleMining(mining_type, - match_indices(n, m), - match_dist(n, m), - neg_dist_threshold)) { - T loss = cls_loss[n * prior_num + m]; - if (mining_type == MiningType::kHardExample && loc_loss != nullptr) { - loss = cls_loss[n * prior_num + m] + loc_loss[n * prior_num + m]; - } - loss_idx.push_back(std::make_pair(loss, m)); - ++neg_sel; - } - } - - if (mining_type == MiningType::kMaxNegative) { - int num_pos = 0; - for (int m = 0; m < prior_num; ++m) { - if (match_indices(n, m) != -1) ++num_pos; - } - neg_sel = std::min(static_cast(num_pos * neg_pos_ratio), // NOLINT - neg_sel); - } else if (mining_type == MiningType::kHardExample) { - neg_sel = std::min(sample_size, neg_sel); - } - - std::sort(loss_idx.begin(), loss_idx.end(), SortScoreDescend); - std::set sel_indices; - std::vector neg_indices; - std::transform(loss_idx.begin(), - loss_idx.begin() + neg_sel, - std::inserter(sel_indices, sel_indices.begin()), - [](std::pair& l) -> int { - return static_cast(l.second); - }); - - if (mining_type == MiningType::kHardExample) { - for (int m = 0; m < prior_num; ++m) { - if (match_indices(n, m) > -1) { - if (sel_indices.find(m) == sel_indices.end()) { - match_indices_et(n, m) = -1; - } - } else { - if (sel_indices.find(m) != sel_indices.end()) { - neg_indices.push_back(m); - } - } - } - } else { - neg_indices.resize(sel_indices.size()); - std::copy(sel_indices.begin(), sel_indices.end(), neg_indices.begin()); - } - - all_neg_indices.push_back(neg_indices); - batch_starts.push_back(batch_starts.back() + neg_indices.size()); - } - - framework::LoD out_neg_indices_lod; - out_neg_indices_lod.emplace_back(batch_starts); - int neg_offset = 0; - auto neg_data = out_neg_indices->mutable_data( - common::make_ddim({static_cast(batch_starts.back()), 1}), - ctx.GetPlace()); - - for (auto neg_indices : all_neg_indices) { - std::copy(neg_indices.begin(), neg_indices.end(), neg_data + neg_offset); - neg_offset += static_cast(neg_indices.size()); - } - out_neg_indices->set_lod(out_neg_indices_lod); - return; - } -}; - -class MineHardExamplesOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("ClsLoss"), "Input", "ClsLoss", "mine_hard_examples"); - OP_INOUT_CHECK(ctx->HasInput("MatchIndices"), - "Input", - "MatchIndices", - "mine_hard_examples"); - OP_INOUT_CHECK( - ctx->HasInput("MatchDist"), "Input", "MatchDist", "mine_hard_examples"); - OP_INOUT_CHECK(ctx->HasOutput("NegIndices"), - "Output", - "NegIndices", - "mine_hard_examples"); - OP_INOUT_CHECK(ctx->HasOutput("UpdatedMatchIndices"), - "Output", - "UpdatedMatchIndices", - "mine_hard_examples"); - - auto cls_loss_dims = ctx->GetInputDim("ClsLoss"); - auto idx_dims = ctx->GetInputDim("MatchIndices"); - auto dis_dims = ctx->GetInputDim("MatchDist"); - - PADDLE_ENFORCE_EQ(cls_loss_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "The shape of 
ClsLoss is [N, Np]. But received %d.", - cls_loss_dims.size())); - PADDLE_ENFORCE_EQ( - idx_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "The shape of MatchIndices is [N, Np]. But received %d.", - idx_dims.size())); - PADDLE_ENFORCE_EQ(dis_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "The shape of MatchDist is [N, Np]. But received %d.", - dis_dims.size())); - - if (ctx->HasInput("LocLoss")) { - auto loc_loss_dims = ctx->GetInputDim("LocLoss"); - PADDLE_ENFORCE_EQ(loc_loss_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "The shape of LocLoss is [N, Np]. But received %d.", - loc_loss_dims.size())); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(cls_loss_dims[0], - loc_loss_dims[0], - platform::errors::InvalidArgument( - "Batch size of ClsLoss and LocLoss must be the " - "same. But received batch size of ClsLoss was " - "%d, batch size of LocLoss was %d.", - cls_loss_dims[0], - loc_loss_dims[0])); - PADDLE_ENFORCE_EQ(cls_loss_dims[1], - loc_loss_dims[1], - platform::errors::InvalidArgument( - "Prior box number of ClsLoss and LocLoss must be " - "the same. But received box number of ClsLoss " - "was %d, box number of LocLoss was %d.", - cls_loss_dims[1], - loc_loss_dims[1])); - } - } - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(cls_loss_dims[0], - idx_dims[0], - platform::errors::InvalidArgument( - "Batch size of ClsLoss and MatchIndices must be " - "the same. But received batch size of ClsLoss was " - "%d, batch size of MatchIndices was %d.", - cls_loss_dims[0], - idx_dims[0])); - PADDLE_ENFORCE_EQ( - cls_loss_dims[1], - idx_dims[1], - platform::errors::InvalidArgument( - "Prior box number of ClsLoss and " - "MatchIndices must be the same. But received box number of " - "ClsLoss was %d, box number of MatchIndices was %d.", - cls_loss_dims[1], - idx_dims[1])); - - PADDLE_ENFORCE_EQ(cls_loss_dims[0], - dis_dims[0], - platform::errors::InvalidArgument( - "Batch size of ClsLoss and MatchDist must be the " - "same. But received batch size of ClsLoss was %d, " - "batch size of MatchDist was %d.", - cls_loss_dims[0], - dis_dims[0])); - PADDLE_ENFORCE_EQ(cls_loss_dims[1], - dis_dims[1], - platform::errors::InvalidArgument( - "Prior box number of ClsLoss and MatchDist must be " - "the same. But received box number of ClsLoss was " - "%d, box number of MatchDist was %d.", - cls_loss_dims[1], - dis_dims[1])); - } - - auto mining_type = - GetMiningType(ctx->Attrs().Get("mining_type")); - - PADDLE_ENFORCE_NE(mining_type, - MiningType::kNone, - platform::errors::InvalidArgument( - "mining_type must be hard_example or max_negative")); - - if (mining_type == MiningType::kMaxNegative) { - auto neg_pos_ratio = ctx->Attrs().Get("neg_pos_ratio"); - auto neg_dist_threshold = ctx->Attrs().Get("neg_dist_threshold"); - PADDLE_ENFORCE_GT(neg_pos_ratio, - 0.0f, - platform::errors::InvalidArgument( - "neg_pos_ratio must be greater than zero in " - "max_negative mode. But received %f.", - neg_pos_ratio)); - PADDLE_ENFORCE_LT(neg_dist_threshold, - 1.0f, - platform::errors::InvalidArgument( - "neg_dist_threshold must be less than one in " - "max_negative mode. But received %f.", - neg_dist_threshold)); - PADDLE_ENFORCE_GT(neg_dist_threshold, - 0.0f, - platform::errors::InvalidArgument( - "neg_dist_threshold must be greater " - "than zero in max_negative mode.
But received %f.", - neg_dist_threshold)); - } else if (mining_type == MiningType::kHardExample) { - auto sample_size = ctx->Attrs().Get("sample_size"); - PADDLE_ENFORCE_GT(sample_size, - 0, - platform::errors::InvalidArgument( - "sample_size must greater than zero in " - "hard_example mode. But received %d.", - sample_size)); - } - - ctx->SetOutputDim("UpdatedMatchIndices", idx_dims); - // The first dimension of NegIndices will be set correcttly in Compute. - ctx->SetOutputDim("NegIndices", {-1, 1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "ClsLoss"), - platform::CPUPlace()); - } -}; - -class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "ClsLoss", - "(Tensor, default Tensor), The classification loss with shape " - "[N, Np], N is the batch size and Np is the number of prior box."); - AddInput("LocLoss", - "(Tensor, optional, default Tensor), The localization loss " - "with shape [N, Np], N is the batch size and Np is the number of " - "prior box.") - .AsDispensable(); - AddInput("MatchIndices", - "(Tensor, Tensor), Matched indices with shape [N, Np], N is " - "the batch size and Np is the number of prior box. " - "MatchIndices[i][j] equal -1 means the j-th prior box in i-th " - "instance does not match any entity, otherwise means it is " - "matched to row."); - AddInput("MatchDist", - "(Tensor, default Tensor) Matched indices with shape [N, " - "Np], N is the batch size and Np is the number of prior box."); - AddAttr("neg_pos_ratio", - "(float) The ratio of the negative box to the positive " - "box. Use only when mining_type is max_negative.") - .SetDefault(1.0); - AddAttr("neg_dist_threshold", - "(float) The negative overlap upper bound for the unmatched " - "predictions. Use only when mining_type is max_negative.") - .SetDefault(0.5); - AddAttr("sample_size", - "(float) The max sample size of negative box. Use only when " - "mining_type is hard_example.") - .SetDefault(0); - AddAttr("mining_type", - "(float) The mining algorithm name, the value is " - "hard_example or max_negative.") - .SetDefault("max_negative") - .InEnum({"hard_example", "max_negative"}); - - AddOutput("NegIndices", - "(phi::DenseTensor) The output of negative example indices. " - "a phi::DenseTensor " - "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, " - "and each element is the prior box index. " - "For example, the batch size is 2, the lod is [[0, 1, 2]], " - "the sample 0's box 1(MatchIndices[0][1]) is selected, " - "and sample 1's box 0 is selected. The output NegIndices is " - "[[1], [0]]."); - - AddOutput("UpdatedMatchIndices", - "(Tensor) The output of updated MatchIndices, a tensor with " - "shape [N, Np]. Only update when mining_type is " - "hard_example. The input MatchIndices elements will be update to " - "-1 when it is not in the candidate high loss list of negative " - "examples."); - - AddComment(R"DOC( -Mine hard examples Operator. -This operator implements hard example mining to select a subset of negative box indices. -For each image, selects the box with highest losses. subject to the condition that the -box cannot have an Matcht > neg_dist_threshold when mining_type is max_negative. 
-The selected number is min(sample_size, max_negative_box_number) when mining_type is -hard_example, or min(neg_pos_ratio * positive_box_number, max_negative_box_number) -when mining_type is max_negative, where the max_negative_box_number is the count of -MatchIndices elements with value -1. -)DOC"); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - mine_hard_examples, - ops::MineHardExamplesOp, - ops::MineHardExamplesOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL(mine_hard_examples, - CPU, - ALL_LAYOUT, - ops::MineHardExamplesKernel, - float, - double) {} diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 9cd9e76772424..73ec6caa61c27 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -47,22 +47,22 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(score_size == 2 || score_size == 3, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Scores) must be 2 or 3" ". But received rank = %d", score_size)); - PADDLE_ENFORCE_EQ(box_dims.size(), - 3, - platform::errors::InvalidArgument( - "The rank of Input(BBoxes) must be 3" - ". But received rank = %d", - box_dims.size())); + PADDLE_ENFORCE_EQ( + box_dims.size(), + 3, + phi::errors::InvalidArgument("The rank of Input(BBoxes) must be 3" + ". But received rank = %d", + box_dims.size())); if (score_size == 3) { PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 || box_dims[2] == 16 || box_dims[2] == 24 || box_dims[2] == 32, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input" "(BBoxes) must be 4 or 8, " "represents the layout of coordinate " @@ -74,7 +74,7 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( box_dims[1], score_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of Input(BBoxes) must be equal to " "last dimension of Input(Scores), which represents the " "predicted bboxes." @@ -84,14 +84,14 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(box_dims[2], 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input" "(BBoxes) must be 4. But received dimension = %d", box_dims[2])); PADDLE_ENFORCE_EQ( box_dims[1], score_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 2nd dimension of Input" "(BBoxes) must be equal to the 2nd dimension of Input(Scores). 
" "But received box dimension = %d, score dimension = %d", diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc index 0059aedcdc86c..35518b224e5ad 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -21,10 +21,9 @@ template class PolygonBoxTransformCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_cpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument("It must use CUDAPlace.")); + PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("It must use CUDAPlace.")); auto* in = ctx.Input("Input"); auto in_dims = common::vectorize(in->dims()); const T* in_data = in->data(); @@ -66,12 +65,12 @@ class PolygonBoxTransformOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_dim.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input's rank must be 4. But received: Input rank is [%d]", in_dim.size())); PADDLE_ENFORCE_EQ(in_dim[1] % 2, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input's second dimension must be even. But " "received: Input 2nd dimension is [%d]", in_dim[1])); diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu index 4f182464f77b5..b23a8d4e41bc5 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cu +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu @@ -45,7 +45,7 @@ class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The polygon_box_transform operator needs to be executed on GPU.")); auto* in = ctx.Input("Input"); auto in_dims = in->dims(); diff --git a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc deleted file mode 100644 index b97cfe81a5a17..0000000000000 --- a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc +++ /dev/null @@ -1,676 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -limitations under the License. 
*/ - -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_GE( - ctx->Inputs("BBoxes").size(), - 1UL, - platform::errors::InvalidArgument("The length of Input(BBoxes) should " - "be greater than 0, but received " - "BBoxes length is:%d.", - ctx->Inputs("BBoxes").size())); - PADDLE_ENFORCE_GE( - ctx->Inputs("Scores").size(), - 1UL, - platform::errors::InvalidArgument("The length of Input(Scores) should " - "be greater than 0, but received " - "Scores length is:%d.", - ctx->Inputs("Scores").size())); - PADDLE_ENFORCE_GE( - ctx->Inputs("Anchors").size(), - 1UL, - platform::errors::InvalidArgument("The length of Input(Anchors) should " - "be greater than 0, but received " - "Anchors length is:%d.", - ctx->Inputs("Anchors").size())); - PADDLE_ENFORCE_EQ( - ctx->Inputs("BBoxes").size(), - ctx->Inputs("Scores").size(), - platform::errors::InvalidArgument( - "Input(BBoxes) and Input(Scores) should have the same length, but " - "received BBoxes length is:%d, Scores length is:%d.", - ctx->Inputs("BBoxes").size(), - ctx->Inputs("Scores").size())); - PADDLE_ENFORCE_EQ( - ctx->Inputs("BBoxes").size(), - ctx->Inputs("Anchors").size(), - platform::errors::InvalidArgument( - "Input(BBoxes) and Input(Anchors) should have the same length, but " - "received BBoxes length is:%d, Anchors length is:%d.", - ctx->Inputs("BBoxes").size(), - ctx->Inputs("Anchors").size())); - OP_INOUT_CHECK(ctx->HasInput("ImInfo"), - "Input", - "ImInfo", - "retinanet_detection_output"); - OP_INOUT_CHECK( - ctx->HasOutput("Out"), "Output", "Out", "retinanet_detection_output"); - - auto bboxes_dims = ctx->GetInputsDim("BBoxes"); - auto scores_dims = ctx->GetInputsDim("Scores"); - auto anchors_dims = ctx->GetInputsDim("Anchors"); - auto im_info_dims = ctx->GetInputDim("ImInfo"); - - const size_t b_n = bboxes_dims.size(); - PADDLE_ENFORCE_GT(b_n, - 0, - platform::errors::InvalidArgument( - "The number of Variables in Input(BBoxes) " - "should be greater than 0, " - "but received number is:%d.", - b_n)); - const size_t s_n = scores_dims.size(); - PADDLE_ENFORCE_GT(s_n, - 0, - platform::errors::InvalidArgument( - "The number of Variables in Input(Scores) " - "should be greater than 0, " - "but received number is:%d.", - s_n)); - const size_t a_n = anchors_dims.size(); - PADDLE_ENFORCE_GT(a_n, - 0, - platform::errors::InvalidArgument( - "The number of Variables in Input(Anchors) " - "should be greater than 0, " - "but received number is:%d.", - a_n)); - auto bbox_dims = bboxes_dims[0]; - auto score_dims = scores_dims[0]; - auto anchor_dims = anchors_dims[0]; - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - score_dims.size(), - 3, - platform::errors::InvalidArgument( - "The rank of each Variable in Input(Scores) must be 3, " - "but received rank is:%d.", - score_dims.size())); - PADDLE_ENFORCE_EQ( - bbox_dims.size(), - 3, - platform::errors::InvalidArgument( - "The rank of each Variable in Input(BBoxes) must be 3, " - "but received rank is:%d.", - bbox_dims.size())); - PADDLE_ENFORCE_EQ( - anchor_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of each Variable in Input(Anchors) must be 2, " - "but received rank is:%d.", - anchor_dims.size())); - PADDLE_ENFORCE_EQ( - bbox_dims[2], - 4, - platform::errors::InvalidArgument( - "The last 
dimension of each Variable in Input(BBoxes) must be 4 " - "representing the layout of coordinate [xmin, ymin, xmax, ymax], " - "but received dimension is:%d.", - bbox_dims[2])); - PADDLE_ENFORCE_EQ(bbox_dims[1], - score_dims[1], - platform::errors::InvalidArgument( - "The 2nd dimension of Variables in Input(BBoxes) " - "and Input(Scores) " - "must be same, which represents the number of the " - "predicted boxes, " - "but received BBoxes 2nd dimension is:%d, Scores " - "2nd dimension is:%d.", - bbox_dims[1], - score_dims[1])); - PADDLE_ENFORCE_EQ( - anchor_dims[0], - bbox_dims[1], - platform::errors::InvalidArgument( - "The 1st dimension of each Variables in Input(Anchors) must be " - "equal " - "to the 2nd dimension of corresponding Variables in " - "Input(BBoxes), " - "which represents the number of the predicted boxes, but " - "received " - "Anchors 1st dimension is:%d, BBoxes 2nd dimension is:%d.", - anchor_dims[0], - bbox_dims[1])); - PADDLE_ENFORCE_EQ(im_info_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(ImInfo) must be 2, but " - "received ImInfo rank is:%d.", - im_info_dims.size())); - } - // Here the box_dims[0] is not the real dimension of output. - // It will be rewritten in the computing kernel. - ctx->SetOutputDim("Out", {bbox_dims[1], bbox_dims[2] + 2}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto input_data_type = - OperatorWithKernel::IndicateVarDataType(ctx, "Scores"); - return phi::KernelKey(input_data_type, - platform::CPUPlace()); // ctx.GetPlace()); - } -}; - -template -bool SortScorePairDescend(const std::pair& pair1, - const std::pair& pair2) { - return pair1.first > pair2.first; -} - -template -bool SortScoreTwoPairDescend(const std::pair>& pair1, - const std::pair>& pair2) { - return pair1.first > pair2.first; -} - -template -static inline void GetMaxScoreIndex( - const std::vector& scores, - const T threshold, - int top_k, - std::vector>* sorted_indices) { - for (size_t i = 0; i < scores.size(); ++i) { - if (scores[i] > threshold) { - sorted_indices->push_back(std::make_pair(scores[i], i)); - } - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices->begin(), - sorted_indices->end(), - SortScorePairDescend); - // Keep top_k scores if needed. - if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { - sorted_indices->resize(top_k); - } -} - -template -static inline T BBoxArea(const std::vector& box, const bool normalized) { - if (box[2] < box[0] || box[3] < box[1]) { - // If coordinate values are is invalid - // (e.g. xmax < xmin or ymax < ymin), return 0. - return static_cast(0.); - } else { - const T w = box[2] - box[0]; - const T h = box[3] - box[1]; - if (normalized) { - return w * h; - } else { - // If coordinate values are not within range [0, 1]. - return (w + 1) * (h + 1); - } - } -} - -template -static inline T JaccardOverlap(const std::vector& box1, - const std::vector& box2, - const bool normalized) { - if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || - box2[3] < box1[1]) { - return static_cast(0.); - } else { - const T inter_xmin = std::max(box1[0], box2[0]); - const T inter_ymin = std::max(box1[1], box2[1]); - const T inter_xmax = std::min(box1[2], box2[2]); - const T inter_ymax = std::min(box1[3], box2[3]); - T norm = normalized ? static_cast(0.) 
: static_cast(1.); - T inter_w = inter_xmax - inter_xmin + norm; - T inter_h = inter_ymax - inter_ymin + norm; - const T inter_area = inter_w * inter_h; - const T bbox1_area = BBoxArea(box1, normalized); - const T bbox2_area = BBoxArea(box2, normalized); - return inter_area / (bbox1_area + bbox2_area - inter_area); - } -} - -template -class RetinanetDetectionOutputKernel : public framework::OpKernel { - public: - void NMSFast(const std::vector>& cls_dets, - const T nms_threshold, - const T eta, - std::vector* selected_indices) const { - int64_t num_boxes = cls_dets.size(); - std::vector> sorted_indices; - for (int64_t i = 0; i < num_boxes; ++i) { - sorted_indices.push_back(std::make_pair(cls_dets[i][4], i)); - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices.begin(), - sorted_indices.end(), - SortScorePairDescend); - selected_indices->clear(); - T adaptive_threshold = nms_threshold; - - while (!sorted_indices.empty()) { - const int idx = sorted_indices.front().second; - bool keep = true; - for (const auto kept_idx : *selected_indices) { - if (keep) { - T overlap = T(0.); - overlap = JaccardOverlap(cls_dets[idx], cls_dets[kept_idx], false); - keep = overlap <= adaptive_threshold; - } else { - break; - } - } - if (keep) { - selected_indices->push_back(idx); - } - sorted_indices.erase(sorted_indices.begin()); - if (keep && eta < 1 && adaptive_threshold > 0.5) { - adaptive_threshold *= eta; - } - } - } - - void DeltaScoreToPrediction( - const std::vector& bboxes_data, - const std::vector& anchors_data, - T im_height, - T im_width, - T im_scale, - int class_num, - const std::vector>& sorted_indices, - std::map>>* preds) const { - im_height = static_cast(round(im_height / im_scale)); - im_width = static_cast(round(im_width / im_scale)); - T zero(0); - int i = 0; - for (const auto& it : sorted_indices) { - T score = it.first; - int idx = it.second; - int a = idx / class_num; - int c = idx % class_num; - - int box_offset = a * 4; - T anchor_box_width = - anchors_data[box_offset + 2] - anchors_data[box_offset] + 1; - T anchor_box_height = - anchors_data[box_offset + 3] - anchors_data[box_offset + 1] + 1; - T anchor_box_center_x = anchors_data[box_offset] + anchor_box_width / 2; - T anchor_box_center_y = - anchors_data[box_offset + 1] + anchor_box_height / 2; - T target_box_center_x = 0, target_box_center_y = 0; - T target_box_width = 0, target_box_height = 0; - target_box_center_x = - bboxes_data[box_offset] * anchor_box_width + anchor_box_center_x; - target_box_center_y = - bboxes_data[box_offset + 1] * anchor_box_height + anchor_box_center_y; - target_box_width = - std::exp(bboxes_data[box_offset + 2]) * anchor_box_width; - target_box_height = - std::exp(bboxes_data[box_offset + 3]) * anchor_box_height; - T pred_box_xmin = target_box_center_x - target_box_width / 2; - T pred_box_ymin = target_box_center_y - target_box_height / 2; - T pred_box_xmax = target_box_center_x + target_box_width / 2 - 1; - T pred_box_ymax = target_box_center_y + target_box_height / 2 - 1; - pred_box_xmin = pred_box_xmin / im_scale; - pred_box_ymin = pred_box_ymin / im_scale; - pred_box_xmax = pred_box_xmax / im_scale; - pred_box_ymax = pred_box_ymax / im_scale; - - pred_box_xmin = std::max(std::min(pred_box_xmin, im_width - 1), zero); - pred_box_ymin = std::max(std::min(pred_box_ymin, im_height - 1), zero); - pred_box_xmax = std::max(std::min(pred_box_xmax, im_width - 1), zero); - pred_box_ymax = std::max(std::min(pred_box_ymax, im_height - 1), zero); - - 
std::vector one_pred; - one_pred.push_back(pred_box_xmin); - one_pred.push_back(pred_box_ymin); - one_pred.push_back(pred_box_xmax); - one_pred.push_back(pred_box_ymax); - one_pred.push_back(score); - (*preds)[c].push_back(one_pred); - i++; - } - } - - void MultiClassNMS(const std::map>>& preds, - int class_num, - const int keep_top_k, - const T nms_threshold, - const T nms_eta, - std::vector>* nmsed_out, - int* num_nmsed_out) const { - std::map> indices; - int num_det = 0; - for (int c = 0; c < class_num; ++c) { - if (static_cast(preds.count(c))) { - const std::vector> cls_dets = preds.at(c); - NMSFast(cls_dets, nms_threshold, nms_eta, &(indices[c])); - num_det += static_cast(indices[c].size()); - } - } - - std::vector>> score_index_pairs; - for (const auto& it : indices) { - int label = it.first; - const std::vector& label_indices = it.second; - for (auto idx : label_indices) { - score_index_pairs.push_back(std::make_pair(preds.at(label)[idx][4], - std::make_pair(label, idx))); - } - } - // Keep top k results per image. - std::stable_sort(score_index_pairs.begin(), - score_index_pairs.end(), - SortScoreTwoPairDescend); - if (num_det > keep_top_k) { - score_index_pairs.resize(keep_top_k); - } - - // Store the new indices. - std::map> new_indices; - for (const auto& it : score_index_pairs) { - int label = it.second.first; - int idx = it.second.second; - std::vector one_pred; - one_pred.push_back(label); - one_pred.push_back(preds.at(label)[idx][4]); - one_pred.push_back(preds.at(label)[idx][0]); - one_pred.push_back(preds.at(label)[idx][1]); - one_pred.push_back(preds.at(label)[idx][2]); - one_pred.push_back(preds.at(label)[idx][3]); - nmsed_out->push_back(one_pred); - } - - *num_nmsed_out = (num_det > keep_top_k ? keep_top_k : num_det); - } - - void RetinanetDetectionOutput(const framework::ExecutionContext& ctx, - const std::vector& scores, - const std::vector& bboxes, - const std::vector& anchors, - const phi::DenseTensor& im_info, - std::vector>* nmsed_out, - int* num_nmsed_out) const { - int64_t nms_top_k = ctx.Attr("nms_top_k"); - int64_t keep_top_k = ctx.Attr("keep_top_k"); - T nms_threshold = static_cast(ctx.Attr("nms_threshold")); - T nms_eta = static_cast(ctx.Attr("nms_eta")); - T score_threshold = static_cast(ctx.Attr("score_threshold")); - - int64_t class_num = scores[0].dims()[1]; - std::map>> preds; - for (size_t l = 0; l < scores.size(); ++l) { - // Fetch per level score - phi::DenseTensor scores_per_level = scores[l]; - // Fetch per level bbox - phi::DenseTensor bboxes_per_level = bboxes[l]; - // Fetch per level anchor - phi::DenseTensor anchors_per_level = anchors[l]; - - int64_t scores_num = scores_per_level.numel(); - int64_t bboxes_num = bboxes_per_level.numel(); - std::vector scores_data(scores_num); - std::vector bboxes_data(bboxes_num); - std::vector anchors_data(bboxes_num); - std::copy_n(scores_per_level.data(), scores_num, scores_data.begin()); - std::copy_n(bboxes_per_level.data(), bboxes_num, bboxes_data.begin()); - std::copy_n( - anchors_per_level.data(), bboxes_num, anchors_data.begin()); - std::vector> sorted_indices; - - // For the highest level, we take the threshold 0.0 - T threshold = (l < (scores.size() - 1) ? 
score_threshold : 0.0); - GetMaxScoreIndex(scores_data, threshold, nms_top_k, &sorted_indices); - auto* im_info_data = im_info.data(); - auto im_height = im_info_data[0]; - auto im_width = im_info_data[1]; - auto im_scale = im_info_data[2]; - DeltaScoreToPrediction(bboxes_data, - anchors_data, - im_height, - im_width, - im_scale, - class_num, - sorted_indices, - &preds); - } - - MultiClassNMS(preds, - class_num, - keep_top_k, - nms_threshold, - nms_eta, - nmsed_out, - num_nmsed_out); - } - - void MultiClassOutput(const platform::DeviceContext& ctx, - const std::vector>& nmsed_out, - phi::DenseTensor* outs) const { - auto* odata = outs->data(); - int count = 0; - int64_t out_dim = 6; - for (size_t i = 0; i < nmsed_out.size(); ++i) { - odata[count * out_dim] = nmsed_out[i][0] + 1; // label - odata[count * out_dim + 1] = nmsed_out[i][1]; // score - odata[count * out_dim + 2] = nmsed_out[i][2]; // xmin - odata[count * out_dim + 3] = nmsed_out[i][3]; // ymin - odata[count * out_dim + 4] = nmsed_out[i][4]; // xmax - odata[count * out_dim + 5] = nmsed_out[i][5]; // ymax - count++; - } - } - - void Compute(const framework::ExecutionContext& ctx) const override { - auto boxes = ctx.MultiInput("BBoxes"); - auto scores = ctx.MultiInput("Scores"); - auto anchors = ctx.MultiInput("Anchors"); - auto* im_info = ctx.Input("ImInfo"); - auto* outs = ctx.Output("Out"); - - std::vector boxes_list(boxes.size()); - std::vector scores_list(scores.size()); - std::vector anchors_list(anchors.size()); - for (size_t j = 0; j < boxes_list.size(); ++j) { - boxes_list[j] = *boxes[j]; - scores_list[j] = *scores[j]; - anchors_list[j] = *anchors[j]; - } - auto score_dims = scores_list[0].dims(); - int64_t batch_size = score_dims[0]; - auto box_dims = boxes_list[0].dims(); - int64_t box_dim = box_dims[2]; - int64_t out_dim = box_dim + 2; - - auto& dev_ctx = ctx.template device_context(); - - std::vector>> all_nmsed_out; - std::vector batch_starts = {0}; - for (int i = 0; i < batch_size; ++i) { - int num_nmsed_out = 0; - std::vector box_per_batch_list(boxes_list.size()); - std::vector score_per_batch_list(scores_list.size()); - for (size_t j = 0; j < boxes_list.size(); ++j) { - const auto& score_dims = scores_list[j].dims(); - score_per_batch_list[j] = scores_list[j].Slice(i, i + 1); - score_per_batch_list[j].Resize({score_dims[1], score_dims[2]}); - box_per_batch_list[j] = boxes_list[j].Slice(i, i + 1); - box_per_batch_list[j].Resize({score_dims[1], box_dim}); - } - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - - std::vector> nmsed_out; - RetinanetDetectionOutput(ctx, - score_per_batch_list, - box_per_batch_list, - anchors_list, - im_info_slice, - &nmsed_out, - &num_nmsed_out); - all_nmsed_out.push_back(nmsed_out); - batch_starts.push_back(batch_starts.back() + num_nmsed_out); - } - - int num_kept = static_cast(batch_starts.back()); - if (num_kept == 0) { - outs->Resize({0, out_dim}); - } else { - outs->mutable_data({num_kept, out_dim}, ctx.GetPlace()); - for (int i = 0; i < batch_size; ++i) { - int64_t s = static_cast(batch_starts[i]); - int64_t e = static_cast(batch_starts[i + 1]); - if (e > s) { - phi::DenseTensor out = outs->Slice(s, e); - MultiClassOutput(dev_ctx, all_nmsed_out[i], &out); - } - } - } - - framework::LoD lod; - lod.emplace_back(batch_starts); - - outs->set_lod(lod); - } -}; - -class RetinanetDetectionOutputOpMaker - : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("BBoxes", - "(List) A list of tensors from multiple FPN levels.
Each " - "element is a 3-D phi::DenseTensor with shape [N, Mi, 4] " - "represents the " - "predicted locations of Mi bounding boxes, N is the batch size. " - "Mi is the number of bounding boxes from i-th FPN level. Each " - "bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax].") - .AsDuplicable(); - AddInput("Scores", - "(List) A list of tensors from multiple FPN levels. Each " - "element is a 3-D phi::DenseTensor with shape [N, Mi, C] " - "represents the " - "predicted confidence from its FPN level. N is the batch size, " - "C is the class number (excluding background), Mi is the number " - "of bounding boxes from i-th FPN level. For each bounding box, " - "there are total C scores.") - .AsDuplicable(); - AddInput( - "Anchors", - "(List) A list of tensors from multiple FPN levels. Each" - "element is a 2-D phi::DenseTensor with shape [Mi, 4] represents the " - "locations of Mi anchor boxes from i-th FPN level. Each " - "bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax].") - .AsDuplicable(); - AddInput("ImInfo", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] " - "represents the " - "image information. N is the batch size, each image information " - "includes height, width and scale."); - AddAttr("score_threshold", - "(float) " - "Threshold to filter out bounding boxes with a confidence " - "score."); - AddAttr("nms_top_k", - "(int64_t) " - "Maximum number of detections per FPN layer to be kept " - "according to the confidence before NMS."); - AddAttr("nms_threshold", - "(float) " - "The threshold to be used in NMS."); - AddAttr("nms_eta", - "(float) " - "The parameter for adaptive NMS."); - AddAttr( - "keep_top_k", - "(int64_t) " - "Number of total bounding boxes to be kept per image after NMS " - "step."); - AddOutput("Out", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] " - "represents the " - "detections. Each row has 6 values: " - "[label, confidence, xmin, ymin, xmax, ymax]" - "No is the total number of detections in this mini-batch." - "For each instance, " - "the offsets in first dimension are called LoD, the number of " - "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " - "no detected bbox."); - AddComment(R"DOC( -This operator is to decode boxes and scores from each FPN layer and do -multi-class non maximum suppression (NMS) on merged predictions. - -Top-scoring predictions per FPN layer are decoded with the anchor -information. This operator greedily selects a subset of detection bounding -boxes from each FPN layer that have high scores larger than score_threshold, -if providing this threshold, then selects the largest nms_top_k confidences -scores per FPN layer, if nms_top_k is larger than -1. -The decoding schema is described below: - -ox = (pw * pxv * tx * + px) - tw / 2 - -oy = (ph * pyv * ty * + py) - th / 2 - -ow = exp(pwv * tw) * pw + tw / 2 - -oh = exp(phv * th) * ph + th / 2 - -where `tx`, `ty`, `tw`, `th` denote the predicted box's center coordinates, width -and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the -anchor's center coordinates, width and height. `pxv`, `pyv`, `pwv`, -`phv` denote the variance of the anchor box and `ox`, `oy`, `ow`, `oh` denote the -decoded coordinates, width and height. - -Then the top decoded prediction from all levels are merged followed by NMS. 
-In the NMS step, this operator prunes away boxes that have a high IOU -(intersection over union) overlap with already selected boxes by adaptive -threshold NMS based on the parameters nms_threshold and nms_eta. -After the NMS step, at most keep_top_k bounding boxes in total are kept -per image if keep_top_k is larger than -1. -This operator supports multi-class and batched inputs. It applies NMS -independently for each class. The output is a 2-D LoDTensor; for each -image, the offsets in the first dimension of the phi::DenseTensor are called LoD, and the number -of offsets is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0, -there is no detected bounding box for this image. If there are no detected boxes -for any image, all the elements in the LoD are set to 0, and the output tensor is -empty (None). -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - retinanet_detection_output, - ops::RetinanetDetectionOutputOp, - ops::RetinanetDetectionOutputOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(retinanet_detection_output, - CPU, - ALL_LAYOUT, - ops::RetinanetDetectionOutputKernel, - float, - double) {} diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc deleted file mode 100644 index 81e8d0d3edf7e..0000000000000 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ /dev/null @@ -1,1262 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License.
*/ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -using EigenMatrix = framework::EigenMatrix; - -class RpnTargetAssignOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Anchor"), "Input", "Anchor", "rpn_target_assign"); - OP_INOUT_CHECK( - ctx->HasInput("GtBoxes"), "Input", "GtBoxes", "rpn_target_assign"); - OP_INOUT_CHECK( - ctx->HasInput("IsCrowd"), "Input", "IsCrowd", "rpn_target_assign"); - OP_INOUT_CHECK( - ctx->HasInput("ImInfo"), "Input", "ImInfo", "rpn_target_assign"); - - OP_INOUT_CHECK(ctx->HasOutput("LocationIndex"), - "Output", - "LocationIndex", - "rpn_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("ScoreIndex"), - "Output", - "ScoreIndex", - "rpn_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("TargetLabel"), - "Output", - "TargetLabel", - "rpn_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("TargetBBox"), - "Output", - "TargetBBox", - "rpn_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("BBoxInsideWeight"), - "Output", - "BBoxInsideWeight", - "rpn_target_assign"); - - auto anchor_dims = ctx->GetInputDim("Anchor"); - auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); - auto im_info_dims = ctx->GetInputDim("ImInfo"); - PADDLE_ENFORCE_EQ(anchor_dims.size(), - 2, - platform::errors::InvalidArgument( - "The dimensions size of Input(Anchor) must be 2. But " - "received dimensions size=[%d], dimensions=[%s].", - anchor_dims.size(), - anchor_dims)); - PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), - 2, - platform::errors::InvalidArgument( - "The dimensions size of Input(GtBoxes) must be 2. " - "But received dimensions size=[%d], dimensions=[%s].", - gt_boxes_dims.size(), - gt_boxes_dims)); - PADDLE_ENFORCE_EQ(im_info_dims.size(), - 2, - platform::errors::InvalidArgument( - "The dimensions size of Input(ImInfo) must be 2. 
But " - "received dimensions size=[%d], dimensions=[%s].", - im_info_dims.size(), - im_info_dims)); - - ctx->SetOutputDim("LocationIndex", {-1}); - ctx->SetOutputDim("ScoreIndex", {-1}); - ctx->SetOutputDim("TargetLabel", {-1, 1}); - ctx->SetOutputDim("TargetBBox", {-1, 4}); - ctx->SetOutputDim("BBoxInsideWeight", {-1, 4}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Anchor"), - platform::CPUPlace()); - } -}; - -template -void AppendRpns(phi::DenseTensor* out, - int64_t offset, - phi::DenseTensor* to_add) { - auto* out_data = out->data(); - auto* to_add_data = to_add->data(); - memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); -} - -template -std::vector FilterStraddleAnchor( - const phi::CPUContext& context, - const phi::DenseTensor* anchor, - const float rpn_straddle_thresh, - T im_height, - T im_width) { - std::vector inds_inside; - int anchor_num = static_cast(anchor->dims()[0]); - auto* anchor_data = anchor->data(); - if (rpn_straddle_thresh >= 0) { - int index = 0; - for (int i = 0; i < anchor_num; ++i) { - index = i * 4; - if ((anchor_data[index + 0] >= -rpn_straddle_thresh) && - (anchor_data[index + 1] >= -rpn_straddle_thresh) && - (anchor_data[index + 2] < im_width + rpn_straddle_thresh) && - (anchor_data[index + 3] < im_height + rpn_straddle_thresh)) { - inds_inside.emplace_back(i); - } - } - } else { - for (int i = 0; i < anchor_num; ++i) { - inds_inside.emplace_back(i); - } - } - int inside_num = static_cast(inds_inside.size()); - phi::DenseTensor inds_inside_t; - int* inds_inside_data = - inds_inside_t.mutable_data({inside_num}, context.GetPlace()); - std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data); - phi::DenseTensor inside_anchor_t; - T* inside_anchor_data = - inside_anchor_t.mutable_data({inside_num, 4}, context.GetPlace()); - Gather( - anchor->data(), 4, inds_inside_data, inside_num, inside_anchor_data); - std::vector res; - res.emplace_back(inds_inside_t); - res.emplace_back(inside_anchor_t); - return res; -} - -template -phi::DenseTensor FilterCrowdGt(const phi::CPUContext& context, - phi::DenseTensor* gt_boxes, - phi::DenseTensor* is_crowd) { - int gt_num = static_cast(gt_boxes->dims()[0]); - std::vector not_crowd_inds; - auto* is_crowd_data = is_crowd->data(); - for (int i = 0; i < gt_num; ++i) { - if (is_crowd_data[i] == 0) { - not_crowd_inds.emplace_back(i); - } - } - int ncrowd_num = static_cast(not_crowd_inds.size()); - phi::DenseTensor ncrowd_gt_boxes; - T* ncrowd_gt_boxes_data = - ncrowd_gt_boxes.mutable_data({ncrowd_num, 4}, context.GetPlace()); - Gather(gt_boxes->data(), - 4, - not_crowd_inds.data(), - ncrowd_num, - ncrowd_gt_boxes_data); - return ncrowd_gt_boxes; -} - -void ReservoirSampling(const int num, - std::vector* inds, - std::minstd_rand engine, - bool use_random) { - std::uniform_real_distribution uniform(0, 1); - int len = static_cast(inds->size()); - if (len > num) { - if (use_random) { - for (int i = num; i < len; ++i) { - int rng_ind = std::floor(uniform(engine) * i); // NOLINT - if (rng_ind < num) - std::iter_swap(inds->begin() + rng_ind, inds->begin() + i); - } - } - inds->resize(num); - } -} - -template -void ScoreAssign(const T* anchor_by_gt_overlap_data, - const phi::DenseTensor& anchor_to_gt_max, - const phi::DenseTensor& gt_to_anchor_max, - const int rpn_batch_size_per_im, - const float rpn_fg_fraction, - const float rpn_positive_overlap, - const float 
rpn_negative_overlap, - std::vector* fg_inds, - std::vector* bg_inds, - std::vector* tgt_lbl, - std::vector* fg_fake, - std::vector* bbox_inside_weight, - std::minstd_rand engine, - bool use_random) { - float epsilon = 0.00001; - int anchor_num = static_cast(anchor_to_gt_max.dims()[0]); - int gt_num = static_cast(gt_to_anchor_max.dims()[0]); - std::vector target_label(anchor_num, -1); - std::vector fg_inds_fake; - std::vector bg_inds_fake; - const T* anchor_to_gt_max_data = anchor_to_gt_max.data(); - const T* gt_to_anchor_max_data = gt_to_anchor_max.data(); - // TODO(buxingyuan): Match with Detectron now - // but it seems here is a bug in two directions assignment - // in which the later one may overwrites the former one. - for (int64_t i = 0; i < anchor_num; ++i) { - bool is_anchors_with_max_overlap = false; - for (int64_t j = 0; j < gt_num; ++j) { - T value = anchor_by_gt_overlap_data[i * gt_num + j]; - T diff = std::abs(value - gt_to_anchor_max_data[j]); - if (diff < epsilon) { - is_anchors_with_max_overlap = true; - break; - } - } - bool is_anchor_great_than_thresh = - (anchor_to_gt_max_data[i] >= rpn_positive_overlap); - if (is_anchors_with_max_overlap || is_anchor_great_than_thresh) { - fg_inds_fake.push_back(i); // NOLINT - } - } - - // Reservoir Sampling - int fg_num = 0; - if (rpn_fg_fraction > 0 && rpn_batch_size_per_im > 0) { - fg_num = - static_cast(rpn_fg_fraction * rpn_batch_size_per_im); // NOLINT - ReservoirSampling(fg_num, &fg_inds_fake, engine, use_random); - } else { - fg_num = static_cast(fg_inds_fake.size()); - } - int fg_fake_num = static_cast(fg_inds_fake.size()); - for (int64_t i = 0; i < fg_fake_num; ++i) { - target_label[fg_inds_fake[i]] = 1; - } - - for (int64_t i = 0; i < anchor_num; ++i) { - if (anchor_to_gt_max_data[i] < rpn_negative_overlap) { - bg_inds_fake.push_back(i); // NOLINT - } - } - int bg_num = 0; - if (rpn_fg_fraction > 0 && rpn_batch_size_per_im > 0) { - bg_num = rpn_batch_size_per_im - fg_fake_num; - ReservoirSampling(bg_num, &bg_inds_fake, engine, use_random); - bg_num = static_cast(bg_inds_fake.size()); - } else { - bg_num = static_cast(bg_inds_fake.size()); - } - - int fake_num = 0; - for (int64_t i = 0; i < bg_num; ++i) { - // fg fake found - if (target_label[bg_inds_fake[i]] == 1) { - fake_num++; - fg_fake->emplace_back(fg_inds_fake[0]); - for (int j = 0; j < 4; ++j) { - bbox_inside_weight->emplace_back(T(0.)); - } - } - target_label[bg_inds_fake[i]] = 0; - } - - for (int64_t i = 0; i < (fg_fake_num - fake_num) * 4; ++i) { - bbox_inside_weight->emplace_back(T(1.)); - } - - for (int64_t i = 0; i < anchor_num; ++i) { - if (target_label[i] == 1) { - fg_inds->emplace_back(i); - fg_fake->emplace_back(i); - } - if (target_label[i] == 0) bg_inds->emplace_back(i); - } - fg_num = static_cast(fg_inds->size()); - bg_num = static_cast(bg_inds->size()); - - tgt_lbl->resize(fg_num + bg_num, 0); - std::vector fg_lbl(fg_num, 1); - std::vector bg_lbl(bg_num, 0); - std::copy(fg_lbl.begin(), fg_lbl.end(), tgt_lbl->data()); - std::copy(bg_lbl.begin(), bg_lbl.end(), tgt_lbl->data() + fg_num); -} - -template -std::vector SampleRpnFgBgGt( - const phi::CPUContext& ctx, - const phi::DenseTensor& anchor_by_gt_overlap, - const int rpn_batch_size_per_im, - const float rpn_positive_overlap, - const float rpn_negative_overlap, - const float rpn_fg_fraction, - std::minstd_rand engine, - bool use_random) { - auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data(); - int anchor_num = static_cast(anchor_by_gt_overlap.dims()[0]); - int gt_num = 
static_cast(anchor_by_gt_overlap.dims()[1]); - - std::vector fg_inds; - std::vector bg_inds; - std::vector gt_inds; - std::vector tgt_lbl; - std::vector fg_fake; - std::vector bbox_inside_weight; - // Calculate the max IoU between anchors and gt boxes - // Map from anchor to gt box that has highest overlap - auto place = ctx.GetPlace(); - phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; - anchor_to_gt_max.mutable_data({anchor_num}, place); - int* argmax = anchor_to_gt_argmax.mutable_data({anchor_num}, place); - gt_to_anchor_max.mutable_data({gt_num}, place); - - auto anchor_by_gt_overlap_et = - framework::EigenMatrix::From(anchor_by_gt_overlap); - auto anchor_to_gt_max_et = - framework::EigenVector::Flatten(anchor_to_gt_max); - auto gt_to_anchor_max_et = - framework::EigenVector::Flatten(gt_to_anchor_max); - auto anchor_to_gt_argmax_et = - framework::EigenVector::Flatten(anchor_to_gt_argmax); - anchor_to_gt_max_et = - anchor_by_gt_overlap_et.maximum(Eigen::DSizes(1)); - anchor_to_gt_argmax_et = - anchor_by_gt_overlap_et.argmax(1).template cast(); - gt_to_anchor_max_et = - anchor_by_gt_overlap_et.maximum(Eigen::DSizes(0)); - - // Follow the Faster RCNN's implementation - ScoreAssign(anchor_by_gt_overlap_data, - anchor_to_gt_max, - gt_to_anchor_max, - rpn_batch_size_per_im, - rpn_fg_fraction, - rpn_positive_overlap, - rpn_negative_overlap, - &fg_inds, - &bg_inds, - &tgt_lbl, - &fg_fake, - &bbox_inside_weight, - engine, - use_random); - - int fg_num = static_cast(fg_inds.size()); - int bg_num = static_cast(bg_inds.size()); - int fg_fake_num = static_cast(fg_fake.size()); - gt_inds.reserve(fg_fake_num); - for (int i = 0; i < fg_fake_num; ++i) { - gt_inds.emplace_back(argmax[fg_fake[i]]); - } - phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, - bbox_inside_weight_t; - int* loc_index_data = loc_index_t.mutable_data({fg_fake_num}, place); - int* score_index_data = - score_index_t.mutable_data({fg_num + bg_num}, place); - int* tgt_lbl_data = tgt_lbl_t.mutable_data({fg_num + bg_num}, place); - int* gt_inds_data = gt_inds_t.mutable_data({fg_fake_num}, place); - T* bbox_inside_weight_data = - bbox_inside_weight_t.mutable_data({fg_fake_num, 4}, place); - std::copy(fg_fake.begin(), fg_fake.end(), loc_index_data); - std::copy(fg_inds.begin(), fg_inds.end(), score_index_data); - std::copy(bg_inds.begin(), bg_inds.end(), score_index_data + fg_num); - std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data); - std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data); - std::copy(bbox_inside_weight.begin(), - bbox_inside_weight.end(), - bbox_inside_weight_data); - std::vector loc_score_tgtlbl_gt; - loc_score_tgtlbl_gt.emplace_back(loc_index_t); - loc_score_tgtlbl_gt.emplace_back(score_index_t); - loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); - loc_score_tgtlbl_gt.emplace_back(gt_inds_t); - loc_score_tgtlbl_gt.emplace_back(bbox_inside_weight_t); - - return loc_score_tgtlbl_gt; -} - -template -class RpnTargetAssignKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 - auto* gt_boxes = context.Input("GtBoxes"); - auto* is_crowd = context.Input("IsCrowd"); - auto* im_info = context.Input("ImInfo"); - - auto* loc_index = context.Output("LocationIndex"); - auto* score_index = context.Output("ScoreIndex"); - auto* tgt_bbox = context.Output("TargetBBox"); - auto* tgt_lbl = context.Output("TargetLabel"); - auto* bbox_inside_weight = - 
context.Output("BBoxInsideWeight"); - - PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), - 1UL, - platform::errors::InvalidArgument( - "RpnTargetAssignOp gt_boxes needs 1 level of LoD. " - "But received level of LoD is [%d], LoD is [%s].", - gt_boxes->lod().size(), - gt_boxes->lod())); - PADDLE_ENFORCE_EQ(is_crowd->lod().size(), - 1UL, - platform::errors::InvalidArgument( - "RpnTargetAssignOp is_crowd needs 1 level of LoD. " - "But received level of LoD is [%d], LoD is [%s].", - is_crowd->lod().size(), - is_crowd->lod())); - int64_t anchor_num = static_cast(anchor->dims()[0]); - int64_t batch_num = static_cast(gt_boxes->lod().back().size() - 1); - - int rpn_batch_size_per_im = context.Attr("rpn_batch_size_per_im"); - float rpn_straddle_thresh = context.Attr("rpn_straddle_thresh"); - float rpn_positive_overlap = context.Attr("rpn_positive_overlap"); - float rpn_negative_overlap = context.Attr("rpn_negative_overlap"); - float rpn_fg_fraction = context.Attr("rpn_fg_fraction"); - bool use_random = context.Attr("use_random"); - - int64_t max_num = batch_num * rpn_batch_size_per_im; - auto place = context.GetPlace(); - - loc_index->mutable_data({max_num}, place); - score_index->mutable_data({max_num}, place); - tgt_bbox->mutable_data({max_num, 4}, place); - tgt_lbl->mutable_data({max_num, 1}, place); - bbox_inside_weight->mutable_data({max_num, 4}, place); - auto& dev_ctx = context.device_context(); - - std::random_device rnd; - std::minstd_rand engine; - int seed = static_cast(rnd()); - engine.seed(seed); - - framework::LoD lod_loc, loc_score; - std::vector lod0_loc(1, 0); - std::vector lod0_score(1, 0); - - int total_loc_num = 0; - int total_score_num = 0; - auto gt_boxes_lod = gt_boxes->lod().back(); - auto is_crowd_lod = is_crowd->lod().back(); - for (int i = 0; i < batch_num; ++i) { - phi::DenseTensor gt_boxes_slice = - gt_boxes->Slice(static_cast(gt_boxes_lod[i]), - static_cast(gt_boxes_lod[i + 1])); - phi::DenseTensor is_crowd_slice = - is_crowd->Slice(static_cast(is_crowd_lod[i]), - static_cast(is_crowd_lod[i + 1])); - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - auto* im_info_data = im_info_slice.data(); - auto im_height = im_info_data[0]; - auto im_width = im_info_data[1]; - auto im_scale = im_info_data[2]; - - // Filter straddle anchor - std::vector filter_output = FilterStraddleAnchor( - dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width); - phi::DenseTensor inds_inside = filter_output[0]; - phi::DenseTensor inside_anchor = filter_output[1]; - - // Filter crowd gt - phi::DenseTensor ncrowd_gt_boxes = - FilterCrowdGt(dev_ctx, >_boxes_slice, &is_crowd_slice); - auto ncrowd_gt_boxes_et = - framework::EigenTensor::From(ncrowd_gt_boxes); - ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; - - phi::DenseTensor anchor_by_gt_overlap; - anchor_by_gt_overlap.mutable_data( - {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); - BboxOverlaps(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); - - auto loc_score_tgtlbl_gt = SampleRpnFgBgGt(dev_ctx, - anchor_by_gt_overlap, - rpn_batch_size_per_im, - rpn_positive_overlap, - rpn_negative_overlap, - rpn_fg_fraction, - engine, - use_random); - - phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0]; - phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1]; - phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; - phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3]; - phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; - - int loc_num = static_cast(sampled_loc_index.dims()[0]); - 
int score_num = static_cast(sampled_score_index.dims()[0]); - // unmap to all anchor - phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap; - sampled_loc_index_unmap.mutable_data({loc_num}, place); - sampled_score_index_unmap.mutable_data({score_num}, place); - Gather(inds_inside.data(), - 1, - sampled_loc_index.data(), - loc_num, - sampled_loc_index_unmap.data()); - Gather(inds_inside.data(), - 1, - sampled_score_index.data(), - score_num, - sampled_score_index_unmap.data()); - - // get target bbox deltas - phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox; - auto* sampled_anchor_data = - sampled_anchor.mutable_data({loc_num, 4}, place); - auto* sampled_gt_data = sampled_gt.mutable_data({loc_num, 4}, place); - Gather(anchor->data(), - 4, - sampled_loc_index_unmap.data(), - loc_num, - sampled_anchor_data); - Gather(ncrowd_gt_boxes.data(), - 4, - sampled_gt_index.data(), - loc_num, - sampled_gt_data); - sampled_tgt_bbox.mutable_data({loc_num, 4}, place); - BoxToDelta(loc_num, - sampled_anchor, - sampled_gt, - nullptr, - false, - &sampled_tgt_bbox); - - // Add anchor offset - int anchor_offset = static_cast(i * anchor_num); - auto sampled_loc_index_unmap_et = - framework::EigenTensor::From(sampled_loc_index_unmap); - sampled_loc_index_unmap_et = sampled_loc_index_unmap_et + anchor_offset; - auto sampled_score_index_unmap_et = - framework::EigenTensor::From(sampled_score_index_unmap); - sampled_score_index_unmap_et = - sampled_score_index_unmap_et + anchor_offset; - AppendRpns(loc_index, total_loc_num, &sampled_loc_index_unmap); - AppendRpns(score_index, total_score_num, &sampled_score_index_unmap); - AppendRpns(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox); - AppendRpns(tgt_lbl, total_score_num, &sampled_tgtlbl); - AppendRpns( - bbox_inside_weight, total_loc_num * 4, &sampled_bbox_inside_weight); - total_loc_num += loc_num; - - total_score_num += score_num; - lod0_loc.emplace_back(total_loc_num); - lod0_score.emplace_back(total_score_num); - } - - PADDLE_ENFORCE_LE( - total_loc_num, - max_num, - platform::errors::InvalidArgument( - "The number of sampled bboxes should not be greater than the " - "number of all anchor boxes(%d), but the number of sampled " - "bboxes is :%d.", - max_num, - total_loc_num)); - PADDLE_ENFORCE_LE( - total_score_num, - max_num, - platform::errors::InvalidArgument( - "The number of sampled scores should not be greater than the " - "number of all anchor boxes(%d), but the number of sampled " - "scores is :%d.", - max_num, - total_score_num)); - - lod_loc.emplace_back(lod0_loc); - loc_score.emplace_back(lod0_score); - loc_index->set_lod(lod_loc); - score_index->set_lod(loc_score); - tgt_bbox->set_lod(lod_loc); - tgt_lbl->set_lod(loc_score); - bbox_inside_weight->set_lod(lod_loc); - loc_index->Resize({total_loc_num}); - score_index->Resize({total_score_num}); - tgt_bbox->Resize({total_loc_num, 4}); - tgt_lbl->Resize({total_score_num, 1}); - bbox_inside_weight->Resize({total_loc_num, 4}); - } -}; - -class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Anchor", - "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."); - AddInput("GtBoxes", - "(phi::DenseTensor) input ground-truth bbox with shape [K, 4]."); - AddInput("IsCrowd", - "(phi::DenseTensor) input which indicates ground-truth is crowd."); - AddInput("ImInfo", - "(phi::DenseTensor) input image information with shape [N, 3]. 
" - "N is the batch size, each image information includes height, " - "width and scale."); - AddAttr("rpn_batch_size_per_im", - "Total number of RPN examples per image.") - .SetDefault(256); - AddAttr( - "rpn_straddle_thresh", - "Remove RPN anchors that go outside the image by straddle_thresh " - "pixels, " - "Set to -1 or a large value, e.g. 100000, to disable pruning anchors."); - AddAttr( - "rpn_positive_overlap", - "Minimum overlap required between an anchor and ground-truth " - "box for the (anchor, gt box) pair to be a positive example.") - .SetDefault(0.7); - AddAttr( - "rpn_negative_overlap", - "Maximum overlap allowed between an anchor and ground-truth " - "box for the (anchor, gt box) pair to be a negative examples.") - .SetDefault(0.3); - AddAttr( - "rpn_fg_fraction", - "Target fraction of RoI minibatch that " - "is labeled foreground (i.e. class > 0), 0-th class is background.") - .SetDefault(0.25); - AddAttr("use_random", - "A flag indicating whether to use a ReservoirSampling. " - "NOTE: DO NOT set this flag to false in training. " - "Setting this flag to false is only useful in unittest.") - .SetDefault(true); - AddOutput( - "LocationIndex", - "(Tensor), The indexes of foreground anchors in all RPN anchors, the " - "shape of the LocationIndex is [F], F depends on the value of input " - "tensor and attributes."); - AddOutput( - "ScoreIndex", - "(Tensor), The indexes of foreground and background anchors in all " - "RPN anchors(The rest anchors are ignored). The shape of the " - "ScoreIndex is [F + B], F and B are sampled foreground and background " - " number."); - AddOutput("TargetBBox", - "(Tensor), The target bbox deltas with shape " - "[F, 4], F is the sampled foreground number."); - AddOutput( - "TargetLabel", - "(Tensor), The target labels of each anchor with shape " - "[F + B, 1], F and B are sampled foreground and background number."); - AddOutput("BBoxInsideWeight", - "(Tensor), The bbox inside weight with shape " - "[F, 4], F is the sampled foreground number."); - AddComment(R"DOC( -This operator can be, for a given set of ground truth bboxes and the -anchors, to assign classification and regression targets to each prediction. -The ScoreIndex and LocationIndex will be generated according to the anchor-groundtruth IOU. -The rest anchors would not contibute to the RPN training loss - -ScoreIndex is composed of foreground anchor indexes(positive labels) and -background anchor indexes(negative labels). LocationIndex is exactly same -as the foreground anchor indexes since we can not assign regression target to -the background anchors. - -The classification targets(TargetLabel) is a binary class label (of being -an object or not). Following the paper of Faster-RCNN, the positive labels -are two kinds of anchors: (i) the anchor/anchors with the highest IoU -overlap with a ground-truth box, or (ii) an anchor that has an IoU overlap -higher than rpn_positive_overlap(0.7) with any ground-truth box. Note that -a single ground-truth box may assign positive labels to multiple anchors. -A non-positive anchor is when its IoU ratio is lower than rpn_negative_overlap -(0.3) for all ground-truth boxes. Anchors that are neither positive nor -negative do not contribute to the training objective. 
- -)DOC"); - } -}; - -class RetinanetTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Anchor", - "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."); - AddInput("GtBoxes", - "(phi::DenseTensor) input ground-truth bbox with shape [K, 4]."); - AddInput("GtLabels", - "(phi::DenseTensor) input ground-truth label with shape [K, 1]."); - AddInput("IsCrowd", - "(phi::DenseTensor) input which indicates ground-truth is crowd."); - AddInput("ImInfo", - "(phi::DenseTensor) input image information with shape [N, 3]. " - "N is the batch size, each image information includes height, " - "width and scale."); - AddAttr( - "positive_overlap", - "Minimum overlap required between an anchor and ground-truth " - "box for the (anchor, gt box) pair to be a positive example.") - .SetDefault(0.5); - AddAttr( - "negative_overlap", - "Maximum overlap allowed between an anchor and ground-truth " - "box for the (anchor, gt box) pair to be a negative examples.") - .SetDefault(0.4); - AddOutput( - "LocationIndex", - "(Tensor), The indexes of foreground anchors in all anchors, the " - "shape of the LocationIndex is [F], F depends on the value of input " - "tensor and attributes."); - AddOutput( - "ScoreIndex", - "(Tensor), The indexes of foreground and background anchors in all " - "RPN anchors(The rest anchors are ignored). The shape of the " - "ScoreIndex is [F + B], F and B are foreground and background " - " number."); - AddOutput("TargetBBox", - "(Tensor), The target bbox deltas with shape " - "[F, 4], F is the foreground number."); - AddOutput("TargetLabel", - "(Tensor), The target labels of each anchor with shape " - "[F + B, 1], F and B are foreground and background number."); - AddOutput("BBoxInsideWeight", - "(Tensor), The bbox inside weight with shape " - "[F, 4], F is the foreground number."); - AddOutput("ForegroundNumber", - "(Tensor), The foreground number. " - "[1, 1]."); - AddComment(R"DOC( - This layer can be, for given the Intersection-over-Union (IoU) overlap - between anchors and ground truth boxes, to assign classification and - regression targets to each anchor, these target labels are used for - train retinanet. - - Every anchor is assigned with a length C one-hot vector of - classification targets, and a 4-vector of box regression targets, - where C is the class number. The assignment rules are as followed: - - 1. Anchors are assigned to ground-truth boxes when: (i) it has the highest - IoU overlap with a ground-truth box, or (ii) it has an IoU overlap higher - than positive_overlap(0.5) with any ground-truth box. - - 2. Anchors are assigned to background when its IoU ratio is lower than - negative_overlap (0.4) for all ground-truth boxes. - - When an anchor is assigned with a ground-truth box which is the i-th category, - the i-th entry in its C vector of targets is set to 1 and all other entries - are set to 0. When an anchor is assigned with background, all entries are set - to 0. Anchors that are not assigned do not contribute to the training - objective. The regression targets are the encoded ground-truth boxes - associated with the assigned anchors. 
- -)DOC"); - } -}; - -class RetinanetTargetAssignOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Anchor"), "Input", "Anchor", "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasInput("GtBoxes"), - "Input", - "GtBoxes", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasInput("GtLabels"), - "Input", - "GtLabels", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasInput("IsCrowd"), - "Input", - "IsCrowd", - "retinanet_target_assign"); - OP_INOUT_CHECK( - ctx->HasInput("ImInfo"), "Input", "ImInfo", "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("LocationIndex"), - "Output", - "LocationIndex", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("ScoreIndex"), - "Output", - "ScoreIndex", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("TargetLabel"), - "Output", - "TargetLabel", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("TargetBBox"), - "Output", - "TargetBBox", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("BBoxInsideWeight"), - "Output", - "BBoxInsideWeight", - "retinanet_target_assign"); - OP_INOUT_CHECK(ctx->HasOutput("ForegroundNumber"), - "Output", - "ForegroundNumber", - "retinanet_target_assign"); - - auto anchor_dims = ctx->GetInputDim("Anchor"); - auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); - auto gt_labels_dims = ctx->GetInputDim("GtLabels"); - auto im_info_dims = ctx->GetInputDim("ImInfo"); - - PADDLE_ENFORCE_EQ( - anchor_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(Anchor) should be 2, but received Anchor " - "rank is :%d, Anchor shape is:[%s].", - anchor_dims.size(), - anchor_dims)); - PADDLE_ENFORCE_EQ( - gt_boxes_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(GtBoxes) should be 2, but received GtBoxes " - "rank is :%d, GtBoxes shape is:[%s].", - gt_boxes_dims.size(), - gt_boxes_dims)); - PADDLE_ENFORCE_EQ( - gt_labels_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(GtLabels) should be 2, but received GtLabels " - "rank is :%d, GtLabels shape is:[%s].", - gt_labels_dims.size(), - gt_labels_dims)); - PADDLE_ENFORCE_EQ( - im_info_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(ImInfo) should be 2, but received ImInfo " - "rank is :%d, ImInfo shape is:[%s].", - im_info_dims.size(), - im_info_dims)); - - ctx->SetOutputDim("LocationIndex", {gt_labels_dims[0]}); - ctx->SetOutputDim("ScoreIndex", {gt_labels_dims[0]}); - ctx->SetOutputDim("TargetBBox", {gt_labels_dims[0], 4}); - ctx->SetOutputDim("TargetLabel", {gt_labels_dims[0], 1}); - ctx->SetOutputDim("BBoxInsideWeight", {gt_labels_dims[0], 4}); - ctx->SetOutputDim("ForegroundNumber", {gt_labels_dims[0], 1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Anchor"), - platform::CPUPlace()); - } -}; - -template -std::vector FilterCrowdGtBoxLabel( - const phi::CPUContext& context, - phi::DenseTensor* gt_boxes, - phi::DenseTensor* gt_labels, - phi::DenseTensor* is_crowd) { - int gt_num = static_cast(gt_boxes->dims()[0]); - std::vector not_crowd_inds; - auto* is_crowd_data = is_crowd->data(); - for (int i = 0; i < gt_num; ++i) { - if (is_crowd_data[i] == 0) { - not_crowd_inds.emplace_back(i); - } - } - int ncrowd_num = 
static_cast(not_crowd_inds.size()); - phi::DenseTensor ncrowd_gt_boxes, ncrowd_gt_labels; - T* ncrowd_gt_boxes_data = - ncrowd_gt_boxes.mutable_data({ncrowd_num, 4}, context.GetPlace()); - int* ncrowd_gt_labels_data = - ncrowd_gt_labels.mutable_data({ncrowd_num, 1}, context.GetPlace()); - Gather(gt_boxes->data(), - 4, - not_crowd_inds.data(), - ncrowd_num, - ncrowd_gt_boxes_data); - Gather(gt_labels->data(), - 1, - not_crowd_inds.data(), - ncrowd_num, - ncrowd_gt_labels_data); - std::vector res; - res.emplace_back(ncrowd_gt_boxes); - res.emplace_back(ncrowd_gt_labels); - return res; -} - -template -std::vector GetAllFgBgGt( - const phi::CPUContext& ctx, - const phi::DenseTensor& anchor_by_gt_overlap, - const phi::DenseTensor& ncrowd_gt_labels, - const float positive_overlap, - const float negative_overlap, - std::minstd_rand engine) { - auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data(); - int anchor_num = static_cast(anchor_by_gt_overlap.dims()[0]); - int gt_num = static_cast(anchor_by_gt_overlap.dims()[1]); - - std::vector fg_inds; - std::vector bg_inds; - std::vector gt_inds; - std::vector tgt_lbl; - std::vector fg_fake; - std::vector bbox_inside_weight; - // Calculate the max IoU between anchors and gt boxes - // Map from anchor to gt box that has highest overlap - auto place = ctx.GetPlace(); - phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; - anchor_to_gt_max.mutable_data({anchor_num}, place); - int* argmax = anchor_to_gt_argmax.mutable_data({anchor_num}, place); - gt_to_anchor_max.mutable_data({gt_num}, place); - - auto anchor_by_gt_overlap_et = - framework::EigenMatrix::From(anchor_by_gt_overlap); - auto anchor_to_gt_max_et = - framework::EigenVector::Flatten(anchor_to_gt_max); - auto gt_to_anchor_max_et = - framework::EigenVector::Flatten(gt_to_anchor_max); - auto anchor_to_gt_argmax_et = - framework::EigenVector::Flatten(anchor_to_gt_argmax); - anchor_to_gt_max_et = - anchor_by_gt_overlap_et.maximum(Eigen::DSizes(1)); - anchor_to_gt_argmax_et = - anchor_by_gt_overlap_et.argmax(1).template cast(); - gt_to_anchor_max_et = - anchor_by_gt_overlap_et.maximum(Eigen::DSizes(0)); - - ScoreAssign(anchor_by_gt_overlap_data, - anchor_to_gt_max, - gt_to_anchor_max, - -1, - -1, - positive_overlap, - negative_overlap, - &fg_inds, - &bg_inds, - &tgt_lbl, - &fg_fake, - &bbox_inside_weight, - engine, - false); - const int* gt_labels_data = ncrowd_gt_labels.data(); - int64_t fg_num = static_cast(fg_inds.size()); - for (int64_t i = 0; i < fg_num; ++i) { - int gt_idx = argmax[fg_inds[i]]; - tgt_lbl[i] = gt_labels_data[gt_idx]; - } - - int bg_num = static_cast(bg_inds.size()); - int fg_fake_num = static_cast(fg_fake.size()); - gt_inds.reserve(fg_fake_num); - for (int i = 0; i < fg_fake_num; ++i) { - gt_inds.emplace_back(argmax[fg_fake[i]]); - } - - phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, - bbox_inside_weight_t; - phi::DenseTensor fg_num_t; - int* loc_index_data = loc_index_t.mutable_data({fg_fake_num}, place); - int* score_index_data = - score_index_t.mutable_data({fg_num + bg_num}, place); - int* tgt_lbl_data = tgt_lbl_t.mutable_data({fg_num + bg_num}, place); - int* gt_inds_data = gt_inds_t.mutable_data({fg_fake_num}, place); - int* fg_num_data = fg_num_t.mutable_data({1}, place); - T* bbox_inside_weight_data = - bbox_inside_weight_t.mutable_data({fg_fake_num, 4}, place); - std::copy(fg_fake.begin(), fg_fake.end(), loc_index_data); - std::copy(fg_inds.begin(), fg_inds.end(), score_index_data); - std::copy(bg_inds.begin(), 
bg_inds.end(), score_index_data + fg_num); - std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data); - std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data); - std::copy(bbox_inside_weight.begin(), - bbox_inside_weight.end(), - bbox_inside_weight_data); - fg_num_data[0] = static_cast(fg_fake.size()) + 1; - std::vector loc_score_tgtlbl_gt; - loc_score_tgtlbl_gt.emplace_back(loc_index_t); - loc_score_tgtlbl_gt.emplace_back(score_index_t); - loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); - loc_score_tgtlbl_gt.emplace_back(gt_inds_t); - loc_score_tgtlbl_gt.emplace_back(bbox_inside_weight_t); - loc_score_tgtlbl_gt.emplace_back(fg_num_t); - - return loc_score_tgtlbl_gt; -} - -template -class RetinanetTargetAssignKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 - auto* gt_boxes = context.Input("GtBoxes"); - auto* gt_labels = context.Input("GtLabels"); - auto* is_crowd = context.Input("IsCrowd"); - auto* im_info = context.Input("ImInfo"); - - auto* loc_index = context.Output("LocationIndex"); - auto* score_index = context.Output("ScoreIndex"); - auto* tgt_bbox = context.Output("TargetBBox"); - auto* tgt_lbl = context.Output("TargetLabel"); - auto* bbox_inside_weight = - context.Output("BBoxInsideWeight"); - auto* fg_num = context.Output("ForegroundNumber"); - - PADDLE_ENFORCE_EQ( - gt_boxes->lod().size(), - 1UL, - platform::errors::InvalidArgument( - "The LoD level of Input(GtBoxes) should be 1, but received GtBoxes " - "LoD level is :%d.", - gt_boxes->lod().size())); - PADDLE_ENFORCE_EQ( - gt_labels->lod().size(), - 1UL, - platform::errors::InvalidArgument("The LoD level of Input(GtLabels) " - "should be 1, but received GtLabels " - "LoD level is :%d.", - gt_labels->lod().size())); - PADDLE_ENFORCE_EQ( - is_crowd->lod().size(), - 1UL, - platform::errors::InvalidArgument( - "The LoD level of Input(IsCrowd) should be 1, but received IsCrowd " - "LoD level is :%d.", - is_crowd->lod().size())); - - int64_t anchor_num = static_cast(anchor->dims()[0]); - int64_t batch_num = static_cast(gt_boxes->lod().back().size() - 1); - - float positive_overlap = context.Attr("positive_overlap"); - float negative_overlap = context.Attr("negative_overlap"); - - int64_t max_num = batch_num * anchor_num; - auto place = context.GetPlace(); - - loc_index->mutable_data({max_num}, place); - score_index->mutable_data({max_num}, place); - tgt_bbox->mutable_data({max_num, 4}, place); - tgt_lbl->mutable_data({max_num, 1}, place); - bbox_inside_weight->mutable_data({max_num, 4}, place); - fg_num->mutable_data({batch_num, 1}, place); - auto& dev_ctx = context.device_context(); - - std::random_device rnd; - std::minstd_rand engine; - int seed = static_cast(rnd()); - engine.seed(seed); - - framework::LoD lod_loc, loc_score, lod_fg; - std::vector lod0_loc(1, 0); - std::vector lod0_score(1, 0); - std::vector lod0_fg(1, 0); - - int total_loc_num = 0; - int total_score_num = 0; - int total_fg_num = 0; - auto gt_boxes_lod = gt_boxes->lod().back(); - auto gt_labels_lod = gt_labels->lod().back(); - auto is_crowd_lod = is_crowd->lod().back(); - for (int i = 0; i < batch_num; ++i) { - phi::DenseTensor gt_boxes_slice = - gt_boxes->Slice(static_cast(gt_boxes_lod[i]), - static_cast(gt_boxes_lod[i + 1])); - phi::DenseTensor gt_labels_slice = - gt_labels->Slice(static_cast(gt_labels_lod[i]), - static_cast(gt_labels_lod[i + 1])); - phi::DenseTensor is_crowd_slice = - is_crowd->Slice(static_cast(is_crowd_lod[i]), - 
static_cast(is_crowd_lod[i + 1])); - phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); - auto* im_info_data = im_info_slice.data(); - auto im_height = im_info_data[0]; - auto im_width = im_info_data[1]; - auto im_scale = im_info_data[2]; - - // Filter straddle anchor - std::vector filter_output = - FilterStraddleAnchor(dev_ctx, anchor, -1, im_height, im_width); - phi::DenseTensor inds_inside = filter_output[0]; - phi::DenseTensor inside_anchor = filter_output[1]; - - // Filter crowd gt - std::vector ncrowd_output = FilterCrowdGtBoxLabel( - dev_ctx, >_boxes_slice, >_labels_slice, &is_crowd_slice); - phi::DenseTensor ncrowd_gt_boxes = ncrowd_output[0]; - phi::DenseTensor ncrowd_gt_labels = ncrowd_output[1]; - - auto ncrowd_gt_boxes_et = - framework::EigenTensor::From(ncrowd_gt_boxes); - ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; - - phi::DenseTensor anchor_by_gt_overlap; - anchor_by_gt_overlap.mutable_data( - {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); - BboxOverlaps(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); - - auto loc_score_tgtlbl_gt = GetAllFgBgGt(dev_ctx, - anchor_by_gt_overlap, - ncrowd_gt_labels, - positive_overlap, - negative_overlap, - engine); - - phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0]; - phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1]; - phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; - phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3]; - phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; - phi::DenseTensor sampled_fg_num = loc_score_tgtlbl_gt[5]; - - int loc_num = static_cast(sampled_loc_index.dims()[0]); - int score_num = static_cast(sampled_score_index.dims()[0]); - // unmap to all anchor - phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap; - sampled_loc_index_unmap.mutable_data({loc_num}, place); - sampled_score_index_unmap.mutable_data({score_num}, place); - Gather(inds_inside.data(), - 1, - sampled_loc_index.data(), - loc_num, - sampled_loc_index_unmap.data()); - Gather(inds_inside.data(), - 1, - sampled_score_index.data(), - score_num, - sampled_score_index_unmap.data()); - - // get target bbox deltas - phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox; - auto* sampled_anchor_data = - sampled_anchor.mutable_data({loc_num, 4}, place); - auto* sampled_gt_data = sampled_gt.mutable_data({loc_num, 4}, place); - Gather(anchor->data(), - 4, - sampled_loc_index_unmap.data(), - loc_num, - sampled_anchor_data); - Gather(ncrowd_gt_boxes.data(), - 4, - sampled_gt_index.data(), - loc_num, - sampled_gt_data); - sampled_tgt_bbox.mutable_data({loc_num, 4}, place); - BoxToDelta(loc_num, - sampled_anchor, - sampled_gt, - nullptr, - false, - &sampled_tgt_bbox); - - // Add anchor offset - int anchor_offset = static_cast(i * anchor_num); - auto sampled_loc_index_unmap_et = - framework::EigenTensor::From(sampled_loc_index_unmap); - sampled_loc_index_unmap_et = sampled_loc_index_unmap_et + anchor_offset; - auto sampled_score_index_unmap_et = - framework::EigenTensor::From(sampled_score_index_unmap); - sampled_score_index_unmap_et = - sampled_score_index_unmap_et + anchor_offset; - AppendRpns(loc_index, total_loc_num, &sampled_loc_index_unmap); - AppendRpns(score_index, total_score_num, &sampled_score_index_unmap); - AppendRpns(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox); - AppendRpns(tgt_lbl, total_score_num, &sampled_tgtlbl); - AppendRpns( - bbox_inside_weight, total_loc_num * 4, &sampled_bbox_inside_weight); - AppendRpns(fg_num, 
total_fg_num, &sampled_fg_num); - - total_loc_num += loc_num; - total_score_num += score_num; - total_fg_num += 1; - lod0_loc.emplace_back(total_loc_num); - lod0_score.emplace_back(total_score_num); - lod0_fg.emplace_back(total_fg_num); - } - - PADDLE_ENFORCE_LE( - total_loc_num, - max_num, - platform::errors::InvalidArgument( - "The number of sampled bboxes should not be greater than the " - "number of all anchor boxes(%d), but the number of sampled " - "bboxes is :%d.", - max_num, - total_loc_num)); - PADDLE_ENFORCE_LE( - total_score_num, - max_num, - platform::errors::InvalidArgument( - "The number of sampled scores should not be greater than the " - "number of all anchor boxes(%d), but the number of sampled " - "scores is :%d.", - max_num, - total_score_num)); - PADDLE_ENFORCE_LE( - total_fg_num, - batch_num, - platform::errors::InvalidArgument( - "The number of foreground numbers should not be greater than the " - "batch size(%d), but the number of foreground numbers is :%d.", - batch_num, - total_fg_num)); - - lod_loc.emplace_back(lod0_loc); - loc_score.emplace_back(lod0_score); - lod_fg.emplace_back(lod0_fg); - loc_index->set_lod(lod_loc); - score_index->set_lod(loc_score); - tgt_bbox->set_lod(lod_loc); - tgt_lbl->set_lod(loc_score); - bbox_inside_weight->set_lod(lod_loc); - fg_num->set_lod(lod_fg); - loc_index->Resize({total_loc_num}); - score_index->Resize({total_score_num}); - tgt_bbox->Resize({total_loc_num, 4}); - tgt_lbl->Resize({total_score_num, 1}); - bbox_inside_weight->Resize({total_loc_num, 4}); - fg_num->Resize({total_fg_num, 1}); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - rpn_target_assign, - ops::RpnTargetAssignOp, - ops::RpnTargetAssignOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(rpn_target_assign, - CPU, - ALL_LAYOUT, - ops::RpnTargetAssignKernel, - float, - double) {} -REGISTER_OPERATOR( - retinanet_target_assign, - ops::RetinanetTargetAssignOp, - ops::RetinanetTargetAssignOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(retinanet_target_assign, - CPU, - ALL_LAYOUT, - ops::RetinanetTargetAssignKernel, - float, - double) {} diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index a0879337f5ae7..cee37d49eb69b 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -45,28 +45,28 @@ class DetectionMAPOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( det_dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(DetectRes) ndim must be 2, the shape is [N, 6]," "but received the ndim is %d", det_dims.size())); PADDLE_ENFORCE_EQ( det_dims[1], 6UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape is of Input(DetectRes) [N, 6], but received" " shape is [N, %d]", det_dims[1])); auto label_dims = ctx->GetInputDim("Label"); PADDLE_ENFORCE_EQ(label_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The ndim of Input(Label) must be 2, but received %d", label_dims.size())); if (ctx->IsRuntime() || label_dims[1] > 0) { PADDLE_ENFORCE_EQ( (label_dims[1] == 6 || label_dims[1] == 5), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Label) is [N, 6] or [N, 5], but received " "[N, %d]", label_dims[1])); @@ -75,12 +75,12 
@@ class DetectionMAPOp : public framework::OperatorWithKernel {
   if (ctx->HasInput("PosCount")) {
     PADDLE_ENFORCE(
         ctx->HasInput("TruePos"),
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "Input(TruePos) of DetectionMAPOp should not be null when "
             "Input(PosCount) is not null."));
     PADDLE_ENFORCE(
         ctx->HasInput("FalsePos"),
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "Input(FalsePos) of DetectionMAPOp should not be null when "
             "Input(PosCount) is not null."));
   }
@@ -197,7 +197,7 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
           PADDLE_ENFORCE_NE(
               GetAPType(ap_type),
               APType::kNone,
-              platform::errors::InvalidArgument(
+              phi::errors::InvalidArgument(
                   "The ap_type should be 'integral' or '11point'."));
         });
   AddComment(R"DOC(
diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h
index ccf0834968793..24fea9c431c63 100644
--- a/paddle/fluid/operators/detection_map_op.h
+++ b/paddle/fluid/operators/detection_map_op.h
@@ -82,12 +82,12 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(
         label_lod.size(),
         1UL,
-        platform::errors::InvalidArgument("Only supports a LoDTensor with "
-                                          "lod_level 1 for label, but received %d.",
-                                          label_lod.size()));
+        phi::errors::InvalidArgument("Only supports a LoDTensor with "
+                                     "lod_level 1 for label, but received %d.",
+                                     label_lod.size()));
     PADDLE_ENFORCE_EQ(label_lod[0].size(),
                       detect_lod[0].size(),
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The batch_size of input(Label) and input(Detection) "
                           "must be the same, but received %d:%d",
                           label_lod[0].size(),
@@ -212,7 +212,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(
         input_label.dims()[1],
         5,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The input label width"
             " must be 5, but received %d, please check your input data",
             input_label.dims()[1]));
@@ -504,7 +504,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
       mAP += average_precisions;
       ++count;
     } else {
-      PADDLE_THROW(platform::errors::Unimplemented(
+      PADDLE_THROW(phi::errors::Unimplemented(
           "Unknown ap version %s. 
Now only supports integral and 11point.",
          ap_type));
    }
diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op.cc b/paddle/fluid/operators/dlnne/dlnne_engine_op.cc
index 86508bfbf2720..a09e8aaec8156 100644
--- a/paddle/fluid/operators/dlnne/dlnne_engine_op.cc
+++ b/paddle/fluid/operators/dlnne/dlnne_engine_op.cc
@@ -44,8 +44,8 @@ std::string ConvertType(phi::DataType type) {
     }
     default: {
       PADDLE_THROW(
-          platform::errors::Fatal("The DLNNE Calibration only supports "
-                                  "float/float16/int32_t/int64_t input."));
+          phi::errors::Fatal("The DLNNE Calibration only supports "
+                             "float/float16/int32_t/int64_t input."));
     }
   }
 }
@@ -66,8 +66,8 @@ int GetDataByte(phi::DataType type) {
     }
     default: {
       PADDLE_THROW(
-          platform::errors::Fatal("The DLNNE Calibration only supports "
-                                  "float/float16/int32_t/int64_t input."));
+          phi::errors::Fatal("The DLNNE Calibration only supports "
+                             "float/float16/int32_t/int64_t input."));
     }
   }
 }
@@ -93,7 +93,7 @@ void ConvertPaddle2Onnx(std::string onnx_file_name,
     PADDLE_ENFORCE_EQ(
         convert_flag,
         0,
-        platform::errors::Unavailable("Convert paddle to onnx failed"));
+        phi::errors::Unavailable("Convert paddle to onnx failed"));
   }
 }
@@ -108,10 +108,9 @@ void QuantizeOnnx(std::string onnx_file_name,
                 << " --output-model " << rlym_file_name;
     LOG(INFO) << convert_cmd.str();
     int convert_flag = system(convert_cmd.str().c_str());
-    PADDLE_ENFORCE_EQ(
-        convert_flag,
-        0,
-        platform::errors::Unavailable("Convert onnx to rlym failed"));
+    PADDLE_ENFORCE_EQ(convert_flag,
+                      0,
+                      phi::errors::Unavailable("Convert onnx to rlym failed"));
   }
   if (!FileExists(quantized_rlym_file_name.c_str())) {
@@ -121,9 +120,8 @@
                  << dataset_plugin_path << " " << rlym_file_name;
     LOG(INFO) << quantize_cmd.str();
     int quantize_flag = system(quantize_cmd.str().c_str());
-    PADDLE_ENFORCE_EQ(quantize_flag,
-                      0,
-                      platform::errors::Unavailable("quantize model failed"));
+    PADDLE_ENFORCE_EQ(
+        quantize_flag, 0, phi::errors::Unavailable("quantize model failed"));
   }
 }
diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op.h b/paddle/fluid/operators/dlnne/dlnne_engine_op.h
index d0063c51512e3..363e21545c9ab 100644
--- a/paddle/fluid/operators/dlnne/dlnne_engine_op.h
+++ b/paddle/fluid/operators/dlnne/dlnne_engine_op.h
@@ -37,7 +37,7 @@
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
-#include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/common/float16.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
@@ -131,7 +131,7 @@ static phi::DataType DLNNE2FluidDataType(dl::nne::DataType type) {
     case dl::nne::DataType::kBOOL:
       return phi::DataType::BOOL;
     default:
-      PADDLE_THROW(platform::errors::InvalidArgument(
+      PADDLE_THROW(phi::errors::InvalidArgument(
           "Unknown fluid datatype in Fluid op converter"));
       return phi::DataType::FLOAT32;
   }
@@ -316,10 +316,9 @@ class DlnneEngineOp : public framework::OperatorBase {
     }
     builder = dl::nne::CreateInferBuilder();
-    PADDLE_ENFORCE_NE(
-        builder,
-        nullptr,
-        platform::errors::Unavailable("nne create builder failed"));
+    PADDLE_ENFORCE_NE(builder,
+                      nullptr,
+                      phi::errors::Unavailable("nne create builder failed"));
     dl::nne::BuilderConfig builder_cfg;
     builder_cfg.max_batch_size = max_batch_size_;
     builder_cfg.ws_mode = weight_share_map[weight_share_mode_];
@@ -327,10 +326,9 @@ class DlnneEngineOp : public framework::OperatorBase {
     network = builder->CreateNetwork();
     parser =
dl::nne::CreateParser(); - PADDLE_ENFORCE_NE( - parser, - nullptr, - platform::errors::Unavailable("nne create parser failed")); + PADDLE_ENFORCE_NE(parser, + nullptr, + phi::errors::Unavailable("nne create parser failed")); if (dlnne_log_flag_) { LOG(INFO) << "set output for dlnne"; } @@ -402,7 +400,7 @@ class DlnneEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( input_names_.empty(), false, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Dlnne engine needs at least one input, but no input is found. " "Please check if you set the input correctly.")); @@ -440,7 +438,7 @@ class DlnneEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( first_batch, batch, - platform::errors::Unavailable( + phi::errors::Unavailable( "compute infer_batches is different from each other")); } infer_batch = first_batch; @@ -474,13 +472,13 @@ class DlnneEngineOp : public framework::OperatorBase { data_bytes = 4; dtype = 2; } else if (type == phi::DataType::FLOAT16) { - buffer = static_cast(t.data()); + buffer = static_cast(t.data()); data_bytes = 2; dtype = 3; } else { PADDLE_THROW( - platform::errors::Fatal("The DLNNE Engine OP only support " - "float/int32_t/int64_t/float16 input.")); + phi::errors::Fatal("The DLNNE Engine OP only support " + "float/int32_t/int64_t/float16 input.")); } input_buffers[bind_index] = buffer; @@ -555,7 +553,7 @@ class DlnneEngineOp : public framework::OperatorBase { auto *fluid_v = scope.FindVar(y); PADDLE_ENFORCE_NOT_NULL( fluid_v, - platform::errors::NotFound( + phi::errors::NotFound( "Output variable %s is not found in DLNNE subgraph.", y)); auto *fluid_t = fluid_v->GetMutable(); diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc index 01df430f52161..d538164977277 100644 --- a/paddle/fluid/operators/dropout_op.cc +++ b/paddle/fluid/operators/dropout_op.cc @@ -71,7 +71,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float& drop_p) { PADDLE_ENFORCE_EQ(drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout_prob' must be between 0.0 and 1.0.")); }) .SupportTensor(); @@ -100,7 +100,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.cc b/paddle/fluid/operators/elementwise/elementwise_div_op.cc index 191890865fb89..4029be65a00d6 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cc @@ -107,6 +107,7 @@ class ElementwiseDivDoubleGradMaker : public framework::SingleGradOpMaker { op->SetType("elementwise_div_grad_grad"); op->SetInput("Y", this->Input("Y")); op->SetInput("Out", this->Input("Out")); + op->SetInput("Out@GRAD", this->Input(framework::GradVarName("Out"))); op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); op->SetInput("DDY", this->OutputGrad(framework::GradVarName("Y"))); op->SetInput("DX", this->Output(framework::GradVarName("X"))); diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index d835caedbf3c8..00a5ca7a39d0e 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ 
b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -26,7 +26,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
 #ifdef PADDLE_WITH_DNNL
-#include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/platform/onednn_helper.h"
 #endif
 namespace paddle {
@@ -44,7 +44,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(
         ctx->GetInputsVarType("Y").front(),
         framework::proto::VarType::LOD_TENSOR,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
             "The input var's type should be phi::DenseTensor, but the "
             "received is %s [%s].",
            ctx->GetInputsVarType("Y").front(),
@@ -55,7 +55,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(
           ctx->GetInputDim("Y").size(),
           1u,
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
              "For elementwise_op, if X is Sparse(VarType.SELECTED_ROWS"
              "), Y must be scalar, the size of Y should be 1. "
              "But received the size of Y = %s.",
@@ -63,14 +63,14 @@ class ElementwiseOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(
           ctx->GetInputDim("Y")[0],
           1,
-          platform::errors::InvalidArgument(
+          phi::errors::InvalidArgument(
              "For elementwise_op, if X is Sparse(VarType.SELECTED_ROWS"
              "), Y must be scalar, the first dimension of Y should be 1. "
              "But received the first dimension of Y = %s.",
              ctx->GetInputDim("Y")[0]));
     } else if (ctx->GetInputsVarType("X").front() !=
                framework::proto::VarType::LOD_TENSOR) {
-      PADDLE_THROW(platform::errors::InvalidArgument(
+      PADDLE_THROW(phi::errors::InvalidArgument(
          "Input X's type[%s] is not supported by elementwise_op. Please set "
          "its type to LOD_TENSOR.",
          ctx->GetInputsVarType("X").front()));
@@ -87,7 +87,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     if (x_dims.size() == y_dims.size()) {
       PADDLE_ENFORCE_EQ((axis == -1) || (axis == 0),
                         true,
-                        platform::errors::InvalidArgument(
+                        phi::errors::InvalidArgument(
                            "axis should be -1 or 0 while the dimension of "
                            "tensor X (%s) is equal to the dimension of "
                            "tensor Y (%s), but received axis: %s",
@@ -97,7 +97,7 @@
     }
     PADDLE_ENFORCE_EQ((axis >= (-1 * max_dim)) && (axis < max_dim),
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                          "The axis range must be [%s, %s), but axis is %s. "
                          "Please set the axis again.",
                          -1 * max_dim,
diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h
index afa2df659c42a..3d0fe2ab399bc 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -76,7 +76,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
   auto x_var = ctx.InputVar("X");
   PADDLE_ENFORCE_NOT_NULL(
       x_var,
-      platform::errors::InvalidArgument(
+      phi::errors::InvalidArgument(
          "Unable to get input Variable X, Variable name is %s.\n",
          ctx.InputName("X")));
   auto *y = ctx.Input<phi::DenseTensor>("Y");
@@ -89,13 +89,13 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
   } else if (x_var->IsType<phi::SelectedRows>()) {
     PADDLE_ENFORCE_EQ(y->dims().size() == 1 && y->dims()[0] == 1,
                       true,
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                          "For elementwise_op, if X is Sparse, Y must be "
                          "scalar. 
But received the size of Y = %d.",
                          y->dims().size()));
     PADDLE_ENFORCE_NOT_NULL(
         x_for_selectedrows,
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "The parameter x_for_selectedrows is expected to "
            "be valid, once input variable X's class type is "
            "SelectedRows.\n"));
@@ -110,7 +110,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
     z = ctx.Output<phi::SelectedRows>("Out")->mutable_value();
     ins->emplace_back(x_for_selectedrows);
   } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
+    PADDLE_THROW(phi::errors::InvalidArgument(
        "X's type[%s] is not supported by elementwise_op. X's type should be "
        "phi::DenseTensor or SelectedRows.",
        framework::ToTypeName(x_var->Type())));
@@ -1403,7 +1403,7 @@ void FusedElemwiseAndActGradComputeEx(const framework::ExecutionContext &ctx,
   if (UseIntermediateOut) {
     PADDLE_ENFORCE_NOT_NULL(
         intermediate_out,
-        platform::errors::InvalidArgument("Intermediate out is null pointer."));
+        phi::errors::InvalidArgument("Intermediate out is null pointer."));
   }
   if (x_dim == y_dim) {
     FusedElemwiseAndActGradComputeNoBroadcastIsType(), true,
-                    platform::errors::InvalidArgument("XPU only supports phi::DenseTensor, "
-                                                      "Input(X) is not phi::DenseTensor"));
+                    phi::errors::InvalidArgument("XPU only supports phi::DenseTensor, "
+                                                 "Input(X) is not phi::DenseTensor"));
   auto x = x_var->Get<phi::DenseTensor>();
   auto* y = ctx.Input<phi::DenseTensor>("Y");
diff --git a/paddle/fluid/operators/enqueue_op.cc b/paddle/fluid/operators/enqueue_op.cc
deleted file mode 100644
index c8279719789c4..0000000000000
--- a/paddle/fluid/operators/enqueue_op.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#include - -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/var_type_traits.h" -#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" - -namespace paddle { -namespace framework { -class OpDesc; -template -class EmptyGradOpMaker; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -using LoDTensorBlockingQueueHolder = - paddle::operators::reader::LoDTensorBlockingQueueHolder; - -namespace paddle { -namespace operators { - -class EnqueueOp : public framework::OperatorBase { - public: - EnqueueOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope& scope, - const platform::Place& dev_place) const override { - const std::string& queue_name = Attr("queue_name"); - auto* queue_holder_var = scope.FindVar(queue_name); - PADDLE_ENFORCE_NOT_NULL( - queue_holder_var, - platform::errors::NotFound( - "No LoDTensorBlockingQueueHolder variable with name %s found.", - queue_name)); - const std::string& var_name = Input("X"); - auto* in_var = scope.FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(in_var, - platform::errors::NotFound( - "No variable with name %s found.", var_name)); - auto* in_tensor = in_var->GetMutable(); - auto* queue_holder = - queue_holder_var->template GetMutable(); - - paddle::framework::LoDTensorArray lod_tensor_vec; - lod_tensor_vec.emplace_back(*in_tensor); - queue_holder->GetQueue()->Push(lod_tensor_vec); - } -}; - -class EnqueueOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "`lod_tensor` to enqueue"); - AddAttr("queue_name", - "Name of the `LoDTensorBlockingQueueHolder` variable"); - AddComment(R"DOC( - Enqueue operator. - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = ::paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(enqueue, ops::EnqueueOp, ops::EnqueueOpMaker); diff --git a/paddle/fluid/operators/expand_as_v2_op.h b/paddle/fluid/operators/expand_as_v2_op.h index 2c62dc570ff21..a9dd1f08c385b 100644 --- a/paddle/fluid/operators/expand_as_v2_op.h +++ b/paddle/fluid/operators/expand_as_v2_op.h @@ -17,9 +17,9 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" -#define MAX_RANK_SUPPORTED 6 +#define MAX_RANK_SUPPORTED 8 namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 71295296218f0..4f57a35a1039e 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -36,7 +36,7 @@ class ExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( static_cast(x_dims.size()), expand_times.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of elements (%d) of 'expand_times' for " "Op(expand) must be equal to the number of dimensions " "(%d) of the input.", @@ -44,10 +44,11 @@ class ExpandOp : public framework::OperatorWithKernel { static_cast(x_dims.size()))); PADDLE_ENFORCE_LE( x_dims.size(), - 6, - platform::errors::InvalidArgument( + MAX_RANK_SUPPORTED, + phi::errors::InvalidArgument( "The number of dimensions of the input for Op(expand) " - "must not be greater than 6, but the value received is %d.", + "must not be greater than %d, but the value received is %d.", + MAX_RANK_SUPPORTED, x_dims.size())); std::vector out_shape(x_dims.size()); @@ -58,7 +59,7 @@ class ExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( expand_times[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The %uth element of 'expand_times' for Op(expand) must be " "greater than 0, but the value given is %d.", i, @@ -98,7 +99,7 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "(Tensor, default Tensor). A tensor with rank in [1, 8]." "X is the input to be expanded."); AddInput("ExpandTimes", "(Tensor), optional). If provided, expand according to " @@ -112,7 +113,7 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { .AsDuplicable() .AsDispensable(); AddOutput("Out", - "(Tensor, default Tensor). A tensor with rank in [1, 6]." + "(Tensor, default Tensor). A tensor with rank in [1, 8]." "The rank of Output(Out) have the same with Input(X). " "After expanding, size of each dimension of Output(Out) is equal " "to size of the corresponding dimension of Input(X) multiplying " @@ -123,7 +124,7 @@ class ExpandOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Expand operator tiles the input by given times number. You should set times number for each dimension by providing attribute 'expand_times'. The rank of X -should be in [1, 6]. Please note that size of 'expand_times' must be the same +should be in [1, 8]. Please note that size of 'expand_times' must be the same with X's rank. 
Following is a using case: Input(X) is a 3-D tensor with shape [2, 3, 1]: [ @@ -163,7 +164,7 @@ class ExpandGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[0], out_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension size (%d) of Input(Out@GRAD) should be " "equal to the corresponding dimension size (%d) of Input(X)", out_dims[0], @@ -179,7 +180,7 @@ class ExpandGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[i] * expand_times[i], out_dims[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The %uth dimension size (%d) of Input(Out@GRAD) should be " "equal to the multiplication of the corresponding dimension " "sizes of Input(X) (%d) and expand_times (%d).", @@ -284,19 +285,18 @@ REGISTER_OP_CPU_KERNEL(expand_grad, ops::ExpandGradKernel, ops::ExpandGradKernel); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -REGISTER_OP_CUDA_KERNEL( - expand, - ops::ExpandKernel, - ops::ExpandKernel, - ops::ExpandKernel, - ops::ExpandKernel, - ops::ExpandKernel, - ops::ExpandKernel); +REGISTER_OP_CUDA_KERNEL(expand, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel); REGISTER_OP_CUDA_KERNEL( expand_grad, ops::ExpandGradKernel, ops::ExpandGradKernel, - ops::ExpandGradKernel, + ops::ExpandGradKernel, ops::ExpandGradKernel, ops::ExpandGradKernel); #endif diff --git a/paddle/fluid/operators/expand_op.h b/paddle/fluid/operators/expand_op.h index ee100b3b48418..3d539cbf0c944 100644 --- a/paddle/fluid/operators/expand_op.h +++ b/paddle/fluid/operators/expand_op.h @@ -19,9 +19,9 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" -#define MAX_RANK_SUPPORTED 6 +#define MAX_RANK_SUPPORTED 8 namespace paddle { namespace operators { @@ -97,14 +97,14 @@ class ExpandKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( rank, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'x' for Op(expand) " "must be greater than or equal to 1, but the value received is %d.", rank)); PADDLE_ENFORCE_LE( rank, MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'x' for Op(expand) " "must be less than or equal to %d, but the value received is %d.", MAX_RANK_SUPPORTED, @@ -128,6 +128,12 @@ class ExpandKernel : public framework::OpKernel { case 6: Expand<6>(context); break; + case 7: + Expand<7>(context); + break; + case 8: + Expand<8>(context); + break; } } @@ -140,7 +146,7 @@ class ExpandKernel : public framework::OpKernel { auto expand_times = get_expand_times(context); PADDLE_ENFORCE_EQ(static_cast(in_dims.size()), expand_times.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of elements (%d) of 'expand_times' for " "Op(expand) must be equal to the number " "of dimensions (%d) of the input.", @@ -166,10 +172,10 @@ class ExpandKernel : public framework::OpKernel { // use 32-bit index to speed up bool use_32bit_index = y.size() < Eigen::NumTraits::highest(); if (use_32bit_index) { - EigenBroadcast, T, Rank>::Eval( + phi::funcs::EigenBroadcast, T, Rank>::Eval( place, To32BitIndex(y), To32BitIndex(x), bcast_dims); } else { - 
EigenBroadcast, T, Rank>::Eval( + phi::funcs::EigenBroadcast, T, Rank>::Eval( place, y, x, bcast_dims); } } @@ -216,7 +222,7 @@ class ExpandGradKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_GE(dims, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input " "'Out@GRAD' for Op(expand_grad)" " must be greater than or equal to 1, but " @@ -224,7 +230,7 @@ class ExpandGradKernel : public framework::OpKernel { dims)); PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of dimensions of the input 'Out@GRAD' " "for Op(expand_grad) must be less than or equal " "to %d, but the value received is %d.", @@ -249,10 +255,17 @@ class ExpandGradKernel : public framework::OpKernel { case 6: ExpandBackward<6>(context, reshape_dims_vec, reduce_dims_vec); break; + case 7: + ExpandBackward<7>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 8: + ExpandBackward<8>(context, reshape_dims_vec, reduce_dims_vec); + break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Only support tensor with rank being between 1 and 6. But " + PADDLE_THROW(phi::errors::InvalidArgument( + "Only support tensor with rank being between 1 and %d. But " "received tensor's rank = %d.", + MAX_RANK_SUPPORTED, dims)); } } @@ -267,14 +280,14 @@ class ExpandGradKernel : public framework::OpKernel { size_t reduce_size = reduce_dims_vec.size(); PADDLE_ENFORCE_EQ(reshape_size, reshape_dims_vec.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inconsistent size between template Dims (%d) and " "reshape dimensions (%d).", reshape_size, reshape_dims_vec.size())); PADDLE_ENFORCE_EQ(reduce_size, reduce_dims_vec.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inconsistent size between template Dims (%d) and " "reduce dimensions (%d).", reduce_size, @@ -294,8 +307,8 @@ class ExpandGradKernel : public framework::OpKernel { auto out_grad = EigenVector::Flatten(*in0); auto& place = *context.template device_context().eigen_device(); - EigenBroadcastGrad, T, Dims>::Eval( - place, x_grad, out_grad, reduce_dims, reshape_dims); + phi::funcs::EigenBroadcastGrad, T, Dims>:: + Eval(place, x_grad, out_grad, reduce_dims, reshape_dims); } }; diff --git a/paddle/fluid/operators/expand_v2_op.h b/paddle/fluid/operators/expand_v2_op.h index 0a70faddb7d58..57013d5eb8bd1 100644 --- a/paddle/fluid/operators/expand_v2_op.h +++ b/paddle/fluid/operators/expand_v2_op.h @@ -20,9 +20,9 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" -#define MAX_RANK_SUPPORTED 6 +#define MAX_RANK_SUPPORTED 8 namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fake_dequantize_op.cc b/paddle/fluid/operators/fake_dequantize_op.cc index ee64d5e1c5cc5..e527ae2d876e9 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cc +++ b/paddle/fluid/operators/fake_dequantize_op.cc @@ -232,12 +232,12 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker "and mul, the quant_axis is equal to the cout axis.") .SetDefault(0) .AddCustomChecker([](const int& quant_axis) { - PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, - true, - platform::errors::InvalidArgument( - "'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, + true, + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); }); AddAttr("x_num_col_dims", "The x_num_col_dims of mul. Only used for mul or matmul.") @@ -245,7 +245,7 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker .AddCustomChecker([](const int& x_num_col_dims) { PADDLE_ENFORCE_EQ(x_num_col_dims == 0, false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'x_num_col_dims' should be larger than 0, but " "the received is %d", x_num_col_dims)); diff --git a/paddle/fluid/operators/fake_dequantize_op.cu b/paddle/fluid/operators/fake_dequantize_op.cu index ea069daa40d7d..1e9c28661e23c 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cu +++ b/paddle/fluid/operators/fake_dequantize_op.cu @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/fake_dequantize_op.cu.h" namespace ops = paddle::operators; -using float16 = paddle::platform::float16; +using float16 = phi::dtype::float16; PD_REGISTER_STRUCT_KERNEL(fake_dequantize_max_abs, GPU, diff --git a/paddle/fluid/operators/fake_dequantize_op.h b/paddle/fluid/operators/fake_dequantize_op.h index 57887721308d4..420996e878b76 100644 --- a/paddle/fluid/operators/fake_dequantize_op.h +++ b/paddle/fluid/operators/fake_dequantize_op.h @@ -82,7 +82,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( scales[0]->numel(), in->dims()[quant_axis], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of first scale values must be the same with " "quant_axis dimension value of Input(X) when the `Scales` has " "only one element, but %ld != %ld here.", @@ -93,7 +93,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( scales[0]->numel(), in->dims()[x_num_col_dims], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The number of first scale values must be the same with " "corresponding dimension value of Input(X) when the `Scales` " "has two elements, but %ld != %ld here.", @@ -101,7 +101,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { in->dims()[1])); PADDLE_ENFORCE_EQ(scales[1]->numel(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The second scale tensor should only have one " "value at now, but it has %ld values here.", scales[1]->numel())); diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index a5169892187a2..d7d9a1416d919 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -54,9 +54,9 @@ struct FindChannelAbsMaxFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); auto *in_data = in_tensor.data(); auto in_dims = in_tensor.dims(); const int64_t channel = in_dims[quant_axis]; @@ -167,9 +167,9 @@ struct ChannelClipAndFakeQuantFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); auto *scale_data = scale.data(); auto *in_data = in.data(); auto *out_data = out->mutable_data(ctx.GetPlace()); @@ -247,9 +247,9 @@ struct ChannelClipFakeQuantDequantFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); auto *scale_data = scale.data(); auto *in_data = in.data(); @@ -426,7 +426,7 @@ class FakeQuantOrWithDequantAbsMaxOpMaker .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -493,19 +493,19 @@ class FakeChannelWiseQuantizeAbsMaxOpMaker 
"and mul, the quant_axis is equal to the cout axis.") .SetDefault(0) .AddCustomChecker([](const int &quant_axis) { - PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, - true, - platform::errors::InvalidArgument( - "'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, + true, + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -574,19 +574,19 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker "and mul, the quant_axis is equal to the cout axis.") .SetDefault(0) .AddCustomChecker([](const int &quant_axis) { - PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, - true, - platform::errors::InvalidArgument( - "'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, + true, + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -654,7 +654,7 @@ class FakeQuantizeRangeAbsMaxOpMaker .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -735,7 +735,7 @@ class FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu index 68ceaca46d04f..240fd119ff09a 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/fake_quantize_op.cu.h" namespace ops = paddle::operators; -using float16 = paddle::platform::float16; +using float16 = phi::dtype::float16; PD_REGISTER_STRUCT_KERNEL(fake_quantize_abs_max, GPU, diff --git a/paddle/fluid/operators/fake_quantize_op.cu.h b/paddle/fluid/operators/fake_quantize_op.cu.h index bdf8a80debb64..cb2f498c22b0b 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu.h +++ b/paddle/fluid/operators/fake_quantize_op.cu.h @@ -31,7 +31,7 @@ struct QuantizeDataType { }; template <> -struct QuantizeDataType { +struct QuantizeDataType { using type = float; }; @@ -92,7 +92,7 @@ struct FindAbsMaxFunctor { }; template struct FindAbsMaxFunctor; -template struct FindAbsMaxFunctor; +template struct FindAbsMaxFunctor; template __global__ void FindChannelAbsMaxKernelQuantAxis0(const T *in, @@ -172,9 +172,9 @@ struct FindChannelAbsMaxFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); const int num = in_tensor.numel(); auto in_dims = in_tensor.dims(); const T *in_data = in_tensor.data(); @@ -419,9 +419,9 @@ struct ChannelClipAndFakeQuantFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); int64_t num = in.numel(); auto in_dims = in.dims(); @@ -665,9 +665,9 @@ struct ChannelClipFakeQuantDequantFunctor { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, - platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); int num = in.numel(); auto in_dims = in.dims(); diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 6387018d1865e..39af6b5d5dec2 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -453,11 +453,11 @@ class StraightThroughEstimatorGradKernel : public framework::OpKernel { context.Input(framework::GradVarName("Out")); auto x_grad_name = framework::GradVarName("X"); auto *d_x = context.Output(x_grad_name); - PADDLE_ENFORCE_NOT_NULL(d_x, - platform::errors::PreconditionNotMet( - "StraightThroughEstimatorGradKernel " - "doesn't have the output named %s.", - x_grad_name)); + PADDLE_ENFORCE_NOT_NULL( + d_x, + phi::errors::PreconditionNotMet("StraightThroughEstimatorGradKernel " + "doesn't have the output named %s.", + x_grad_name)); // Initialize dx as same as d_out d_x->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc index 8a27649af864b..730ba969c779f 100644 --- a/paddle/fluid/operators/fill_constant_op.cc +++ b/paddle/fluid/operators/fill_constant_op.cc @@ -33,7 +33,7 @@ class FillConstantOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( shape[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each value of attribute 'shape' is expected to be no less " "than 0. 
But received: shape[%u] = %d; shape = [%s].", i, @@ -96,7 +96,7 @@ class FillConstantOp : public framework::OperatorWithKernel { kt.set_backend(phi::Backend::XPU); break; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Could NOT determine the place of variable, place_type = %d .", place_type)); } diff --git a/paddle/fluid/operators/fill_zeros_like_op.cu.cc b/paddle/fluid/operators/fill_zeros_like_op.cu.cc index c00d23928a70c..e398e94e4ba09 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.cu.cc +++ b/paddle/fluid/operators/fill_zeros_like_op.cu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -29,7 +29,7 @@ PD_REGISTER_STRUCT_KERNEL(fill_zeros_like, int64_t, float, double, - plat::float16, + phi::dtype::float16, bool, plat::complex, plat::complex) {} @@ -42,7 +42,7 @@ PD_REGISTER_STRUCT_KERNEL(fill_zeros_like2, int64_t, float, double, - plat::float16, + phi::dtype::float16, bool, plat::complex, plat::complex) {} diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc index 48c1a23b8591d..b6b67e12fd24d 100644 --- a/paddle/fluid/operators/flatten_op.cc +++ b/paddle/fluid/operators/flatten_op.cc @@ -43,12 +43,12 @@ class Flatten2Op : public framework::OperatorWithKernel { const auto &in_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_GE(axis, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The axis should be greater than or equal to 0.")); PADDLE_ENFORCE_LE( axis, in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The axis should be less than or equal to input tensor's rank")); const auto &out_dims = Flatten2Op::GetOutputShape(axis, in_dims); diff --git a/paddle/fluid/operators/fsp_op.cc b/paddle/fluid/operators/fsp_op.cc index d2c1d2c45d685..c447f9d485f5c 100644 --- a/paddle/fluid/operators/fsp_op.cc +++ b/paddle/fluid/operators/fsp_op.cc @@ -34,28 +34,28 @@ class FSPOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 4UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) must have shape [batch_size, channel, height, width]." "Now the dimension of 'X' is %d.", x_dims.size())); PADDLE_ENFORCE_EQ( y_dims.size(), 4UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Y) must have shape [batch_size, channel, height, width]." 
"Now the dimension of 'Y' is %d.", y_dims.size())); PADDLE_ENFORCE_EQ( x_dims[2], y_dims[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X)(%d) and Input(Y)(%d) should have the same height.", x_dims[2], y_dims[2])); PADDLE_ENFORCE_EQ( x_dims[3], y_dims[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X)(%d) and Input(Y)(%d) should have the same width.", x_dims[3], y_dims[3])); diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index ced20a0108a52..713ad1931ce23 100755 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -6,7 +6,6 @@ endif() register_operators( EXCLUDES fused_bn_activation_op - fusion_conv_inception_op yolo_box_head_op yolo_box_post_op fusion_group_op @@ -39,11 +38,7 @@ if(WITH_GPU OR WITH_ROCM) op_library(fused_bn_activation_op) endif() # HIP not support cudnnTransformTensor - # fusion_conv_inception_op needs cudnn 7 above # HIP not support cudnnConvolutionBiasActivationForward - if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7100)) - op_library(fusion_conv_inception_op) - endif() op_library(yolo_box_head_op) op_library(yolo_box_post_op) op_library(fused_gate_attention_op) diff --git a/paddle/fluid/operators/fused/attn_bias_add.cu.h b/paddle/fluid/operators/fused/attn_bias_add.cu.h index 8ea1e11cd29f4..2f1847d951058 100644 --- a/paddle/fluid/operators/fused/attn_bias_add.cu.h +++ b/paddle/fluid/operators/fused/attn_bias_add.cu.h @@ -170,7 +170,7 @@ void LaunchBiasAddFwKernel(const phi::GPUContext& ctx, break; } default: { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported vectorized size: %d !", vec_size)); break; } diff --git a/paddle/fluid/operators/fused/attn_feed_forward.h b/paddle/fluid/operators/fused/attn_feed_forward.h index 77339f1fa0d64..25ba1cc13ead2 100644 --- a/paddle/fluid/operators/fused/attn_feed_forward.h +++ b/paddle/fluid/operators/fused/attn_feed_forward.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/operators/fused/attn_bias_add.cu.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/blas/blas.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/attn_gemm_int8.h b/paddle/fluid/operators/fused/attn_gemm_int8.h index 8dc4810b1f3b9..a6865649b26ae 100644 --- a/paddle/fluid/operators/fused/attn_gemm_int8.h +++ b/paddle/fluid/operators/fused/attn_gemm_int8.h @@ -19,8 +19,8 @@ limitations under the License. */ #include "paddle/fluid/operators/fused/cublaslt.h" #include "paddle/fluid/operators/fused/quant_dequant_kernel.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" @@ -87,12 +87,12 @@ class AttnMatmulINT8 { std::vector outs = {bias_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, phi::funcs::AddFunctor()); - PADDLE_ENFORCE_EQ(cudaGetLastError(), - cudaSuccess, - platform::errors::Fatal( - "cuda error occurred after computing bias. " - "But it does not mean this error is caused by " - "bias computing")); + PADDLE_ENFORCE_EQ( + cudaGetLastError(), + cudaSuccess, + phi::errors::Fatal("cuda error occurred after computing bias. 
" + "But it does not mean this error is caused by " + "bias computing")); } } @@ -141,12 +141,12 @@ class AttnMatmulINT8 { std::vector outs = {bias_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, phi::funcs::AddFunctor()); - PADDLE_ENFORCE_EQ(cudaGetLastError(), - cudaSuccess, - platform::errors::Fatal( - "cuda error occurred after computing bias. " - "But it does not mean this error is caused by " - "bias computing")); + PADDLE_ENFORCE_EQ( + cudaGetLastError(), + cudaSuccess, + phi::errors::Fatal("cuda error occurred after computing bias. " + "But it does not mean this error is caused by " + "bias computing")); } } diff --git a/paddle/fluid/operators/fused/cublaslt.h b/paddle/fluid/operators/fused/cublaslt.h index e9728c58b55dc..e3f96b9ec1d3d 100644 --- a/paddle/fluid/operators/fused/cublaslt.h +++ b/paddle/fluid/operators/fused/cublaslt.h @@ -54,7 +54,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -69,7 +69,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatmulDescCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -81,7 +81,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatmulDescSetAttribute execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -91,7 +91,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -100,7 +100,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -109,7 +109,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); @@ -212,7 +212,7 @@ class CublasLtHelper { PADDLE_ENFORCE_EQ( status, CUBLAS_STATUS_SUCCESS, - platform::errors::External( + phi::errors::External( "cublasLtMatmul execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); diff --git a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h index 8f3b5e4f09a06..5fb6f38b4c682 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h +++ b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h @@ -39,7 +39,7 @@ struct BNStatsFinalizeArgs { PADDLE_ENFORCE_EQ( param_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of param_shape is expected to 4. But received " "param_shape's size is %d, param_shape is [%s].", param_shape.size(), @@ -160,11 +160,11 @@ class CudnnBNStatsFinalize { CUDNN_BATCHNORM_SPATIAL_PERSISTENT); // Check workspace size, also creates plan. 
size_t workspace_size_bytes = train_op_.GetWorkspaceSizeInBytes(handle); - PADDLE_ENFORCE_EQ(workspace_size_bytes, - 0U, - platform::errors::InvalidArgument( - "Unexpected non-zero workspace size for " - "CudnnBNStatsFinalize.")); + PADDLE_ENFORCE_EQ( + workspace_size_bytes, + 0U, + phi::errors::InvalidArgument("Unexpected non-zero workspace size for " + "CudnnBNStatsFinalize.")); train_op_.SetOpVariantParamAttrPtr(CUDNN_PTR_WORKSPACE, static_cast(nullptr)); train_op_.SetOpVariantParamAttrPtr(CUDNN_PTR_WORKSPACE, @@ -192,11 +192,11 @@ class CudnnBNStatsFinalize { CUDNN_BATCHNORM_SPATIAL_PERSISTENT); // Check workspace size, also creates plan. size_t workspace_size_bytes = inference_op_.GetWorkspaceSizeInBytes(handle); - PADDLE_ENFORCE_EQ(workspace_size_bytes, - 0U, - platform::errors::InvalidArgument( - "Unexpected non-zero workspace size for " - "CudnnBNStatsFinalize.")); + PADDLE_ENFORCE_EQ( + workspace_size_bytes, + 0U, + phi::errors::InvalidArgument("Unexpected non-zero workspace size for " + "CudnnBNStatsFinalize.")); inference_op_.SetOpVariantParamAttrPtr(CUDNN_PTR_WORKSPACE, static_cast(nullptr)); inference_op_.SetOpVariantParamAttrPtr(CUDNN_PTR_WORKSPACE, diff --git a/paddle/fluid/operators/fused/cudnn_fusion_helper.h b/paddle/fluid/operators/fused/cudnn_fusion_helper.h index 7b738383f6ac7..f1df14c4f60de 100644 --- a/paddle/fluid/operators/fused/cudnn_fusion_helper.h +++ b/paddle/fluid/operators/fused/cudnn_fusion_helper.h @@ -52,7 +52,7 @@ class CudnnFusionOp { PADDLE_ENFORCE_EQ( plan_created_, true, - platform::errors::Fatal( + phi::errors::Fatal( "CudnnFusionOp exec requested without a valid 'plan', need: " ", GetWorkspaceSizeBytes(), Execute().")); PADDLE_ENFORCE_GPU_SUCCESS( diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h index 9dbb8a8eaebc8..5d0e6c44c4e63 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h +++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h @@ -55,7 +55,7 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ( input_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of input_shape is expected to 4. But received " "input_shape's size is %d, input_shape is [%s].", input_shape.size(), @@ -63,7 +63,7 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ( filter_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of filter_shape is expected to 4. But received " "filter_shape's size is %d, filter_shape is [%s].", filter_shape.size(), @@ -71,13 +71,13 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ(filter_shape[1] == filter_shape[2] && (filter_shape[1] == 1 || filter_shape[1] == 3), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The filter_shape is expected to store as nhwc, and " "h = w = 1 or 3. But received filter_shape is [%s].", common::make_ddim(filter_shape))); PADDLE_ENFORCE_EQ((filter_shape[0] % 32 == 0 && filter_shape[3] % 8 == 0), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input channel is expected to be multiple of 8, " "and the output channel is expected to be multiple " "of 32. But received input channel is %d, output " @@ -87,7 +87,7 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ( output_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of output_shape is expected to 4. 
But received " "filter_shape's size is %d, filter_shape is [%s].", output_shape.size(), @@ -96,7 +96,7 @@ struct NormConvolutionArgs { PADDLE_ENFORCE_EQ( is_support, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Current test is only supported in the platforms with " "compatiblity greater than or equal to 70 and the kernel size " "must be equal to 1 or 3. When the kernel size is 1, " diff --git a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h index 8b731e2c55408..7f47ea40e6cea 100644 --- a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h +++ b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h @@ -43,7 +43,7 @@ struct ScaleBiasAddReluArgs { PADDLE_ENFORCE_EQ( data_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of data_shape is expected to 4. But received " "data_shape's size is %d, data_shape is [%s].", data_shape.size(), @@ -51,7 +51,7 @@ struct ScaleBiasAddReluArgs { PADDLE_ENFORCE_EQ( param_shape.size(), 4U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of param_shape is expected to 4. But received " "param_shape's size is %d, param_shape is [%s].", param_shape.size(), @@ -59,7 +59,7 @@ struct ScaleBiasAddReluArgs { PADDLE_ENFORCE_EQ( bitmask_shape.size(), 3U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of bitmask_shape is expected to 3. But received " "bitmask_shape's size is %d, bitmask_shape is [%s].", bitmask_shape.size(), @@ -76,7 +76,7 @@ struct ScaleBiasAddReluArgs { PADDLE_ENFORCE_EQ( act_type, "relu", - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only relu activation supported in normalized convolution.")); mode = CUDNN_ACTIVATION_RELU; } diff --git a/paddle/fluid/operators/fused/fmha_ref.h b/paddle/fluid/operators/fused/fmha_ref.h index 843b5009a6fcc..2a43eea07535a 100644 --- a/paddle/fluid/operators/fused/fmha_ref.h +++ b/paddle/fluid/operators/fused/fmha_ref.h @@ -118,7 +118,7 @@ void InvokeTransposeRemovePadding(const phi::GPUContext& dev_ctx, PADDLE_ENFORCE_EQ( head_dim % PackSize, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "dim_head=%d must be divisible by vec_size=%d", head_dim, PackSize)); const int32_t pack_num = elem_cnt / PackSize; const int32_t block_size = 128; @@ -666,7 +666,7 @@ class FMHARef { dev_ctx_, qk_out_grad_tensor); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Only used for the backward elementwise_add op when" "dy is not needed and dx is not reduce")); return; diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc index 8ae1a60ad3b94..d46265de1b354 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_attention_op.cc @@ -124,7 +124,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel { if (transpose_qkv_wb) { PADDLE_ENFORCE_EQ(y_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 2 if enable" "transpose_qkv_wb: (dim_embed, 3 * dim_embed)," "but received dimensions of" @@ -132,13 +132,13 @@ class FusedAttentionOp : public framework::OperatorWithKernel { y_dim.size())); PADDLE_ENFORCE_GT(num_heads, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num_heads must be provided and 
greater than 0 " "if enable transpose_qkv_wb, but we got %d.", num_heads)); PADDLE_ENFORCE_EQ(y_dim[0] % num_heads, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "First dim of qkv_w must be divisible by num heads " "if enable transpose_qkv_wb, but receive first " "dim of qkv_w is %d and num_heads is %d.", @@ -147,7 +147,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel { if (ctx->Attrs().Get("ring_id") == -1) { PADDLE_ENFORCE_EQ(y_dim[0] * 3, y_dim[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 2" "(dim_embed, 3 * dim_embed).")); } else { @@ -159,21 +159,21 @@ class FusedAttentionOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(y_dim.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 4 if not" "enable transpose_qkv_wb: (3, num_head, dim_head, " "dim_embed), but received [%d]", y_dim.size())); PADDLE_ENFORCE_EQ(y_dim[0], 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "First dim of qkv_w must be 3 if disable " "transpose_qkv_wb, but we got %d.", y_dim[0])); if (ctx->Attrs().Get("ring_id") == -1) { PADDLE_ENFORCE_EQ(y_dim[1] * y_dim[2], y_dim[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 4" "(3, num_head, dim_head, dim_embed)," "and must satisfy the limitations: " @@ -186,15 +186,15 @@ class FusedAttentionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dim.size(), 3, - platform::errors::InvalidArgument("The dimensions of x must be 3" - "(batch_size, seq_len, dim_embed)," - "but received dimensions of" - "Input is [%d]", - x_dim.size())); + phi::errors::InvalidArgument("The dimensions of x must be 3" + "(batch_size, seq_len, dim_embed)," + "but received dimensions of" + "Input is [%d]", + x_dim.size())); PADDLE_ENFORCE_EQ(x_dim[2], hidden_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of x_dim[2] and y_dim[3] " "(y_dim[1] if enable transpose_qkv_w) " "must be equal. 
But received: the shape " @@ -245,23 +245,23 @@ class FusedAttentionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( c_dim.size(), 5, - paddle::platform::errors::InvalidArgument( - "The CacheKV must be 5 dims, but got %d", c_dim.size())); + phi::errors::InvalidArgument("The CacheKV must be 5 dims, but got %d", + c_dim.size())); PADDLE_ENFORCE_EQ(c_dim[0], 2, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of CacheKV must be 2, but got %d", c_dim[0])); // 2 PADDLE_ENFORCE_EQ(c_dim[1], x_dim[0], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of CacheKV must be equal with " "batch size %d, but got %d", x_dim[0], c_dim[1])); // batch_size PADDLE_ENFORCE_EQ(c_dim[2], num_heads, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of CacheKV must be equal with num " "head %d, but got %d", num_heads, @@ -272,14 +272,14 @@ class FusedAttentionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( c_dim[3], 0, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The forth dim of CacheKV must be greater than 0, but got %d", c_dim[3])); // cache_seq_len } PADDLE_ENFORCE_EQ(c_dim[4], dim_head, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The fifth dim of CacheKV must be equal with head " "size %d, but got %d", dim_head, @@ -400,7 +400,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' in Op(LayerNorm) should be between" "0.0 and 0.001, But received [%s].", epsilon)); @@ -413,7 +413,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'attn_dropout_rate' must be between 0.0 and 1.0.")); }); AddAttr("is_test", @@ -449,7 +449,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -459,7 +459,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float &drop_p) { PADDLE_ENFORCE_EQ(drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout_rate' must be between 0.0 and 1.0.")); }); AddAttr("dropout_fix_seed", @@ -479,7 +479,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -489,7 +489,7 @@ class FusedAttentionOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float &ln_epsilon) { PADDLE_ENFORCE_EQ(ln_epsilon >= 0.0f && ln_epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' of the second LayerNorm in Fused " "attention op should be between" "0.0 and 0.001, But received [%s].", @@ -540,7 +540,7 @@ class FusedAttentionGradOp : 
public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_test"), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GradOp is only callable when is_test is false")); if (ctx->Attrs().Get("pre_layer_norm") == false) { diff --git a/paddle/fluid/operators/fused/fused_attention_utils.h b/paddle/fluid/operators/fused/fused_attention_utils.h index 39eb4c821e00a..18e3a513b3053 100644 --- a/paddle/fluid/operators/fused/fused_attention_utils.h +++ b/paddle/fluid/operators/fused/fused_attention_utils.h @@ -62,7 +62,7 @@ static void AllReduce(phi::DenseTensor &tensor, // NOLINT // Use New Communication Library PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -73,7 +73,7 @@ static void AllReduce(phi::DenseTensor &tensor, // NOLINT comm_context_manager.Get(std::to_string(ring_id))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - paddle::platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc index 2ea40d840d2b3..69869cd3b7729 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc @@ -27,57 +27,57 @@ namespace operators { void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Scale"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Scale) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Bias"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Bias) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Mean"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Mean) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Variance"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Variance) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Y"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Y) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("MeanOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(MeanOut) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("VarianceOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(VarianceOut) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("SavedMean"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(SavedMean) of BatchNormOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("SavedVariance"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(SavedVariance) of BatchNormOp should not be null.")); // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python 
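The InferShape rewrite above strings together one presence check per input and output, all with the same shape. A compact sketch of that check, using a hypothetical op and variable names ("MyOp", "X", "Y" are illustrative, not taken from the patch):

#include "paddle/fluid/framework/op_registry.h"  // InferShapeContext, macros

void CheckIOPresence(paddle::framework::InferShapeContext *ctx) {
  // One PADDLE_ENFORCE_EQ(..., true, ...) per required slot.
  PADDLE_ENFORCE_EQ(
      ctx->HasInput("X"),
      true,
      phi::errors::InvalidArgument("Input(X) of MyOp should not be null."));
  PADDLE_ENFORCE_EQ(
      ctx->HasOutput("Y"),
      true,
      phi::errors::InvalidArgument("Output(Y) of MyOp should not be null."));
}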
PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Mean and MeanOut should share the same memory")); PADDLE_ENFORCE_EQ( ctx->Inputs("Variance")[0], ctx->Outputs("VarianceOut")[0], - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Variance and VarianceOut should share the same memory")); const auto x_dims = ctx->GetInputDim("X"); @@ -85,23 +85,23 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_GE( x_dims.size(), 2, - platform::errors::PreconditionNotMet("ShapeError: the dimension of input " - "X must greater than or equal to 2." - "But received: the shape of input X " - "= [%s], the dimension of input X =" - "[%d]", - x_dims, - x_dims.size())); + phi::errors::PreconditionNotMet("ShapeError: the dimension of input " + "X must greater than or equal to 2." + "But received: the shape of input X " + "= [%s], the dimension of input X =" + "[%d]", + x_dims, + x_dims.size())); PADDLE_ENFORCE_LE( x_dims.size(), 5, - platform::errors::PreconditionNotMet("ShapeError: the dimension of input " - "X must smaller than or equal to 5." - "But received: the shape of input X " - "= [%s], the dimension of input X =" - "[%d]", - x_dims, - x_dims.size())); + phi::errors::PreconditionNotMet("ShapeError: the dimension of input " + "X must smaller than or equal to 5." + "But received: the shape of input X " + "= [%s], the dimension of input X =" + "[%d]", + x_dims, + x_dims.size())); const int64_t C = x_dims[x_dims.size() - 1]; @@ -111,7 +111,7 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE_EQ( scale_dim.size(), 1UL, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "ShapeError: the dimension of scale must equal to 1." "But received: the shape of scale is [%s], the dimension " "of scale is [%d]", @@ -119,7 +119,7 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { scale_dim.size())); PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "ShapeError: the dimension of bias must equal to 1." 
"But received: the shape of bias is [%s],the dimension " "of bias is [%d]", @@ -135,14 +135,14 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { if (check) { PADDLE_ENFORCE_EQ(scale_dim[0], C, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "ShapeError: the shape of scale must equal to [%d]" "But received: the shape of scale is [%d]", C, scale_dim[0])); PADDLE_ENFORCE_EQ(bias_dim[0], C, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "ShapeError: the shape of bias must equal to [%d]" "But received: the shape of bias is [%d]", C, @@ -166,25 +166,25 @@ phi::KernelKey FusedBatchNormActOp::GetExpectedKernelType( if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::PreconditionNotMet( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::PreconditionNotMet( - "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Mean")->dtype()), - platform::errors::PreconditionNotMet( - "Mean input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + phi::errors::PreconditionNotMet("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + phi::errors::PreconditionNotMet("Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), + phi::errors::PreconditionNotMet("Mean input should be of float type")); PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( ctx.Input("Variance")->dtype()), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Variance input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -197,7 +197,7 @@ void FusedBatchNormActOpMaker::Make() { .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attr(epsilon) should be between 0.0 and 0.001, " "but received value is %f.", epsilon)); @@ -252,37 +252,37 @@ void FusedBatchNormActGradOp::InferShape( PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Scale"), true, - platform::errors::InvalidArgument("Input(Scale) should not be null.")); + phi::errors::InvalidArgument("Input(Scale) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Y")), true, - platform::errors::InvalidArgument("Input(Y@GRAD) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("SavedMean"), - true, - platform::errors::InvalidArgument( - "Input(SavedMean) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("SavedVariance"), - true, - platform::errors::InvalidArgument( - "Input(SavedVariance) should not be null")); + phi::errors::InvalidArgument("Input(Y@GRAD) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("SavedMean"), + true, + phi::errors::InvalidArgument("Input(SavedMean) should not be null.")); 
+ PADDLE_ENFORCE_EQ( + ctx->HasInput("SavedVariance"), + true, + phi::errors::InvalidArgument("Input(SavedVariance) should not be null")); // check output PADDLE_ENFORCE_EQ( ctx->HasOutput(framework::GradVarName("X")), true, - platform::errors::InvalidArgument("Output(X@GRAD) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Scale")), - true, - platform::errors::InvalidArgument( - "Output(Scale@GRAD) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Bias")), - true, - platform::errors::InvalidArgument( - "Output(Bias@GRAD) should not be null.")); + phi::errors::InvalidArgument("Output(X@GRAD) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput(framework::GradVarName("Scale")), + true, + phi::errors::InvalidArgument("Output(Scale@GRAD) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput(framework::GradVarName("Bias")), + true, + phi::errors::InvalidArgument("Output(Bias@GRAD) should not be null.")); const auto x_dims = ctx->GetInputDim("X"); const int C = x_dims[x_dims.size() - 1]; @@ -297,8 +297,8 @@ phi::KernelKey FusedBatchNormActGradOp::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { - PADDLE_THROW(platform::errors::NotFound( - "Can not find Y@GRAD in the execution context.")); + PADDLE_THROW( + phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); } const phi::DenseTensor *t = nullptr; if (var->IsType()) { @@ -306,7 +306,7 @@ phi::KernelKey FusedBatchNormActGradOp::GetExpectedKernelType( } if (t == nullptr) { PADDLE_THROW( - platform::errors::NotFound("Can not get the tensor value of Y@GRAD.")); + phi::errors::NotFound("Can not get the tensor value of Y@GRAD.")); } return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc index ac198e9cf2c25..ff903ee6ca716 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -51,35 +51,35 @@ void FusedBatchNormAddActOp::InferShape( const auto x_dims = ctx->GetInputDim("X"); const auto z_dims = ctx->GetInputDim("Z"); - PADDLE_ENFORCE_EQ(x_dims, - z_dims, - platform::errors::InvalidArgument( - "ShapeError: the shapes of input " - "must be equal. But received: the shape " - "of input X = [%s], and the shape of " - "input Y = [%s]", - x_dims, - z_dims)); + PADDLE_ENFORCE_EQ( + x_dims, + z_dims, + phi::errors::InvalidArgument("ShapeError: the shapes of input " + "must be equal. But received: the shape " + "of input X = [%s], and the shape of " + "input Y = [%s]", + x_dims, + z_dims)); PADDLE_ENFORCE_GE( x_dims.size(), 2, - platform::errors::InvalidArgument("ShapeError: the dimensions of input " - "must greater than or equal to 2." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, - x_dims.size())); + phi::errors::InvalidArgument("ShapeError: the dimensions of input " + "must greater than or equal to 2." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, + x_dims.size())); PADDLE_ENFORCE_LE( x_dims.size(), 5, - platform::errors::InvalidArgument("ShapeError: the dimensions of input " - "must smaller than or equal to 5." 
- "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, - x_dims.size())); + phi::errors::InvalidArgument("ShapeError: the dimensions of input " + "must smaller than or equal to 5." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, + x_dims.size())); const int64_t C = x_dims[x_dims.size() - 1]; @@ -89,7 +89,7 @@ void FusedBatchNormAddActOp::InferShape( PADDLE_ENFORCE_EQ( scale_dim.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of scale must equal to 1." "But received: the shape of scale is [%s], the dimension " "of scale is [%d]", @@ -97,7 +97,7 @@ void FusedBatchNormAddActOp::InferShape( scale_dim.size())); PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of bias must equal to 1." "But received: the shape of bias is [%s],the dimension " "of bias is [%d]", @@ -113,14 +113,14 @@ void FusedBatchNormAddActOp::InferShape( if (check) { PADDLE_ENFORCE_EQ(scale_dim[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the shape of scale must equal to [%d]" "But received: the shape of scale is [%d]", C, scale_dim[0])); PADDLE_ENFORCE_EQ(bias_dim[0], C, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the shape of bias must equal to [%d]" "But received: the shape of bias is [%d]", C, @@ -145,12 +145,12 @@ phi::KernelKey FusedBatchNormAddActOp::GetExpectedKernelType( bn_param_type, framework::TransToProtoVarType( ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument("Scale input should be of float type")); + phi::errors::InvalidArgument("Scale input should be of float type")); PADDLE_ENFORCE_EQ( bn_param_type, framework::TransToProtoVarType( ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument("Bias input should be of float type")); + phi::errors::InvalidArgument("Bias input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } @@ -194,7 +194,7 @@ void FusedBatchNormAddActOpMaker::Make() { .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' should be between 0.0 and 0.001.")); }); AddAttr("act_type", "The activation type to be fused.") @@ -261,8 +261,8 @@ phi::KernelKey FusedBatchNormAddActGradOp::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { - PADDLE_THROW(platform::errors::NotFound( - "Can not find Y@GRAD in the execution context.")); + PADDLE_THROW( + phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); } const phi::DenseTensor *t = nullptr; if (var->IsType()) { @@ -270,7 +270,7 @@ phi::KernelKey FusedBatchNormAddActGradOp::GetExpectedKernelType( } if (t == nullptr) { PADDLE_THROW( - platform::errors::NotFound("Can not get the tensor value of Y@GRAD.")); + phi::errors::NotFound("Can not get the tensor value of Y@GRAD.")); } return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), diff --git a/paddle/fluid/operators/fused/fused_dropout_common.h b/paddle/fluid/operators/fused/fused_dropout_common.h index ccd099109487c..737909be4d8bf 100644 --- a/paddle/fluid/operators/fused/fused_dropout_common.h +++ b/paddle/fluid/operators/fused/fused_dropout_common.h @@ -22,9 +22,9 @@ 
limitations under the License. */ #include "paddle/fluid/operators/fused/quant_dequant_kernel.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" #include "paddle/phi/kernels/funcs/functors.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" diff --git a/paddle/fluid/operators/fused/fused_dropout_helper.h b/paddle/fluid/operators/fused/fused_dropout_helper.h index 2b1f6b14c33e5..9e9a89015652b 100644 --- a/paddle/fluid/operators/fused/fused_dropout_helper.h +++ b/paddle/fluid/operators/fused/fused_dropout_helper.h @@ -288,7 +288,7 @@ class FusedDropoutHelper { quant_max_bound, quant_min_bound); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Currently only supports gelu or relu activation functions!")); } } @@ -332,7 +332,7 @@ class FusedDropoutHelper { d_bias, ctx); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Currently only supports gelu or relu activation functions!")); } } diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc index b11840866d46b..b17a6827af0e9 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc @@ -24,7 +24,7 @@ bool IsUnaryCompound(const std::vector &functor_list) { PADDLE_ENFORCE_EQ( functor_list.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid functor list size %d, which should be equal to %d.", functor_list.size(), 2)); @@ -39,7 +39,7 @@ bool HasInPlaceUnary(const std::vector &functor_list) { PADDLE_ENFORCE_EQ( functor_list.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid functor list size %d, which should be equal to %d.", functor_list.size(), 2)); @@ -55,7 +55,7 @@ bool InputXCanBeAbsent(const std::vector &functor_list) { PADDLE_ENFORCE_EQ( functor_list.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid functor list size %d, which should be equal to %d.", functor_list.size(), 2)); @@ -73,7 +73,7 @@ static bool IsSupportedCompound(const std::vector &functors) { PADDLE_ENFORCE_EQ( functors.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid functor list size %d, which should be equal to %d.", functors.size(), 2)); @@ -89,12 +89,12 @@ static bool IsSupportedCompound(const std::vector &functors) { } else if (binary_fun.count(functors[1])) { unary_fun_str = functors[0]; } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "%s and %s are not included in fused_list.", functors[0], functors[1])); } PADDLE_ENFORCE_EQ(unary_fun.count(unary_fun_str), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "%s is not included in fused_list.", unary_fun_str)); return true; } @@ -107,17 +107,17 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of FusedElemwiseActivationOp op should not be null.")); PADDLE_ENFORCE_EQ( 
ctx->HasInput("Y"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y) of FusedElemwiseActivationOp op should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of FusedElemwiseActivationOp op should not be null.")); auto x_dim = ctx->GetInputDim("X"); @@ -134,7 +134,7 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasOutput("IntermediateOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(IntermediateOut) of FusedElemwiseActivationOp " "should not be null.")); @@ -176,7 +176,7 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.Input("X")->dtype(), ctx.Input("Y")->dtype(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The element's type of input should be the same.")); return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); @@ -214,7 +214,7 @@ class FusedElemwiseActivationMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( IsSupportedCompound(functor_list), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the input functors should support compounding.")); }); @@ -317,10 +317,10 @@ class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@Grad) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Input(Out@Grad) should not be null.")); auto functor_list = ctx->Attrs().Get>("functor_list"); @@ -328,14 +328,14 @@ class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel { if (ctx->Attrs().Get("save_intermediate_out")) { PADDLE_ENFORCE_EQ(ctx->HasInput("IntermediateOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(IntermediateOut) should not be null.")); } else { if (!InputXCanBeAbsent(functor_list)) { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); } } @@ -353,7 +353,7 @@ class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( InputXCanBeAbsent(functor_list), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only when BinaryFunctor is elementwise_add, the 'X' " "could be absent.")); @@ -370,7 +370,7 @@ class FusedElemwiseActivationOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("Y"), true, - platform::errors::InvalidArgument("Input(Y) should not be null.")); + phi::errors::InvalidArgument("Input(Y) should not be null.")); ctx->SetOutputDim(y_grad_name, ctx->GetInputDim("Y")); ctx->ShareLoD("Y", y_grad_name); } @@ -414,7 +414,7 @@ class FusedElemwiseAddActivationOp : public FusedElemwiseActivationOp { PADDLE_ENFORCE_EQ( elemntwise_add_detected, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When the FusedElemwiseAddActivationOp Is used in fused pass, the " "elementwise_add Op must be" "detected and used, Please check the 
fuse pass pattern")); @@ -439,7 +439,7 @@ class FusedElemwiseAddActivationOpGrad : public FusedElemwiseActivationOpGrad { PADDLE_ENFORCE_EQ( elemntwise_add_grad_detected, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When the FusedElemwiseAddActivationOpGrad Is used in fused pass, " "the elementwise_add_grad Op must be" "detected and used, Please check the fuse pass pattern")); diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu index e7c436dd1fa0c..e712b78c42669 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cu @@ -23,14 +23,14 @@ PD_REGISTER_STRUCT_KERNEL(fused_elemwise_activation, ops::FusedElemwiseActivationKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(fused_elemwise_activation_grad, GPU, ALL_LAYOUT, ops::FusedElemwiseActivationGradKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation, GPU, @@ -38,11 +38,11 @@ PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation, ops::FusedElemwiseAddActivationKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(fused_elemwise_add_activation_grad, GPU, ALL_LAYOUT, ops::FusedElemwiseAddActivationGradKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h index ad7f79307e628..6c476afd340fa 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h @@ -415,8 +415,8 @@ static void RunFunctors(const framework::ExecutionContext &ctx, in_y, outputs); } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s has not been implemented.", funcs_str)); + PADDLE_THROW(phi::errors::InvalidArgument("%s has not been implemented.", + funcs_str)); } } @@ -611,8 +611,8 @@ static void RunGradFunctors(const framework::ExecutionContext &ctx, y_grad, d_intermediate_out); } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "%s has not been implemented.", funcs_str)); + PADDLE_THROW(phi::errors::InvalidArgument("%s has not been implemented.", + funcs_str)); } } @@ -629,10 +629,10 @@ class FusedElemwiseActivationKernel : public framework::OpKernel { "Y", "FusedElemwiseActivation"); - PADDLE_ENFORCE_EQ(ctx.HasOutput("Out"), - true, - platform::errors::InvalidArgument( - "The output(Out) should not be empty")); + PADDLE_ENFORCE_EQ( + ctx.HasOutput("Out"), + true, + phi::errors::InvalidArgument("The output(Out) should not be empty")); auto output = ctx.Output("Out"); std::vector outputs; @@ -641,7 +641,7 @@ class FusedElemwiseActivationKernel : public framework::OpKernel { if (ctx.Attr("save_intermediate_out")) { PADDLE_ENFORCE_EQ(ctx.HasOutput("IntermediateOut"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The save_intermediate_out is enable, so the " "IntermediateOut should not be empty.")); @@ -663,16 +663,16 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( in_y, nullptr, - platform::errors::InvalidArgument("Input(Y) should not be nullptr.")); + phi::errors::InvalidArgument("Input(Y) should not be nullptr.")); phi::DenseTensor *in_out = const_cast(ctx.Input("Out")); auto in_out_grad = 
ctx.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE_NE(in_out_grad, - nullptr, - platform::errors::InvalidArgument( - "Input(Out@Grad) should not be nullptr.")); + PADDLE_ENFORCE_NE( + in_out_grad, + nullptr, + phi::errors::InvalidArgument("Input(Out@Grad) should not be nullptr.")); phi::DenseTensor *in_x = const_cast(ctx.Input("X")); @@ -695,7 +695,7 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { ctx.Input("IntermediateOut")); PADDLE_ENFORCE_NE(in_intermediate_out, nullptr, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The option of 'save_intermediate_out' is opened," " so the number of 'Out' should be two.")); } else { @@ -703,7 +703,7 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( in_x, nullptr, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); } } @@ -712,13 +712,13 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( in_x, nullptr, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); } else { // If functor_list contains elementwise_add, the backward doesn't use // in_x, in_y and in_out. PADDLE_ENFORCE_EQ(InputXCanBeAbsent(functor_list), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only when the compoundfunctor contains " "elementwise_add_grad, the 'X' could be absent.")); in_x = const_cast(in_out_grad); @@ -729,13 +729,13 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( in_out, nullptr, - platform::errors::InvalidArgument("Input(X) should not be null.")); + phi::errors::InvalidArgument("Input(X) should not be null.")); } else { // If functor_list contains elementwise_add, the backward doesn't use // in_x, in_y and in_out. 
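The check that follows this comment leans on the helper declared earlier in the file: Input(X) may be omitted only when the compound functor pair contains elementwise_add, whose backward needs neither X, Y nor Out. An illustrative sketch of that rule (the helper's real body is not shown in this patch, so the function below is a stand-in, not verbatim Paddle code):

#include <string>
#include <vector>

bool XCanBeAbsent(const std::vector<std::string> &functor_list) {
  PADDLE_ENFORCE_EQ(
      functor_list.size(),
      2,
      phi::errors::InvalidArgument(
          "Invalid functor list size %d, which should be equal to %d.",
          functor_list.size(),
          2));
  // elementwise_add's backward can be recovered from Out@GRAD alone.
  return functor_list[0] == "elementwise_add" ||
         functor_list[1] == "elementwise_add";
}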
PADDLE_ENFORCE_EQ(InputXCanBeAbsent(functor_list), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only when the compoundfunctor contains " "elementwise_add_grad, the 'X' could be absent.")); in_out = const_cast(in_out_grad); diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc index e69825fdd9076..e4c43e4e4efb2 100644 --- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc @@ -50,12 +50,12 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Embeddings's rank should be 2, but received value is:%d.", table_dims.size())); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of the 'Ids' tensor must be 1, but " "received value is:%d.", ids_dims[ids_rank - 1])); @@ -64,14 +64,14 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids)'s rank must be 2, but received value is:%d.", x_dims.size())); if (ctx->HasInput("H0")) { PADDLE_ENFORCE_EQ(ctx->HasInput("C0"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Cell) and Input(Hidden) of LSTM should exist " "at the same time.")); auto h_dims = ctx->GetInputDim("H0"); @@ -79,7 +79,7 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( h_dims, c_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of Input(H0) and Input(C0) " "should be the same, but received H0 dim is:[%s], C0 dim is[%s]", h_dims, @@ -91,19 +91,19 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( wh_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(WeightH) should be 2, but received value is:%d.", wh_dims.size())); PADDLE_ENFORCE_EQ(wh_dims[0], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(WeightH) should equal to " "frame size:%d, but received value is:%d.", frame_size, wh_dims[0])); PADDLE_ENFORCE_EQ(wh_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(WeightH) should equal " "to 4 * %d, but received value is:%d.", frame_size, @@ -113,19 +113,19 @@ void FusedEmbeddingFCLSTMOp::InferShape( PADDLE_ENFORCE_EQ( b_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Bias) should be 2, but received value is:%d.", b_dims.size())); - PADDLE_ENFORCE_EQ(b_dims[0], - 1, - platform::errors::InvalidArgument( - "The first dimension of Input(Bias) " - "should be 1, but received value is:%d.", - b_dims[0])); + PADDLE_ENFORCE_EQ( + b_dims[0], + 1, + phi::errors::InvalidArgument("The first dimension of Input(Bias) " + "should be 1, but received value is:%d.", + b_dims[0])); PADDLE_ENFORCE_EQ( b_dims[1], (ctx->Attrs().Get("use_peepholes") ? 
7 : 4) * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be " "7 * %d if enable peepholes connection or" "4 * %d if disable peepholes, bias dim is:%d, use_peepholes:%d", @@ -417,11 +417,11 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids_data[i], row_number, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Value of Ids %d should less than dict size %d.", i, row_number)); PADDLE_ENFORCE_GE(ids_data[i], 0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Value of Ids %d should greater than ZERO.", i)); memcpy(xx_data + i * row_width, embeddings_data + ids_data[i] * row_width, @@ -530,11 +530,11 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids_data[i], row_number, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Value of Ids %d should less than dict size %d.", i, row_number)); PADDLE_ENFORCE_GE(ids_data[i], 0, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Value of Ids %d should greater than ZERO.", i)); memcpy(xx_data + i * row_width, embeddings_data + ids_data[i] * row_width, diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc index a0ee64bd2eced..4a7691bd33844 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc @@ -37,21 +37,21 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dim size of the input tensor 'W' should be 2. " "But received W's size = %d.", table_dims.size())); PADDLE_ENFORCE_EQ( ids_dims[ids_dims.size() - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of the input tensor 'Ids' should be 1. " "But received Ids's size in the last dimension = %d.", ids_dims[ids_dims.size() - 1])); // we only support sum now PADDLE_ENFORCE_EQ(combiner, "sum", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "The pooling type of sequence_pool only support sum " "now. So the 'combiner' must be 'sum'.")); @@ -61,7 +61,7 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel { PADDLE_GET(framework::VarDesc*, ctx->GetInputVarPtrs("Ids")[0]); PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "In compile time, the LoD Level of Ids should be 1. " "But received the LoD Level of Ids = %d.", ids_desc->GetLoDLevel())); diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h index e0186d99acb03..2a9a1e71dbd2b 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h @@ -95,7 +95,7 @@ struct EmbeddingVSumFunctor { PADDLE_ENFORCE_LE(table_width * idx_width, out_width, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "table_width * idx_width should be less than or " "equal to out_width. But received " "table_width * idx_width = %s, out_width = %d.", @@ -103,7 +103,7 @@ struct EmbeddingVSumFunctor { out_width)); PADDLE_ENFORCE_GT(ids_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tensor ids's LoD[0] should be greater than 1. 
" "But received the ids's LoD[0] = %d.", ids_lod.size())); @@ -152,7 +152,7 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel { // in run time, the LoD of ids must be 1 PADDLE_ENFORCE_EQ(ids_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The LoD level of Input(Ids) should be 1. But " "received Ids's LoD level = %d.", ids_lod.size())); @@ -236,7 +236,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel { auto *table_t = context.Input("W"); table_dim = table_t->value().dims(); } else { - PADDLE_THROW(platform::errors::PermissionDenied( + PADDLE_THROW(phi::errors::PermissionDenied( "The parameter W of a LookupTable " "must be either phi::DenseTensor or SelectedRows.")); } @@ -293,7 +293,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel { const auto &ids_lod = ids->lod(); PADDLE_ENFORCE_EQ(ids_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The LoD level of Input(Ids) should be 1. But " "received Ids's LoD level = %d.", ids_lod.size())); diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc index f6343f5bd1cbf..5956ea5a839a7 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc @@ -79,8 +79,8 @@ class FusedFeedForwardOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( mat_dim_x.width_, static_cast(1), - platform::errors::InvalidArgument("Product from the X shape[1] to " - "shape[n-1] must be larger than 1!")); + phi::errors::InvalidArgument("Product from the X shape[1] to " + "shape[n-1] must be larger than 1!")); auto dim_Linear1Weight = context->GetInputDim("Linear1Weight"); auto tmp_dim_x = dim_x; tmp_dim_x[dim_x.size() - 1] = @@ -190,7 +190,7 @@ class FusedFeedForwardOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout1_rate' must be between 0.0 and 1.0.")); }); AddAttr("dropout2_rate", "the dropout rate of second dropout") @@ -199,7 +199,7 @@ class FusedFeedForwardOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout2_rate' must be between 0.0 and 1.0.")); }); AddAttr("dropout1_implementation", @@ -209,7 +209,7 @@ class FusedFeedForwardOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout1_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -220,7 +220,7 @@ class FusedFeedForwardOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout2_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -266,7 +266,7 @@ class FusedFeedForwardOpGrad : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_test"), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "GradOp is only callable when is_test is false")); bool pre_layer_norm = ctx->Attrs().Get("pre_layer_norm"); 
OP_INOUT_CHECK(ctx->HasInput("Dropout1Mask"), diff --git a/paddle/fluid/operators/fused/fused_gate_attention.h b/paddle/fluid/operators/fused/fused_gate_attention.h index 69fbca0f9be0f..cc1d0de18ada1 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention.h +++ b/paddle/fluid/operators/fused/fused_gate_attention.h @@ -156,8 +156,8 @@ struct GateAttentionConfig { if (merge_qkv) { PADDLE_ENFORCE_NOT_NULL( qkv_weight, - platform::errors::NotFound("The input qkv_weight can not be nullptr " - "when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_weight can not be nullptr " + "when merge_qkv is true.")); // When q_dim == kv_dim, QKV matmul can be computed merged. // qkv_weight: shape=[3, num_heads, head_dim, q_dim] @@ -172,12 +172,12 @@ struct GateAttentionConfig { } else { PADDLE_ENFORCE_NOT_NULL( key, - platform::errors::NotFound( + phi::errors::NotFound( "The input key can not be nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( query_weight, - platform::errors::NotFound("The input query_weight can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input query_weight can not be " + "nullptr when merge_qkv is false.")); // When q_dim != kv_dim, QKV matmul must be computed saparately. // key: shape=[batch_size, seq_len_m, m_size, kv_dim] @@ -414,8 +414,8 @@ class FMHAGateRef { // qkv_transpose_out = transpose(qkv_out) PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, - platform::errors::NotFound("The input qkv_transpose_out can not be " - "nullptr when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_transpose_out can not be " + "nullptr when merge_qkv is true.")); phi::DenseTensor* qkv_out = config->GetQKVOut(); ComputeQKVTransposeForward(*qkv_out, qkv_transpose_out); @@ -429,16 +429,16 @@ class FMHAGateRef { } else { PADDLE_ENFORCE_NOT_NULL( q_transpose_out, - platform::errors::NotFound("The input q_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input q_transpose_out can not be " + "nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( k_transpose_out, - platform::errors::NotFound("The input k_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input k_transpose_out can not be " + "nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( v_transpose_out, - platform::errors::NotFound("The input v_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input v_transpose_out can not be " + "nullptr when merge_qkv is false.")); phi::DenseTensor* query_out = config->GetQueryOut(); phi::DenseTensor* key_out = config->GetKeyOut(); @@ -544,8 +544,8 @@ class FMHAGateRef { if (merge_qkv_) { PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, - platform::errors::NotFound("The input qkv_transpose_out can not be " - "nullptr when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_transpose_out can not be " + "nullptr when merge_qkv is true.")); int64_t q_size = config->GetQuerySize(); q_ptr = qkv_transpose_out->data(); @@ -562,16 +562,16 @@ class FMHAGateRef { } else { PADDLE_ENFORCE_NOT_NULL( q_transpose_out, - platform::errors::NotFound("The input q_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input q_transpose_out can not be " + "nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( k_transpose_out, - platform::errors::NotFound("The input k_transpose_out can not be " - "nullptr when merge_qkv is false.")); + 
phi::errors::NotFound("The input k_transpose_out can not be " + "nullptr when merge_qkv is false.")); PADDLE_ENFORCE_NOT_NULL( v_transpose_out, - platform::errors::NotFound("The input v_transpose_out can not be " - "nullptr when merge_qkv is false.")); + phi::errors::NotFound("The input v_transpose_out can not be " + "nullptr when merge_qkv is false.")); q_ptr = q_transpose_out->data(); k_ptr = k_transpose_out->data(); @@ -787,11 +787,11 @@ class FMHAGateRef { phi::DenseTensor* nonbatched_bias_grad) { PADDLE_ENFORCE_NOT_NULL( qk_out_grad, - platform::errors::NotFound("The qk_out_grad can not be nullptr.")); + phi::errors::NotFound("The qk_out_grad can not be nullptr.")); PADDLE_ENFORCE_EQ(qk_out_grad->dims(), softmax_out->dims(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of qk_out_grad and softmax_out is " "expected to be the same. But received qk_out_grad's " "shape = %s, softmax_out's shape = %s.", @@ -800,7 +800,7 @@ class FMHAGateRef { PADDLE_ENFORCE_EQ(src_mask_grad, nullptr, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "src_mask_grad is expected to be nullptr.")); phi::SoftmaxBackwardCUDAKernelDriver( @@ -874,8 +874,8 @@ class FlashAttnWithGating { PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, - platform::errors::NotFound("The input qkv_transpose_out can not be " - "nullptr when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_transpose_out can not be " + "nullptr when merge_qkv is true.")); // 1. Transpose qkv_out for flash_attn. phi::DenseTensor* qkv_out = config->GetQKVOut(); @@ -989,8 +989,8 @@ class FlashAttnWithGating { PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, - platform::errors::NotFound("The input qkv_transpose_out can not be" - "nullptr when merge_qkv is true.")); + phi::errors::NotFound("The input qkv_transpose_out can not be" + "nullptr when merge_qkv is true.")); int64_t q_size = config->GetQuerySize(); const T* q_ptr = qkv_transpose_out->data(); diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cu b/paddle/fluid/operators/fused/fused_gate_attention_op.cu index d066086bd6ae0..78202f70bcffb 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cu @@ -401,7 +401,7 @@ class FusedGateAttentionOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( !key || query == key || query->data() == key->data(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "key is expected to be nullptr or the same as " "query, but received key=%p, query=%p.", key, @@ -623,14 +623,14 @@ PD_REGISTER_STRUCT_KERNEL(fused_gate_attention, ALL_LAYOUT, ops::FusedGateAttentionOpKernel, float, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} PD_REGISTER_STRUCT_KERNEL(fused_gate_attention_grad, GPU, ALL_LAYOUT, ops::FusedGateAttentionGradKernel, float, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} #else PD_REGISTER_STRUCT_KERNEL(fused_gate_attention, @@ -639,7 +639,7 @@ PD_REGISTER_STRUCT_KERNEL(fused_gate_attention, ops::FusedGateAttentionOpKernel, float, double, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} PD_REGISTER_STRUCT_KERNEL(fused_gate_attention_grad, GPU, @@ -647,6 +647,6 @@ PD_REGISTER_STRUCT_KERNEL(fused_gate_attention_grad, ops::FusedGateAttentionGradKernel, float, double, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} #endif diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h 
b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
index a6bd467dc1992..157ab69afc943 100644
--- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
+++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
@@ -466,12 +466,11 @@ struct FusedLayernormResidualDropoutBiasFunctor {
   }
 };
 
-template struct FusedLayernormResidualDropoutBiasFunctor<
-    paddle::platform::float16,
-    uint8_t,
-    8,
-    float,
-    false>;
+template struct FusedLayernormResidualDropoutBiasFunctor<phi::dtype::float16,
+                                                         uint8_t,
+                                                         8,
+                                                         float,
+                                                         false>;
 
 /*
  * @brief layernorm(residual + dropout(x));
@@ -872,7 +871,7 @@ void LaunchLayernormResidualDropoutBias(
           epsilon,
           cols));
     default:
-      PADDLE_THROW(platform::errors::InvalidArgument(
+      PADDLE_THROW(phi::errors::InvalidArgument(
           "Product from begin_norm_axis to end must be larger than 1"));
       break;
   }
@@ -1037,7 +1036,7 @@ void LaunchLayernormResidualDropoutBias(
     switch (cols) {
       LAUNCH_FUSED_FAST_LN_KERNEL;
       default:
-        PADDLE_THROW(platform::errors::InvalidArgument(
+        PADDLE_THROW(phi::errors::InvalidArgument(
            "Only when column is equal to 768/1024/4096 is supported for "
            "now"));
        break;
diff --git a/paddle/fluid/operators/fused/fused_matmul_op.cc b/paddle/fluid/operators/fused/fused_matmul_op.cc
index 129f7e85386e7..93d79d677f8a5 100644
--- a/paddle/fluid/operators/fused/fused_matmul_op.cc
+++ b/paddle/fluid/operators/fused/fused_matmul_op.cc
@@ -176,47 +176,47 @@ class FusedMatmulOpMaker : public framework::OpProtoAndCheckerMaker {
 protected:
  void Apply() {
    AddInput("ResidualData",
-             "Extra input from matmul_elementwise_add_mkldnn_fuse_pass")
+             "Extra input from matmul_elementwise_add_onednn_fuse_pass")
        .AsDispensable()
        .AsExtra();
    AddAttr<float>("matmul_alpha", "Output scale used in matmul_v1")
        .SetDefault(1.0f);
    AddAttr<std::string>(
        "fuse_activation",
-        "Activation type from matmul_activation_mkldnn_fuse_pass")
+        "Activation type from matmul_activation_onednn_fuse_pass")
        .SetDefault("");
    AddAttr<float>("fuse_alpha",
-                   "Activation alpha from matmul_activation_mkldnn_fuse_pass")
+                   "Activation alpha from matmul_activation_onednn_fuse_pass")
        .SetDefault(0.0f);
    AddAttr<float>("fuse_beta",
-                   "Activation beta from matmul_activation_mkldnn_fuse_pass")
+                   "Activation beta from matmul_activation_onednn_fuse_pass")
        .SetDefault(0.0f);
    AddAttr<float>("fused_output_scale",
                   "Output scale from operator_scale_onednn_fuse_pass")
        .SetDefault(1.0f);
    AddAttr<std::vector<int>>("fused_reshape_X",
                              "Reshape's shape attribute from "
-                              "reshape_transpose_matmul_mkldnn_fuse_pass")
+                              "reshape_transpose_matmul_onednn_fuse_pass")
        .SetDefault({});
    AddAttr<std::vector<int>>("fused_transpose_X",
                              "Transpose's axis attribute from "
-                              "reshape_transpose_matmul_mkldnn_fuse_pass")
+                              "reshape_transpose_matmul_onednn_fuse_pass")
        .SetDefault({});
    AddAttr<std::vector<int>>("fused_reshape_Y",
                              "Reshape's shape attribute from "
-                              "reshape_transpose_matmul_mkldnn_fuse_pass")
+                              "reshape_transpose_matmul_onednn_fuse_pass")
        .SetDefault({});
    AddAttr<std::vector<int>>("fused_transpose_Y",
                              "Transpose's axis attribute from "
-                              "reshape_transpose_matmul_mkldnn_fuse_pass")
+                              "reshape_transpose_matmul_onednn_fuse_pass")
        .SetDefault({});
    AddAttr<std::vector<int>>("fused_reshape_Out",
                              "Reshape's shape attribute from "
-                              "matmul_transpose_reshape_mkldnn_fuse_pass")
+                              "matmul_transpose_reshape_onednn_fuse_pass")
        .SetDefault({});
    AddAttr<std::vector<int>>("fused_transpose_Out",
                              "Transpose's axis attribute from "
-                              "matmul_transpose_reshape_mkldnn_fuse_pass")
+                              "matmul_transpose_reshape_onednn_fuse_pass")
        .SetDefault({});
    AddAttr<std::string>("mkldnn_data_type", "oneDNN operator data type")
        .SetDefault("float32")
diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc
b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc index 562ddf7ae6c4e..dcea415e32508 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc @@ -72,23 +72,23 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dim.size(), 3, - platform::errors::InvalidArgument("The dimensions of x must be 3" - "(batch_size, seq_len, dim_embed)," - "but received dimensions of" - "Input is [%d]", - x_dim.size())); - PADDLE_ENFORCE_EQ(y_dim.size(), - 4, - platform::errors::InvalidArgument( - "The dimensions of qkv_weight must be 4" - "(3, num_head, dim_head, dim_embed)," - "but received dimensions of" - "Input is [%d]", - y_dim.size())); + phi::errors::InvalidArgument("The dimensions of x must be 3" + "(batch_size, seq_len, dim_embed)," + "but received dimensions of" + "Input is [%d]", + x_dim.size())); + PADDLE_ENFORCE_EQ( + y_dim.size(), + 4, + phi::errors::InvalidArgument("The dimensions of qkv_weight must be 4" + "(3, num_head, dim_head, dim_embed)," + "but received dimensions of" + "Input is [%d]", + y_dim.size())); PADDLE_ENFORCE_EQ( x_dim[2], trans_qkvw ? y_dim[3] : y_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of x_dim[2] and y_dim[3](trans_qkvw is " "true) or y_dim[0](trans_qkvw is false)" "must be equal. But received: the shape " @@ -101,7 +101,7 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { if (trans_qkvw) { PADDLE_ENFORCE_EQ(y_dim[1] * y_dim[2], y_dim[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 4" "(3, num_head, dim_head, dim_embed)," "and must satisfy the limitations: " @@ -110,7 +110,7 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(y_dim[2] * y_dim[3], y_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of qkv_weight must be 4" "(dim_embed, 3, num_head, dim_head)," "and must satisfy the limitations: " @@ -126,23 +126,23 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( c_dim.size(), 5, - paddle::platform::errors::InvalidArgument( - "The CacheKV must be 5 dims, but got %d", c_dim.size())); + phi::errors::InvalidArgument("The CacheKV must be 5 dims, but got %d", + c_dim.size())); PADDLE_ENFORCE_EQ(c_dim[0], 2, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of CacheKV must be 2, but got %d", c_dim[0])); // 2 PADDLE_ENFORCE_EQ(c_dim[1], x_dim[0], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of CacheKV must be equal with " "batch size %d, but got %d", x_dim[0], c_dim[1])); // batch_size PADDLE_ENFORCE_EQ(c_dim[2], trans_qkvw ? y_dim[1] : y_dim[2], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of CacheKV must be equal with num " "head %d, but got %d", trans_qkvw ? y_dim[1] : y_dim[2], @@ -150,12 +150,12 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( c_dim[3], 0, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The forth dim of CacheKV must be greater than 0, but got %d", c_dim[3])); // cache_seq_len PADDLE_ENFORCE_EQ(c_dim[4], trans_qkvw ? 
y_dim[2] : y_dim[3], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The fifth dim of CacheKV must be equal with head " "size %d, but got %d", trans_qkvw ? y_dim[2] : y_dim[3], @@ -273,7 +273,7 @@ class FusedMultiTransformerINT8OpMaker .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' in Op(LayerNorm) should be between" "0.0 and 0.001, But received [%s].", epsilon)); @@ -284,7 +284,7 @@ class FusedMultiTransformerINT8OpMaker .AddCustomChecker([](const float &drop_p) { PADDLE_ENFORCE_EQ(drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout_rate' must be between 0.0 and 1.0.")); }); @@ -301,7 +301,7 @@ class FusedMultiTransformerINT8OpMaker PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu index a76e93f5cdcf5..5893024c0e958 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu @@ -106,19 +106,19 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { if (time_step) { PADDLE_ENFORCE_EQ(time_step->place(), platform::CPUPlace(), - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The place of input(TimeStep) must be CPUPlace.")); // cache_seq_len int time_step_value = time_step->data()[0]; PADDLE_ENFORCE_GT(time_step_value, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The value of time_step must > 0, but now is %d", time_step_value)); PADDLE_ENFORCE_EQ( seq_len, 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "In decode stage, the seq_len of input must be 1, but now is %d", seq_len)); out_seq_len += time_step_value; @@ -668,4 +668,4 @@ PD_REGISTER_STRUCT_KERNEL(fused_multi_transformer_int8, ALL_LAYOUT, ops::FusedMultiTransformerINT8OpKernel, float, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc index a8bd90c7da5d4..dc90eaa3e5306 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc @@ -66,23 +66,23 @@ class FusedMultiTransformerOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dim.size(), 3, - platform::errors::InvalidArgument("The dimensions of x must be 3" - "(batch_size, seq_len, dim_embed)," - "but received dimensions of" - "Input is [%d]", - x_dim.size())); - PADDLE_ENFORCE_EQ(y_dim.size(), - 4, - platform::errors::InvalidArgument( - "The dimensions of qkv_weight must be 4" - "(3, num_head, dim_head, dim_embed)," - "but received dimensions of" - "Input is [%d]", - y_dim.size())); + phi::errors::InvalidArgument("The dimensions of x must be 3" + "(batch_size, seq_len, dim_embed)," + "but received dimensions of" + "Input is [%d]", + x_dim.size())); + PADDLE_ENFORCE_EQ( + y_dim.size(), + 4, + phi::errors::InvalidArgument("The dimensions of qkv_weight must be 4" + "(3, num_head, dim_head, dim_embed)," + "but received 
dimensions of" + "Input is [%d]", + y_dim.size())); PADDLE_ENFORCE_EQ( x_dim[2], trans_qkvw ? y_dim[3] : y_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: the dimension of x_dim[2] and y_dim[3](trans_qkvw is " "true) or y_dim[0](trans_qkvw is false)" "must be equal. But received: the shape " @@ -99,30 +99,30 @@ class FusedMultiTransformerOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( c_dim.size(), 5, - paddle::platform::errors::InvalidArgument( - "The CacheKV must be 5 dims, but got %d", c_dim.size())); + phi::errors::InvalidArgument("The CacheKV must be 5 dims, but got %d", + c_dim.size())); PADDLE_ENFORCE_EQ(c_dim[0], 2, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of CacheKV must be 2, but got %d", c_dim[0])); // 2 PADDLE_ENFORCE_EQ(c_dim[1], x_dim[0], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of CacheKV must be equal with " "batch size %d, but got %d", x_dim[0], c_dim[1])); // batch_size PADDLE_ENFORCE_EQ(c_dim[2], trans_qkvw ? y_dim[1] : y_dim[2], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of CacheKV must be equal with num " "head %d, but got %d", trans_qkvw ? y_dim[1] : y_dim[2], c_dim[2])); // num_head PADDLE_ENFORCE_EQ(c_dim[4], trans_qkvw ? y_dim[2] : y_dim[3], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The fifth dim of CacheKV must be equal with head " "size %d, but got %d", trans_qkvw ? y_dim[2] : y_dim[3], @@ -223,7 +223,7 @@ class FusedMultiTransformerOpOpMaker PADDLE_ENFORCE_EQ( rotary_emb_dims >= 0 && rotary_emb_dims <= 2, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'rotary_emb_dims' in Op(Rotray) should be between" "0 and 2, But received [%s].", rotary_emb_dims)); @@ -234,7 +234,7 @@ class FusedMultiTransformerOpOpMaker .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'epsilon' in Op(LayerNorm) should be between" "0.0 and 0.001, But received [%s].", epsilon)); @@ -245,7 +245,7 @@ class FusedMultiTransformerOpOpMaker .AddCustomChecker([](const float &drop_p) { PADDLE_ENFORCE_EQ(drop_p >= 0.0f && drop_p <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'dropout_rate' must be between 0.0 and 1.0.")); }); @@ -262,7 +262,7 @@ class FusedMultiTransformerOpOpMaker PADDLE_ENFORCE_EQ( type == "downgrade_in_infer" || type == "upscale_in_train", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "dropout_implementation can only be downgrade_in_infer or " "upscale_in_train")); }); @@ -272,7 +272,7 @@ class FusedMultiTransformerOpOpMaker PADDLE_ENFORCE_EQ( act_type == "gelu" || act_type == "relu" || act_type == "none", true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support `gelu`, `relu`, `none` activation in " "FusedMultiTransformer. 
")); }); diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h index 415a6ba1ffdf3..4bf467e9caf8f 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h @@ -131,7 +131,7 @@ static void AllReduce(phi::DenseTensor &tensor, // NOLINT namespace { // NOLINT namespace plat = paddle::platform; -using float16 = plat::float16; +using float16 = phi::dtype::float16; #define MMHA_USE_FP32_ACUM_FOR_LOGITS #define MMHA_USE_FP32_ACUM_FOR_OUT @@ -746,9 +746,9 @@ inline __device__ void convert_from_float(float4 &dst, float4 src) { // NOLINT dst = src; } -inline __device__ void convert_from_float(plat::float16 &dst, // NOLINT +inline __device__ void convert_from_float(phi::dtype::float16 &dst, // NOLINT float src) { - dst = static_cast(src); + dst = static_cast(src); } inline __device__ void convert_from_float(uint4 &dst, Float8_ src) { // NOLINT diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc index 3dbba2bf42ce4..93ac8f4e220c9 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc @@ -25,24 +25,23 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of FusedSeqpoolCVMOp should not be empty.")); PADDLE_ENFORCE_GE( ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of FusedSeqpoolCVMOp should not be empty.")); auto cvm_dims = ctx->GetInputDim("CVM"); PADDLE_ENFORCE_EQ( cvm_dims.size(), 2UL, - platform::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); - PADDLE_ENFORCE_EQ( - cvm_dims[1], - 2UL, - platform::errors::InvalidArgument("The 2nd dimension of " - "Input(CVM) should be 2.")); + phi::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); + PADDLE_ENFORCE_EQ(cvm_dims[1], + 2UL, + phi::errors::InvalidArgument("The 2nd dimension of " + "Input(CVM) should be 2.")); auto ins_dims = ctx->GetInputsDim("X"); const int cvm_offset = ctx->Attrs().Get("cvm_offset"); @@ -53,7 +52,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT(num_inputs, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensors count should be greater than 0, " "but received value is %d.", num_inputs)); @@ -62,7 +61,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { // since input lod is not accessible here. 
PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims size of first input should be equal to 2, " "but received value is %d.", ins_dims[0].size())); @@ -88,7 +87,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input should be same, " "please check, last batch_size is %d, current " "batch_size is %d", @@ -111,7 +110,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( dims[rank - 1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension(embedding) of the " "'X' tensor must be larger than 2.", i)); @@ -145,7 +144,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { } PADDLE_ENFORCE_EQ(flag, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "All Inputs of fused_seqpool_cvm OP are Empty!")); return phi::KernelKey(input_data_type, ctx.GetPlace()); // return phi::KernelKey(framework::proto::VarType::FP32, @@ -201,13 +200,13 @@ class FusedSeqpoolCVMGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( cvm_dims.size(), 2, - platform::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); + phi::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); for (size_t i = 0; i < og_dims.size(); i++) { PADDLE_ENFORCE_EQ( og_dims[i].size(), x_dims[i].size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of output grad must equal to Input(X). But " "received: input rank %u, input shape [%s].", og_dims[i].size(), @@ -217,7 +216,7 @@ class FusedSeqpoolCVMGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( o_dim, x_dims[i][og_dims[i].size() - 1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension mismatch between Input(OUT@GRAD) and " "Input(X). Received Input(OUT@GRAD): input rank %u, " "input shape [%s]; received Input(X): input rank %u, " @@ -230,7 +229,7 @@ class FusedSeqpoolCVMGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( og_dims[i][og_dims[i].size() - 1], x_dims[i][og_dims[i].size() - 1] - cvm_offset, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension mismatch between Input(OUT@GRAD) and " "Input(X). 
Received Input(OUT@GRAD): input rank %u, " "input shape [%s]; received Input(X): input rank %u, " diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu index 362860aa23bdf..df00c74a30237 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu @@ -463,7 +463,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input should be same, " "please cheack, last batchsize is %d, current " "batchsize is %d", @@ -550,7 +550,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input should be same, " "please cheack, last batchsize is %d, current " "batchsize is %d", diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h index dcc76bbf95254..bd4475da0b8ea 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h @@ -27,7 +27,7 @@ template class FusedSeqpoolCVMOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unimplemented CPU kernel for FusedSeqpoolCVMOp, only support GPU " "now.")); } @@ -37,7 +37,7 @@ template class FusedSeqpoolCVMGradOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unimplemented CPU kernel for FusedSeqpoolCVMGradOp, only support GPU " "now.")); } diff --git a/paddle/fluid/operators/fused/fused_softmax_mask.cu.h b/paddle/fluid/operators/fused/fused_softmax_mask.cu.h index 12e511fe3aef9..cb3292a60ebd2 100644 --- a/paddle/fluid/operators/fused/fused_softmax_mask.cu.h +++ b/paddle/fluid/operators/fused/fused_softmax_mask.cu.h @@ -123,9 +123,10 @@ __global__ void FusedSoftmaxMaskVecKernel(T* dst, // #define SELECT_SOFTMAX_MASK_KERNEL(ELEMENTS) \ // do { \ // if (sizeof(T) == 2 && seq_len % 8 == 0) { \ -// FusedSoftmaxMaskVecKernel \ +// FusedSoftmaxMaskVecKernel \ // <<>>( \ -// (plat::float16*)dst, (const plat::float16*)src, mask, seq_len); \ +// (phi::dtype::float16*)dst, (const phi::dtype::float16*)src, mask, +// seq_len); \ // } \ // else if (seq_len % 4 == 0) SOFTMAX_MASK_KERNEL(4, ELEMENTS); \ // else if (seq_len % 2 == 0) SOFTMAX_MASK_KERNEL(2, ELEMENTS); \ @@ -159,9 +160,9 @@ void LaunchFusedSoftmaxMaskKernel(const T* src, PADDLE_ENFORCE_EQ( seq_len > 0 && seq_len <= 4096, true, - platform::errors::InvalidArgument("seq_len must be between (0, 4096] " - "received the seq_len is %d", - seq_len)); + phi::errors::InvalidArgument("seq_len must be between (0, 4096] " + "received the seq_len is %d", + seq_len)); constexpr int block_size = 128; constexpr int warp_size = 32; @@ -196,7 +197,7 @@ void LaunchFusedSoftmaxMaskKernel(const T* src, CASE_SOFTMAX_MASK_KERNEL(64); // <=2048 CASE_SOFTMAX_MASK_KERNEL(128); // <=4096 default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "seq_len must be between (0, 4096], received the 
seq_len is %d", seq_len)); } diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc deleted file mode 100644 index 41a9299f7258c..0000000000000 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/backends/gpu/cuda/cudnn_workspace_helper.h" - -namespace paddle { -namespace operators { - -class ConvInceptionFusionOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - // 1 x - auto in_dims = ctx->GetInputDim("Input"); - // 4 filters - auto w_dims = ctx->GetInputsDim("Filter"); - - PADDLE_ENFORCE_EQ( - in_dims.size(), - 4, - platform::errors::InvalidArgument("Conv intput should be 4-D tensor.")); - PADDLE_ENFORCE_EQ( - w_dims.size(), - 4, - platform::errors::InvalidArgument("There should be 4 filters.")); - PADDLE_ENFORCE_EQ(w_dims[0][1], - in_dims[1], - platform::errors::InvalidArgument( - "Invalid filter channel number %d, which should be " - "equal to input channel number %d.", - w_dims[0][1], - in_dims[1])); - PADDLE_ENFORCE_EQ(w_dims[1][1], - in_dims[1], - platform::errors::InvalidArgument( - "Invalid filter channel number %d, which should be " - "equal to input channel number %d.", - w_dims[1][1], - in_dims[1])); - - int n = in_dims[0]; - // compute output channel - // 1st channel - int c = w_dims[0][0]; - // add 2nd channel - c += (w_dims[1][0] - w_dims[2][1] * 2); - // add 3rd channel - c += (w_dims[2][0] - w_dims[3][1]); - // add 4-th channel - c += w_dims[3][0]; - - int h = in_dims[2]; - int w = in_dims[3]; - - ctx->SetOutputDim("Output", {n, c, h, w}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Input"), - ctx.GetPlace()); - } -}; - -class ConvInceptionFusionOpMaker : public framework::OpProtoAndCheckerMaker { - protected: - void Make() override { - AddInput("Input", "(Tensor) NCHW layout."); - AddInput("Filter", "(vector) 4 aggregated filters").AsDuplicable(); - AddInput("Bias", "(vector) it's length is equal to Filter") - .AsDuplicable(); - AddOutput("Output", - "(Tensor) The output tensor of convolution operator. " - "The format of output tensor is also NCHW."); - AddOutput("TempOutput", "").AsDuplicable(); - AddAttr( - "pooling_type", - "(string), pooling type, can be \"max\" for max-pooling " - "and \"avg\" for average-pooling.") - .InEnum({"max", "avg"}); - AddAttr( - "exclusive", - "(bool, default True) When true, will exclude the zero-padding in the " - "averaging calculating, otherwise, include the zero-padding. Note, it " - "is only used when pooling_type is avg. 
The default is True.") - .SetDefault(true); - AddAttr( - "activation", - "The activation type can be 'identity', 'sigmoid', 'relu', 'relu6' " - "'relux' , 'tanh', 'band_pass'") - .SetDefault("relu"); - AddAttr("workspace_size_MB", - "Only used in cudnn kernel. Need set use_cudnn to true." - "workspace size for cudnn, in MB, " - "workspace is a section of GPU memory which will be " - "allocated/freed each time the operator runs, larger " - "workspace size can increase performance but also requires " - "better hardware. This size should be chosen carefully.") - .SetDefault(phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB()); - AddComment(R"DOC( -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - conv2d_inception_fusion, - ops::ConvInceptionFusionOp, - ops::ConvInceptionFusionOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu deleted file mode 100644 index 63f065e0fef49..0000000000000 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu +++ /dev/null @@ -1,350 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#include "paddle/phi/kernels/gpudnn/conv_gpudnn_info.h" - -namespace paddle { -namespace operators { - -#if CUDNN_VERSION >= 7100 -using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; -using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; -using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; -using ScopedActivationDescriptor = platform::ScopedActivationDescriptor; -using DataLayout = platform::DataLayout; - -using ScopedPoolingDescriptor = platform::ScopedPoolingDescriptor; -using PoolingMode = platform::PoolingMode; -template -using ScalingParamType = typename platform::CudnnDataType::ScalingParamType; - -template -using CudnnDataType = platform::CudnnDataType; - -template -class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { -#if CUDNN_VERSION < 7100 - PADDLE_THROW(phi::errors::Unimplemented( - "The conv2d_inception_fusion operator is not supported on GPU " - "when CUDNN version < 7.1.0")); -#endif - auto& dev_ctx = ctx.template device_context(); - auto* input = ctx.Input("Input"); - auto filters = ctx.MultiInput("Filter"); - auto bias = ctx.MultiInput("Bias"); - - auto* output = ctx.Output("Output"); - auto temp_outs = ctx.MultiOutput("TempOutput"); - - const std::string pool_type = ctx.Attr("pooling_type"); - const std::string activation = ctx.Attr("activation"); - const bool exclusive = ctx.Attr("exclusive"); - - int64_t user_workspace_size = - static_cast(ctx.Attr("workspace_size_MB")); - - const T* input_data = input->data(); - T* output_data = dev_ctx.Alloc(output, output->numel() * sizeof(T)); - temp_outs[0]->Resize(input->dims()); - T* temp_data = - dev_ctx.Alloc(temp_outs[0], temp_outs[0]->numel() * sizeof(T)); - - DataLayout layout = DataLayout::kNCHW; - std::vector in_dim = common::vectorize(input->dims()); - - // ------------------- cudnn descriptors --------------------- - PoolingMode pooling_mode; - if (pool_type == "max") { - pooling_mode = PoolingMode::kMaximum; - } else { - pooling_mode = exclusive ? 
PoolingMode::kAverageExclusive - : (PoolingMode::kAverageInclusive); - } - std::vector k0x0 = {0, 0}; - std::vector k1x1 = {1, 1}; - std::vector k1x1_2 = {1, 1}; - std::vector k3x3 = {3, 3}; - ScopedPoolingDescriptor pool_desc; - ScopedActivationDescriptor act_desc; - ScopedTensorDescriptor out_pool_desc; - ScopedTensorDescriptor input_desc; - cudnnPoolingDescriptor_t cudnn_pool_desc = - pool_desc.descriptor(pooling_mode, k3x3, k1x1, k1x1); - - cudnnTensorDescriptor_t cudnn_input_desc = - input_desc.descriptor(layout, common::vectorize(input->dims())); - cudnnTensorDescriptor_t pool_out_desc = out_pool_desc.descriptor( - layout, common::vectorize(input->dims())); - - cudnnDataType_t cudnn_dtype = CudnnDataType::type; - cudnnTensorDescriptor_t* out_desc = new cudnnTensorDescriptor_t[4]; - cudnnFilterDescriptor_t* filter_desc = new cudnnFilterDescriptor_t[4]; - cudnnTensorDescriptor_t* bias_desc = new cudnnTensorDescriptor_t[4]; - cudnnTensorDescriptor_t* in_desc = new cudnnTensorDescriptor_t[4]; - cudnnConvolutionDescriptor_t* conv_desc = - new cudnnConvolutionDescriptor_t[4]; - for (int i = 0; i < 4; ++i) { - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnCreateFilterDescriptor(&filter_desc[i])); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnCreateTensorDescriptor(&bias_desc[i])); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnCreateTensorDescriptor(&in_desc[i])); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnCreateTensorDescriptor(&out_desc[i])); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnCreateConvolutionDescriptor(&conv_desc[i])); - } - - std::vector> filter_dims; - std::vector> bias_dims; - std::vector> in_dims; - std::vector> out_dims; - std::vector> in_strides; - std::vector> out_strides; - std::vector> bias_strides; - - cudnnTensorFormat_t format = CUDNN_TENSOR_NCHW; - int n = in_dim[0]; - int h = in_dim[2]; - int w = in_dim[3]; - int oc = output->dims()[1]; - - cudnnDataType_t compute_type = (cudnn_dtype == CUDNN_DATA_DOUBLE) - ? 
CUDNN_DATA_DOUBLE - : CUDNN_DATA_FLOAT; - - for (int i = 0; i < 4; ++i) { - filter_dims.push_back(common::vectorize(filters[i]->dims())); - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetFilterNdDescriptor( - filter_desc[i], cudnn_dtype, format, 4, filter_dims[i].data())); - bias_dims.push_back({1, filter_dims[i][0], 1, 1}); - bias_strides.push_back({filter_dims[i][0], 1, 1, 1}); - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - bias_desc[i], - cudnn_dtype, - 4, - bias_dims[i].data(), - bias_strides[i].data())); - in_dims.push_back({n, filter_dims[i][1], h, w}); - out_dims.push_back({n, filter_dims[i][0], h, w}); - in_strides.push_back({filter_dims[i][1] * h * w, h * w, w, 1}); - out_strides.push_back({oc * h * w, h * w, w, 1}); - - if (i < 2) { - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetConvolutionNdDescriptor( - conv_desc[i], - 2, - k0x0.data(), - k1x1.data(), - k1x1.data(), - CUDNN_CROSS_CORRELATION, - compute_type)); - } else { - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetConvolutionNdDescriptor( - conv_desc[i], - 2, - k1x1.data(), - k1x1.data(), - k1x1.data(), - CUDNN_CROSS_CORRELATION, - compute_type)); - } - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType( - conv_desc[i], CUDNN_DEFAULT_MATH)); -#if CUDA_VERSION >= 11000 && CUDNN_VERSION >= 8000 - if (!phi::allow_tf32_cudnn) { - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetConvolutionMathType(conv_desc[i], - CUDNN_FMA_MATH)); - } -#endif // CUDA_VERSION >= 11000 && CUDNN_VERSION >= 8000 - } - in_dims[2][1] *= 2; - in_strides[2][0] = oc * h * w; - out_strides[2][0] = filter_dims[2][0] * h * w; // this out is continuous. - in_strides[3][0] = filter_dims[2][0] * h * w; - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetConvolutionGroupCount(conv_desc[2], 2)); - - cudnnConvolutionFwdAlgo_t algo[4]; - auto handle = dev_ctx.cudnn_handle(); - size_t workspace_size_in_bytes = 0; // final workspace to allocate. 
- - size_t workspace_size_limit = 0; - if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { - int64_t max_user_size = - std::min(static_cast(FLAGS_conv_workspace_size_limit), - user_workspace_size); - workspace_size_limit = max_user_size * 1024 * 1024; - } - - for (int i = 0; i < 4; ++i) { - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - in_desc[i], cudnn_dtype, 4, in_dims[i].data(), in_strides[i].data())); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetTensorNdDescriptor(out_desc[i], - cudnn_dtype, - 4, - out_dims[i].data(), - out_strides[i].data())); - - int perf_count; - int best_algo_idx = 0; - size_t tmp_size = 0; - std::unique_ptr perf_results( - new cudnnConvolutionFwdAlgoPerf_t[phi::kNUM_CUDNN_FWD_ALGS]); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnGetConvolutionForwardAlgorithm_v7( - handle, - in_desc[i], - filter_desc[i], - conv_desc[i], - out_desc[i], - phi::kNUM_CUDNN_FWD_ALGS, - &perf_count, - perf_results.get())); - algo[i] = (perf_results.get())[best_algo_idx].algo; - - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnGetConvolutionForwardWorkspaceSize( - handle, - in_desc[i], - filter_desc[i], - conv_desc[i], - out_desc[i], - algo[i], - &tmp_size)); - - workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size); - } - cudnnActivationDescriptor_t cudnn_act_desc = - act_desc.descriptor(activation); - - int oc0 = filter_dims[0][0]; - int oc1 = filter_dims[1][0] - filter_dims[2][1] * 2; - int oc3 = filter_dims[3][0]; - int oc2 = oc - oc0 - oc1 - oc3; - - // branch1: pool + 1x1 conv - ScalingParamType alpha = 1.0f, beta = 0.0f; - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnPoolingForward(handle, - cudnn_pool_desc, - &alpha, - cudnn_input_desc, - input_data, - &beta, - pool_out_desc, - temp_data)); - - std::vector in_datas; - in_datas.push_back(static_cast(temp_data)); - in_datas.push_back(static_cast(input_data)); - in_datas.push_back( - static_cast(output_data + (oc0 + oc1) * h * w)); - temp_outs[1]->Resize(common::make_ddim(out_dims[2])); - T* temp2_data = - dev_ctx.Alloc(temp_outs[1], temp_outs[1]->numel() * sizeof(T)); - in_datas.push_back(static_cast(temp2_data + oc2 * h * w)); - - std::vector out_datas; - out_datas.push_back(static_cast(output_data)); - out_datas.push_back(static_cast(output_data + oc0 * h * w)); - out_datas.push_back(static_cast(temp2_data)); - out_datas.push_back( - static_cast(output_data + (oc0 + oc1 + oc2) * h * w)); - - for (int i = 0; i < 4; ++i) { - auto func = [&](void* cudnn_workspace) { - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnConvolutionBiasActivationForward( - handle, - &alpha, - in_desc[i], - in_datas[i], - filter_desc[i], - static_cast(filters[i]->data()), - conv_desc[i], - algo[i], - cudnn_workspace, - workspace_size_in_bytes, - &beta, - out_desc[i], - out_datas[i], - bias_desc[i], - static_cast(bias[i]->data()), - cudnn_act_desc, - out_desc[i], - out_datas[i])); - }; - auto workspace_handle = dev_ctx.cudnn_workspace_handle(); - workspace_handle.RunFunc(func, workspace_size_in_bytes); - } - - cudnnTensorDescriptor_t x_desc; - cudnnTensorDescriptor_t y_desc; - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnCreateTensorDescriptor(&x_desc)); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnCreateTensorDescriptor(&y_desc)); - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - x_desc, cudnn_dtype, 4, out_dims[3].data(), out_strides[2].data())); - 
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( - y_desc, cudnn_dtype, 4, out_dims[3].data(), out_strides[3].data())); - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnTransformTensor( - handle, - CudnnDataType::kOne(), - x_desc, - static_cast(out_datas[2]), - CudnnDataType::kZero(), - y_desc, - static_cast(output_data + (oc0 + oc1) * h * w))); - - for (int i = 0; i < 4; ++i) { - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnDestroyTensorDescriptor(in_desc[i])); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnDestroyTensorDescriptor(out_desc[i])); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnDestroyFilterDescriptor(filter_desc[i])); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnDestroyTensorDescriptor(bias_desc[i])); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnDestroyConvolutionDescriptor(conv_desc[i])); - } - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnDestroyTensorDescriptor(x_desc)); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnDestroyTensorDescriptor(y_desc)); - } -}; -#endif - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL(conv2d_inception_fusion, - GPU, - ALL_LAYOUT, - ops::CUDNNConvInceptionFusionOpKernel, - float, - double) {} diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc index 1c3b37d12d689..dc3a223d745b3 100644 --- a/paddle/fluid/operators/fused/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc @@ -36,7 +36,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X)'s rank must be 2, but received x's rank " "is:%d, x dim is:[%s]", x_dims.size(), @@ -48,7 +48,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto c_dims = ctx->GetInputDim("C0"); PADDLE_ENFORCE_EQ(h_dims, c_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of Input(H0) and Input(C0) should be " "same, but received h0 dims is:[%s], c0 dims is:[%s]", h_dims, @@ -58,14 +58,14 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto wx_dims = ctx->GetInputDim("WeightX"); PADDLE_ENFORCE_EQ(wx_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(WeightX) should be 2, but received " "WeightX's rank is:%d, WeightX dim is:[%s]", wx_dims.size(), wx_dims)); PADDLE_ENFORCE_EQ(wx_dims[0], x_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(WeightX) " "should equal to second dimension of Input(X), but " "received WeightX first dim is:%d, X second dim is:%d", @@ -77,14 +77,14 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(wh_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(WeightH) should be 2, but received " "WeightH rank is:%d, WeightH dim is:[%s]", wh_dims.size(), wh_dims)); PADDLE_ENFORCE_EQ(wh_dims[0], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(WeightH) " "should equal to frame size, but received WeightH " "first dim is:%d, frame size is:%d.", @@ -93,7 +93,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { 
PADDLE_ENFORCE_EQ(wh_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(WeightH) " "should equal to 4 * frame_size, but received WeightH " "second dimension is:%d, frame size is:%d.", @@ -103,14 +103,14 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto b_dims = ctx->GetInputDim("Bias"); PADDLE_ENFORCE_EQ(b_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Bias) should be 2, but received " "Bias rank is:%d, Bias dim is:[%s]", b_dims.size(), b_dims)); PADDLE_ENFORCE_EQ(b_dims[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Bias) should be 1, but " "received Bias's dimension is:[%s]", b_dims)); @@ -118,7 +118,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { if (ctx->Attrs().Get("use_peepholes")) { PADDLE_ENFORCE_EQ(b_dims[1], 7 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be " "7 * %d if enable peepholes connection, but received " "Bias dim is:[%s]", @@ -129,7 +129,7 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( b_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be " "4 * %d if disable peepholes, but received Bias dim is:[%s]", frame_size, diff --git a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc index b489f5e458bc1..725eb2682e1a2 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc @@ -26,24 +26,24 @@ void FusionSeqPoolConcatOp::InferShape( framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of FusionSeqPoolConcatOp should be greater " "than 1, but received value is %d.", ctx->Inputs("X").size())); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "FusionSeqPoolConcat"); int axis = ctx->Attrs().Get("axis"); - PADDLE_ENFORCE_EQ(axis, - 1, - platform::errors::InvalidArgument( - "FusionSeqPoolConcatOp only supports concat " - "axis=1 yet, but received axis value is %d", - axis)); + PADDLE_ENFORCE_EQ( + axis, + 1, + phi::errors::InvalidArgument("FusionSeqPoolConcatOp only supports concat " + "axis=1 yet, but received axis value is %d", + axis)); auto ins_dims = ctx->GetInputsDim("X"); const size_t n = ins_dims.size(); PADDLE_ENFORCE_GT(n, 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensors count should be greater than 0, " "but received value is %d.", n)); @@ -55,7 +55,7 @@ void FusionSeqPoolConcatOp::InferShape( // since input lod is not accessible here. 
PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims size of first input should be equal to 2, " "but received value is %d.", ins_dims[0].size())); @@ -116,7 +116,7 @@ class FusionSeqPoolConcatKernel : public framework::OpKernel { int w = static_cast(ins[0]->numel() / x0_dims[0]); PADDLE_ENFORCE_EQ(y_dims[1] % w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output of dims[1] should be dividable of w, but " "dims[1] is %d, w is %d.", y_dims[1], @@ -140,7 +140,7 @@ class FusionSeqPoolConcatKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( static_cast(ins[i]->numel() / x_dims[0]), w, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Width of all inputs should be equal, but the width of the %d-th " "input %d is not equal to the previous %d", i, @@ -149,7 +149,7 @@ class FusionSeqPoolConcatKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( x_lod.size(), bs + 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batchsize of all inputs should be equal, but the value of the " "%d-th %d is not equal to the previous %d.", i, diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc index 5bcd4d2fbc75a..352d427b8ab91 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc @@ -27,33 +27,31 @@ void FusionSeqPoolCVMConcatOp::InferShape( PADDLE_ENFORCE_GE( ctx->Inputs("X").size(), 1UL, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of FusionSeqPoolCVMConcatOp should not be empty.")); PADDLE_ENFORCE( ctx->HasOutput("Out"), - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of FusionSeqPoolCVMConcatOp should not be null.")); int axis = ctx->Attrs().Get("axis"); - PADDLE_ENFORCE_EQ(axis, - 1, - paddle::platform::errors::InvalidArgument( - "FusionSeqPoolCVMConcatOp only supports " - "concat axis=1 yet, but received %d.", - axis)); + PADDLE_ENFORCE_EQ( + axis, + 1, + phi::errors::InvalidArgument("FusionSeqPoolCVMConcatOp only supports " + "concat axis=1 yet, but received %d.", + axis)); bool use_cvm = ctx->Attrs().Get("use_cvm"); - PADDLE_ENFORCE_EQ(use_cvm, - true, - paddle::platform::errors::InvalidArgument( - "FusionSeqPoolCVMConcatOp only supports " - "use_cvm is true yet, but received %d.", - use_cvm)); + PADDLE_ENFORCE_EQ( + use_cvm, + true, + phi::errors::InvalidArgument("FusionSeqPoolCVMConcatOp only supports " + "use_cvm is true yet, but received %d.", + use_cvm)); auto ins_dims = ctx->GetInputsDim("X"); const size_t n = ins_dims.size(); - PADDLE_ENFORCE_GT(n, - 0UL, - paddle::platform::errors::InvalidArgument( - "Input tensors count should > 0.")); + PADDLE_ENFORCE_GT( + n, 0UL, phi::errors::InvalidArgument("Input tensors count should > 0.")); if (n == 1) { LOG(WARNING) << "Only have one input, may waste memory"; } @@ -62,7 +60,7 @@ void FusionSeqPoolCVMConcatOp::InferShape( // since input lod is not accessible here. 
PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dims size of first input should be 2.")); ctx->SetOutputDim("Out", {-1, ins_dims[0][axis] * static_cast(n)}); } @@ -120,7 +118,7 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel { int w = static_cast(ins[0]->numel() / x0_dims[0]); PADDLE_ENFORCE_EQ(y_dims[1] % w, 0, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output of dims[1] should be dividable of w")); phi::jit::seq_pool_attr_t attr(w, phi::jit::SeqPoolType::kSum); if (pooltype == "AVERAGE") { @@ -138,13 +136,13 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel { auto x_lod = ins[i]->lod()[0]; const T* src = ins[i]->data(); T* dst = y_data + i * w; - PADDLE_ENFORCE_EQ(static_cast(ins[i]->numel() / x_dims[0]), - w, - paddle::platform::errors::InvalidArgument( - "Width of all inputs should be equal.")); + PADDLE_ENFORCE_EQ( + static_cast(ins[i]->numel() / x_dims[0]), + w, + phi::errors::InvalidArgument("Width of all inputs should be equal.")); PADDLE_ENFORCE_EQ(x_lod.size(), bs + 1, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Batchsize of all inputs should be equal.")); for (size_t j = 0; j < bs; ++j) { attr.h = static_cast(x_lod[j + 1] - x_lod[j]); diff --git a/paddle/fluid/operators/fused/multi_gru_op.cc b/paddle/fluid/operators/fused/multi_gru_op.cc index df8e8c956a045..7011dfebb6719 100644 --- a/paddle/fluid/operators/fused/multi_gru_op.cc +++ b/paddle/fluid/operators/fused/multi_gru_op.cc @@ -37,10 +37,10 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( x_mat_dims.size(), 2, - platform::errors::InvalidArgument("The size of input X dims should be 2, " - "or 3 with second dimension equal to " - "1, but now Input X dim is:[%s] ", - x_dims)); + phi::errors::InvalidArgument("The size of input X dims should be 2, " + "or 3 with second dimension equal to " + "1, but now Input X dim is:[%s] ", + x_dims)); auto layers = ctx->Attrs().Get("layers"); auto wx_dims = ctx->GetInputsDim("WeightX"); @@ -48,7 +48,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( wx_dims[i][0], x_mat_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of flattened WeightX #%d" "should equal to last dimension of flattened input X, but " "received fattened WeightX dimension is:%d, flattened X dimension " @@ -62,7 +62,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { for (int i = 0; i < 2 * layers; ++i) { PADDLE_ENFORCE_EQ(wx_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of WeightX #%d should be 2, but received " "WeightX dim size is:%d, WeightX dim is:[%s] ", i, @@ -70,7 +70,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { wx_dims[i])); PADDLE_ENFORCE_EQ(wh_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of WeightH #%d should be 2, but received " "WeightH dim size is:%d, WeightH dim is:[%s] ", i, @@ -80,7 +80,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( wh_dims[i][1], 3 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of WeightH #%d " "should equal to 3 * frame_size, but received WeightH's " "second dimension is: %d, frame size is:%d", @@ -90,7 +90,7 @@ 
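The multi_gru_op.cc hunks that follow enforce a per-layer, per-direction shape contract: WeightX is `[input_width, 3 * frame_size]`, WeightH is `[frame_size, 3 * frame_size]`, and `frame_size` itself is read off `wh_dims[i][0]`. A small sketch of those invariants under invented dimensions:

```cpp
// Shape contract checked by MultiGRUOp::InferShape, with made-up sizes.
#include <array>
#include <cstdio>
#include <stdexcept>

int main() {
  const int input_width = 16;        // x_mat_dims[1]
  std::array<int, 2> wx = {16, 24};  // wx_dims[i]: [input_width, 3*frame]
  std::array<int, 2> wh = {8, 24};   // wh_dims[i]: [frame, 3*frame]
  const int frame_size = wh[0];
  if (wx[0] != input_width)
    throw std::invalid_argument(
        "WeightX dim 0 must match the last dim of flattened X");
  if (wh[1] != 3 * frame_size || wx[1] != 3 * frame_size)
    throw std::invalid_argument("gate dimension must be 3 * frame_size");
  std::printf("frame_size=%d, gate width=%d: shapes consistent\n",
              frame_size, 3 * frame_size);
  return 0;
}
```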
void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( wx_dims[i][1], 3 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of WeightX #%d " "should equal to 3 * frame_size, but received WeightX's " "second dimension is: %d, frame size is:%d", @@ -105,7 +105,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { int frame_size = static_cast(wh_dims[i][0]); PADDLE_ENFORCE_EQ(b_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Bias #%d should be 2, but received " "Bias rank is:%d, Bias dim is:[%s]", i, @@ -113,7 +113,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { b_dims[i])); PADDLE_ENFORCE_EQ(b_dims[i][0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Bias #%d should be 1, but " "received Bias first dim is:%d, Bias dim is:[%s]", i, @@ -122,7 +122,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( b_dims[i][1], frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias #%d must be [1, frame_size * 3], but " "received bias dim is:[%s], frame size is:%d", i, diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc similarity index 95% rename from paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc rename to paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc index 2b92cb6f76663..68c73f7d3500b 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/onednn/fusion_lstm_onednn_op.cc @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/fused/fusion_lstm_op.h" -#include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h" +#include "paddle/fluid/operators/fused/onednn/fusion_rnn_onednn.h" #include "paddle/phi/core/expect.h" namespace paddle { @@ -66,17 +66,17 @@ class LSTMMKLDNNHandler PADDLE_ENFORCE_EQ( ctx.Attr("gate_activation"), "sigmoid", - platform::errors::Unimplemented("oneDNN fusion_lstm supports only " - "sigmoid as a gate activation.")); + phi::errors::Unimplemented("oneDNN fusion_lstm supports only " + "sigmoid as a gate activation.")); PADDLE_ENFORCE_EQ( ctx.Attr("cell_activation"), "tanh", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "oneDNN fusion_lstm supports only tanh as a cell activation.")); PADDLE_ENFORCE_EQ( ctx.Attr("candidate_activation"), "tanh", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "oneDNN fusion_lstm supports only tanh a candidate activation.")); // Weights for int8 kernel are of a type s8 @@ -325,7 +325,7 @@ template class FusionLSTMMKLDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const bool is_bf16 = std::is_same::value; + const bool is_bf16 = std::is_same::value; const bool force_fp32_output = ctx.Attr("force_fp32_output"); // BF16 does not support force output @@ -407,14 +407,11 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { handler.template AcquireWeightHMemory(weight_h); } else if (framework::TransToProtoVarType(weight_h->dtype()) == paddle::framework::proto::VarType_Type_BF16) { - h0_memory_p = - handler.template AcquireH0Memory(h0); + h0_memory_p = handler.template AcquireH0Memory(h0); weight_x_memory_p = - handler.template AcquireWeightXMemory( - weight_x); + handler.template AcquireWeightXMemory(weight_x); weight_h_memory_p = - handler.template AcquireWeightHMemory( - weight_h); + handler.template AcquireWeightHMemory(weight_h); } else { h0_memory_p = handler.template AcquireH0Memory(h0); weight_x_memory_p = @@ -478,4 +475,4 @@ PD_REGISTER_STRUCT_KERNEL(fusion_lstm, ops::FusionLSTMMKLDNNKernel, float, uint8_t, - paddle::platform::bfloat16) {} + phi::dtype::bfloat16) {} diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h b/paddle/fluid/operators/fused/onednn/fusion_rnn_onednn.h similarity index 100% rename from paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h rename to paddle/fluid/operators/fused/onednn/fusion_rnn_onednn.h diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/onednn/multi_gru_onednn_op.cc similarity index 97% rename from paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc rename to paddle/fluid/operators/fused/onednn/multi_gru_onednn_op.cc index 8e11c91a117d1..c9545876a0dc6 100644 --- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/onednn/multi_gru_onednn_op.cc @@ -69,29 +69,29 @@ class MultiGRUHandler { PADDLE_ENFORCE_EQ( weights_x_.size(), layers_ * 2, - platform::errors::InvalidArgument("The number of WeightX inputs does " - "not match the number of layers.")); + phi::errors::InvalidArgument("The number of WeightX inputs does " + "not match the number of layers.")); PADDLE_ENFORCE_EQ( weights_h_.size(), layers_ * 2, - platform::errors::InvalidArgument("The number of WeightH inputs does " - "not match the number of layers.")); + phi::errors::InvalidArgument("The number of WeightH inputs does " + "not match the 
number of layers.")); if (!biases_.empty()) PADDLE_ENFORCE_EQ( biases_.size(), layers_ * 2, - platform::errors::InvalidArgument("The number of Bias inputs does " - "not match the number of layers.")); + phi::errors::InvalidArgument("The number of Bias inputs does " + "not match the number of layers.")); // oneDNN kernel has hardcoded activation functions PADDLE_ENFORCE_EQ( ctx.Attr("gate_activation"), "sigmoid", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "oneDNN fusion_gru supports only sigmoid as a gate activation.")); PADDLE_ENFORCE_EQ( ctx.Attr("activation"), "tanh", - platform::errors::Unimplemented( + phi::errors::Unimplemented( "oneDNN fusion_gru supports only tanh as an activation.")); N_ = x_lod_.size() - 1; // Number of sentences (batches) @@ -134,7 +134,7 @@ class MultiGRUHandler { PADDLE_ENFORCE_EQ( scale_weights.size(), layers_ * 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of weight scale inputs does " "not match the number of layers. Expected: %d. Actual: %d", layers_ * 2, @@ -212,7 +212,7 @@ class MultiGRUHandler { attrs_[2 * layer + (dir == R2L)]); PADDLE_ENFORCE_NOT_NULL( pd, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Primitive descriptor for gru_forward cannot be null.")); dev_ctx_.SetBlob(pd_key, pd); } diff --git a/paddle/fluid/operators/fused/quant_dequant_kernel.h b/paddle/fluid/operators/fused/quant_dequant_kernel.h index 8e8fdc95e91b5..63dbee42d6e7a 100644 --- a/paddle/fluid/operators/fused/quant_dequant_kernel.h +++ b/paddle/fluid/operators/fused/quant_dequant_kernel.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/operators/fake_quantize_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op.cc b/paddle/fluid/operators/fused/resnet_basic_block_op.cc index 58125a9b7f674..37315367189fa 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op.cc @@ -112,29 +112,29 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { // make sure Mean/RunningMean and Var/RunningVar share memory PADDLE_ENFORCE_EQ(ctx->Inputs("Mean1")[0], ctx->Outputs("Mean1Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mean1 and Mean1Out should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("Var1")[0], ctx->Outputs("Var1Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Var1 and Var1Out should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("Mean2")[0], ctx->Outputs("Mean2Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mean2 and Mean2Out should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("Var2")[0], ctx->Outputs("Var2Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Var2 and Var2Out should share the same memory")); if (has_shortcut) { PADDLE_ENFORCE_EQ(ctx->Inputs("Mean3")[0], ctx->Outputs("Mean3Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Mean3 and Mean3Out should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("Var3")[0], ctx->Outputs("Var3Out")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Var3 and 
Var3Out should share the same memory")); } @@ -143,10 +143,10 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( data_format, "NCHW", - platform::errors::InvalidArgument("The data format must equal to NCHW. " - "But received: the data format " - "= [%s]", - data_format)); + phi::errors::InvalidArgument("The data format must equal to NCHW. " + "But received: the data format " + "= [%s]", + data_format)); int stride1 = ctx->Attrs().Get("stride1"); int stride2 = ctx->Attrs().Get("stride2"); int padding1 = ctx->Attrs().Get("padding1"); @@ -158,13 +158,13 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x1_dims.size(), 4, - platform::errors::InvalidArgument("The dimensions of input " - "must equal to 4." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x1_dims, - x1_dims.size())); + phi::errors::InvalidArgument("The dimensions of input " + "must equal to 4." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x1_dims, + x1_dims.size())); // Calculate the dims of output1 int batch = x1_dims[0]; @@ -226,26 +226,26 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { // By default, the type of the scale, bias, mean, // and var tensors should be float when input tensor's dtype is float16. auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale1")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias1")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale2")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias2")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale1")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias1")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale2")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias2")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } @@ -546,8 +546,7 @@ class ResNetBasicBlockGradOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const { PADDLE_ENFORCE_NOT_NULL( ctx.InputVar(framework::GradVarName("Y")), - platform::errors::NotFound( - "Can not find Y@GRAD in the execution context.")); + phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc index 16e2261f1afb5..50a3b3c46137d 100644 --- 
a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc @@ -298,10 +298,9 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(ctx.GetPlace()), - true, - platform::errors::PreconditionNotMet("It must use XPUPlace.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), + true, + phi::errors::PreconditionNotMet("It must use XPUPlace.")); // input const phi::DenseTensor* x = ctx.Input("X"); @@ -704,10 +703,9 @@ class ResNetBasicBlockGradXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(ctx.GetPlace()), - true, - platform::errors::PreconditionNotMet("It must use XPUPlace.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), + true, + phi::errors::PreconditionNotMet("It must use XPUPlace.")); const phi::DenseTensor* y_grad = ctx.Input(framework::GradVarName("Y")); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index 5827cd3427dee..d4e9b3f8e4525 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -101,22 +101,22 @@ class ResNetUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->Inputs("MeanX")[0], ctx->Outputs("RunningMeanX")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "MeanX and RunningMeanX should share the same memory")); PADDLE_ENFORCE_EQ(ctx->Inputs("VarX")[0], ctx->Outputs("RunningVarX")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "VarX and RunningVarX should share the same memory")); if (has_shortcut) { PADDLE_ENFORCE_EQ( ctx->Inputs("MeanZ")[0], ctx->Outputs("RunningMeanZ")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "MeanZ and RunningMeanZ should share the same memory")); PADDLE_ENFORCE_EQ( ctx->Inputs("VarZ")[0], ctx->Outputs("RunningVarZ")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "VarZ and RunningVarZ should share the same memory")); } @@ -132,25 +132,25 @@ class ResNetUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 4, - platform::errors::InvalidArgument("The dimensions of input " - "must equal to 4." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, - x_dims.size())); - PADDLE_ENFORCE_EQ(w_dims.size(), - 4, - platform::errors::InvalidArgument( - "The dimensions of filter " - "must equal to 4." - "But received: the shape of filter " - "= [%s], the dimension of filter = [%d] ", - w_dims, - w_dims.size())); + phi::errors::InvalidArgument("The dimensions of input " + "must equal to 4." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, + x_dims.size())); + PADDLE_ENFORCE_EQ( + w_dims.size(), + 4, + phi::errors::InvalidArgument("The dimensions of filter " + "must equal to 4." 
+ "But received: the shape of filter " + "= [%s], the dimension of filter = [%d] ", + w_dims, + w_dims.size())); PADDLE_ENFORCE_EQ(bn_param_dims.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of bn param " "must equal to 4." "But received: the shape of bn param " @@ -208,16 +208,16 @@ class ResNetUnitOp : public framework::OperatorWithKernel { // and var tensors should be float when input tensor's dtype is float16. auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("ScaleX")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, - framework::TransToProtoVarType( - ctx.Input("BiasX")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("ScaleX")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType( + ctx.Input("BiasX")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } }; @@ -394,8 +394,7 @@ class ResNetUnitGradOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_NOT_NULL( ctx.InputVar(framework::GradVarName("Y")), - platform::errors::NotFound( - "Can not find Y@GRAD in the execution context.")); + phi::errors::NotFound("Can not find Y@GRAD in the execution context.")); return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cu b/paddle/fluid/operators/fused/resnet_unit_op.cu index 5b126008bf654..6afe03a67ceab 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cu +++ b/paddle/fluid/operators/fused/resnet_unit_op.cu @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h" #include "paddle/fluid/operators/fused/cudnn_norm_conv.cu.h" #include "paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -30,10 +30,10 @@ class ResNetUnitKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + phi::errors::PreconditionNotMet("It must use CUDAPlace.")); PADDLE_ENFORCE_EQ(platform::CudnnDataType::type, CUDNN_DATA_HALF, - platform::errors::Unavailable( + phi::errors::Unavailable( "ResNetUnitOp only supports float16 for now.")); // input x @@ -230,10 +230,10 @@ class ResNetUnitGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + phi::errors::PreconditionNotMet("It must use CUDAPlace.")); PADDLE_ENFORCE_EQ(platform::CudnnDataType::type, CUDNN_DATA_HALF, - platform::errors::Unavailable( + phi::errors::Unavailable( "ResNetUnitOp only supports float16 for now.")); const phi::DenseTensor *y_grad = @@ -420,10 +420,10 @@ class ResNetUnitGradKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; PD_REGISTER_STRUCT_KERNEL( - resnet_unit, GPU, ALL_LAYOUT, ops::ResNetUnitKernel, plat::float16) {} + resnet_unit, GPU, ALL_LAYOUT, ops::ResNetUnitKernel, phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(resnet_unit_grad, GPU, ALL_LAYOUT, ops::ResNetUnitGradKernel, - plat::float16) {} + phi::dtype::float16) {} #endif diff --git a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc index c00e58f8463ab..f50d452d6c285 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/device_wrapper.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -26,10 +26,9 @@ class ResNetUnitXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(place), - true, - platform::errors::PreconditionNotMet("It must use XPUPlace.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), + true, + phi::errors::PreconditionNotMet("It must use XPUPlace.")); bool is_nchw = (ctx.Attr("data_format") == "NCHW"); // input x @@ -188,10 +187,9 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(place), - true, - platform::errors::PreconditionNotMet("It must use XPUPlace.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), + true, + phi::errors::PreconditionNotMet("It must use XPUPlace.")); bool is_nchw = (ctx.Attr("data_format") == "NCHW"); const phi::DenseTensor *y_grad = @@ -365,11 +363,11 @@ PD_REGISTER_STRUCT_KERNEL(resnet_unit, XPU, ALL_LAYOUT, ops::ResNetUnitXPUKernel, - plat::float16, + phi::dtype::float16, float) {} PD_REGISTER_STRUCT_KERNEL(resnet_unit_grad, XPU, ALL_LAYOUT, ops::ResNetUnitGradXPUKernel, - plat::float16, + phi::dtype::float16, float) {} diff --git a/paddle/fluid/operators/fused/unity_build_rule.cmake b/paddle/fluid/operators/fused/unity_build_rule.cmake index 8605cd3cdae85..9ef1e53891d52 100644 --- a/paddle/fluid/operators/fused/unity_build_rule.cmake +++ b/paddle/fluid/operators/fused/unity_build_rule.cmake @@ -10,11 +10,7 @@ register_unity_group( fused_embedding_fc_lstm_op.cc fused_embedding_seq_pool_op.cc fusion_lstm_op.cc - fusion_repeated_fc_relu_op.cc - fusion_seqconv_eltadd_relu_op.cc - fusion_seqexpand_concat_fc_op.cc fusion_seqpool_concat_op.cc - fusion_squared_mat_sub_op.cc multi_gru_op.cc - mkldnn/multi_gru_mkldnn_op.cc + onednn/multi_gru_onednn_op.cc fusion_seqpool_cvm_concat_op.cc) diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc index 12c8ec9b81db1..851c448865363 100644 --- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc +++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc @@ -30,9 +30,9 @@ class SoftmaxMaskFuseUpperTriangleOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 4, - platform::errors::InvalidArgument("Input x must be in 4D dimension but " - "received the dimension of X is %d", - x_dims.size())); + phi::errors::InvalidArgument("Input x must be in 4D dimension but " + "received the dimension of X is %d", + x_dims.size())); ctx->SetOutputDim("Out", x_dims); ctx->ShareLoD("X", "Out"); diff --git a/paddle/fluid/operators/fused_token_prune_op.cc b/paddle/fluid/operators/fused_token_prune_op.cc index 9fab5c8e7c48d..144e91be396de 100644 --- a/paddle/fluid/operators/fused_token_prune_op.cc +++ b/paddle/fluid/operators/fused_token_prune_op.cc @@ -107,59 +107,59 @@ class FusedTokenPruneOp : public framework::OperatorWithKernel { auto new_mask_dim = ctx->GetInputDim("NewMask"); // check input dims number - PADDLE_ENFORCE_EQ(mask_dim.size(), - 4, - platform::errors::InvalidArgument( - "The input mask must be 4-dimension")); - 
PADDLE_ENFORCE_EQ(attn_dim.size(), - 4, - platform::errors::InvalidArgument( - "The input attn must be 4-dimension")); + PADDLE_ENFORCE_EQ( + mask_dim.size(), + 4, + phi::errors::InvalidArgument("The input mask must be 4-dimension")); + PADDLE_ENFORCE_EQ( + attn_dim.size(), + 4, + phi::errors::InvalidArgument("The input attn must be 4-dimension")); PADDLE_ENFORCE_EQ( x_dim.size(), 3, - platform::errors::InvalidArgument("The input x must be 4-dimension")); - PADDLE_ENFORCE_EQ(new_mask_dim.size(), - 4, - platform::errors::InvalidArgument( - "The input attn must be 4-dimension")); + phi::errors::InvalidArgument("The input x must be 4-dimension")); + PADDLE_ENFORCE_EQ( + new_mask_dim.size(), + 4, + phi::errors::InvalidArgument("The input attn must be 4-dimension")); // check input dims relations PADDLE_ENFORCE_EQ(mask_dim[0], attn_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of mask and attn should be the same" "which is batch size")); PADDLE_ENFORCE_EQ(mask_dim[1], attn_dim[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dim of mask and attn should be the same" "which is nb_head")); PADDLE_ENFORCE_EQ(mask_dim[0], x_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dim of mask and x should be the same" "which is batch size")); PADDLE_ENFORCE_EQ( mask_dim[2], mask_dim[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim and the fourth dim of mask should be the same" "which is max seq len")); PADDLE_ENFORCE_EQ( attn_dim[2], attn_dim[3], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim and the fourth dim of mask should be the same" "which is max seq len")); PADDLE_ENFORCE_EQ(attn_dim[2], mask_dim[2], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of mask and attn should be the same" "which is max seq len")); PADDLE_ENFORCE_EQ(attn_dim[2], x_dim[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The third dim of mask and the second dim of attn" "should be the same which is max seq len")); diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.cc b/paddle/fluid/operators/generator/get_expected_kernel_func.cc index a0f4f7a7e22fa..d9276c5eb9d62 100644 --- a/paddle/fluid/operators/generator/get_expected_kernel_func.cc +++ b/paddle/fluid/operators/generator/get_expected_kernel_func.cc @@ -105,8 +105,8 @@ phi::KernelKey GetConcatExpectedKernelType( op_ptr->SetDnnFallback(true); } if (flag == 0) { - PADDLE_THROW(platform::errors::InvalidArgument( - "All Inputs of Concat OP are Empty!")); + PADDLE_THROW( + phi::errors::InvalidArgument("All Inputs of Concat OP are Empty!")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); } @@ -128,7 +128,7 @@ phi::KernelKey GetReduceExpectedKernelType( platform::is_xpu_place(ctx.GetPlace()) || platform::is_custom_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "float16 can only be used on GPU or NPU or XPU place")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -236,7 +236,7 @@ phi::KernelKey GetSoftmaxExpectedKernelType( platform::is_xpu_place(ctx.GetPlace()) || platform::is_custom_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "float16 can only be used on GPU/XPU and custom place")); } return phi::KernelKey( @@ -255,7 +255,7 @@ phi::KernelKey GetSoftmaxGradExpectedKernelType( if 
(!(platform::is_gpu_place(ctx.GetPlace()) || platform::is_xpu_place(ctx.GetPlace()) || platform::is_custom_place(ctx.GetPlace()))) - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "float16 can only be used on GPU/XPU and custom place")); } return phi::KernelKey( @@ -275,7 +275,7 @@ phi::KernelKey GetStridedSliceExpectedKernelType( platform::is_same_place(tensor.place(), ctx.device_context().GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Place of context is %s. Place of input tensor is %s. They " "are should be same, but reveived different place.", string::to_string(ctx.device_context().GetPlace()), @@ -375,18 +375,18 @@ phi::KernelKey GetInstanceNormExpectedKernelType( in_param_type = framework::proto::VarType::FP64; } if (ctx.HasInput("Scale")) { - PADDLE_ENFORCE_EQ(in_param_type, - framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + in_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + phi::errors::InvalidArgument("Scale input should be of float type")); } if (ctx.HasInput("Bias")) { - PADDLE_ENFORCE_EQ(in_param_type, - framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + in_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + phi::errors::InvalidArgument("Bias input should be of float type")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -423,7 +423,7 @@ phi::KernelKey GetConvExpectedKernelType( PADDLE_ENFORCE_EQ( input_data_type, filter_data_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input and filter data type should be consistent, " "but received input data type is %s and filter type " "is %s", diff --git a/paddle/fluid/operators/generator/templates/operator_utils.c.j2 b/paddle/fluid/operators/generator/templates/operator_utils.c.j2 index 068704e6d0687..c42032a45cdcd 100644 --- a/paddle/fluid/operators/generator/templates/operator_utils.c.j2 +++ b/paddle/fluid/operators/generator/templates/operator_utils.c.j2 @@ -811,7 +811,7 @@ class {{op_name | to_composite_grad_opmaker_name}} : public prim::CompositeGradO {% if "tensor_name" in attr_dict[attrs[i]] %} auto {{'tensor_' + attrs[i]}} = this->GetOptionalSingleForwardInput("{{attr_dict[attrs[i]]['tensor_name']}}"); if ({{'tensor_' + attrs[i]}}) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "We don't support dynamic tensor attribute {{attr_dict[attrs[i]]['tensor_name']}} for {{op_name}} composite" "for now. ")); } @@ -819,7 +819,7 @@ class {{op_name | to_composite_grad_opmaker_name}} : public prim::CompositeGradO {% if "tensors_name" in attr_dict[attrs[i]] %} auto {{'tensors_' + attrs[i]}} = this->GetOptionalMultiForwardInput("{{attr_dict[attrs[i]]['tensors_name']}}"); if ({{'tensors_' + attrs[i]}}) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "We don't support dynamic tensors attribute {{attr_dict[attrs[i]]['tensor_name']}} for {{op_name}} composite " "for now. 
")); } diff --git a/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc b/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc index 8ae92b04b7df4..d11b445f3a9b8 100644 --- a/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc +++ b/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc @@ -30,7 +30,7 @@ class GetTensorFromSelectedRowsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("X").front(), framework::proto::VarType::SELECTED_ROWS, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input X(%s)'s type should be SelectedRows, " "but the received is %s", ctx->Inputs("X").front(), @@ -38,7 +38,7 @@ class GetTensorFromSelectedRowsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetOutputsVarType("Out").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output Out(%s)'s type should be phi::DenseTensor, " "but the received is %s", ctx->Outputs("Out").front(), diff --git a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc index c88d36602bd79..6fdd6d380a7fe 100644 --- a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc +++ b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc @@ -38,7 +38,7 @@ class CUDNNGridSampleOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "It must use CUDAPlace when using CUDA Kernel")); auto& dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); @@ -87,7 +87,7 @@ class CUDNNGridSampleGradOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "It must use CUDAPlace when using CUDA Kernel")); auto& dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index e23b3c6c42d5f..c948315189a15 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -52,7 +52,7 @@ class GRUOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(input_size, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Input) must be 3 " "times of frame_size in GRUOp, but received %d " "(Input) vs %d (frame_size).", @@ -62,7 +62,7 @@ class GRUOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_dims[1], frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3], but received [%d, %d] (Weight) vs [%d, %d] (frame_size).", weight_dims[0], @@ -74,7 +74,7 @@ class GRUOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( h0_dims[1], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of Input(H0) must be equal to frame_size, but " "received %d (width of H0) vs %d (frame_size).", h0_dims[1], @@ -87,7 +87,7 @@ class GRUOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_height, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, 
frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -96,7 +96,7 @@ class GRUOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -233,7 +233,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_size, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Input) must be 3 times of " "frame_size in GRUOp, but received %d (Input) vs %d (frame_size).", input_size, @@ -241,7 +241,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_height, frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3], but received [%d, %d] (Weight) vs [%d, %d] (frame_size).", weight_height, @@ -251,7 +251,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3], but received [%d, %d] (Weight) vs [%d, %d] (frame_size).", weight_height, @@ -263,7 +263,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( h0_dims[1], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of Input(H0) must be equal to frame_size, but " "received %d (width of H0) vs %d (frame_size).", h0_dims[1], @@ -279,7 +279,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_height, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -288,7 +288,7 @@ class GRUGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -406,7 +406,7 @@ class GRUCPUKernel : public framework::OpKernel { frame_size /*height of height*/); PADDLE_ENFORCE_NOT_NULL( packed_gate, - platform::errors::NotFound( + phi::errors::NotFound( "The calculation result of packed_gate by " "GEMM_ALLOC should not be null when using MKL.")); blas.GEMM_PACK(CblasBMatrix, @@ -424,7 +424,7 @@ class GRUCPUKernel : public framework::OpKernel { frame_size /*height of height*/); PADDLE_ENFORCE_NOT_NULL( packed_state, - platform::errors::NotFound( + phi::errors::NotFound( "The calculation result of packed_state by " "GEMM_ALLOC should not be null when using MKL.")); blas.GEMM_PACK(CblasBMatrix, diff --git a/paddle/fluid/operators/gru_unit_op.cc b/paddle/fluid/operators/gru_unit_op.cc index b217d58e6d5da..5a29abda1f369 100644 --- a/paddle/fluid/operators/gru_unit_op.cc +++ b/paddle/fluid/operators/gru_unit_op.cc @@ -45,7 +45,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { if (ctx->IsRuntime() || input_size >= 0) { PADDLE_ENFORCE_EQ(input_size, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Input) must be 3 " "times of frame_size in GRUUnitOp, but received %d " 
"(Input) vs %d (frame_size).", @@ -55,7 +55,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_height, frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3] in GRUUnitOp, but received [%d, %d] (Weight) vs [%d, %d] " "(frame_size).", @@ -66,7 +66,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3] in GRUUnitOp, but received [%d, %d] (Weight) vs [%d, %d] " "(frame_size).", @@ -82,7 +82,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_height, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -91,7 +91,7 @@ class GRUUnitOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -203,7 +203,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_size, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Input) must be 3 " "times of frame_size in GRUUnitGradOp, but received %d " "(Input) vs %d (frame_size).", @@ -213,7 +213,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_height, frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3] in GRUUnitGradOp, but received [%d, %d] (Weight) vs [%d, %d] " "(frame_size).", @@ -224,7 +224,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( weight_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Weight) matrix must be [frame_size, frame_size " "* 3] in GRUUnitGradOp, but received [%d, %d] (Weight) vs [%d, %d] " "(frame_size).", @@ -240,7 +240,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_height, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, @@ -249,7 +249,7 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( bias_width, frame_size * 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Bias must be [1, frame_size * 3], but received " "[%d, %d] (Bias) vs [1, %d] (frame_size * 3).", bias_height, diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h index 933176433e2d7..fa774e2bef3c2 100644 --- a/paddle/fluid/operators/gru_unit_op.h +++ b/paddle/fluid/operators/gru_unit_op.h @@ -46,7 +46,7 @@ class GRUUnitKernel : public framework::OpKernel { else ReluCUDAFunctor()(d, x, y); } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported activation type, only supports identity, sigmoid, tanh " "and relu.")); } @@ -169,7 +169,7 @@ class GRUUnitGradKernel 
: public framework::OpKernel { else if (act_type == relu) ReluGradFunctor()(d, x, y, dy, dx); else - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported activation type, only supports identity, sigmoid, tanh " "and relu.")); } diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc index 03887561934b7..002a98f3538e4 100644 --- a/paddle/fluid/operators/hash_op.cc +++ b/paddle/fluid/operators/hash_op.cc @@ -46,7 +46,7 @@ class HashOp : public framework::OperatorWithKernel { auto dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(dims.size(), 2UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of hash_op's dimensions must be 2")); std::vector out_dims; int num_hash = ctx->Attrs().Get("num_hash"); diff --git a/paddle/fluid/operators/hinge_loss_op.cc b/paddle/fluid/operators/hinge_loss_op.cc index dea3ce3fe695b..dcf16cf104cc8 100644 --- a/paddle/fluid/operators/hinge_loss_op.cc +++ b/paddle/fluid/operators/hinge_loss_op.cc @@ -35,7 +35,7 @@ class HingeLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( pred_dims, label_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(input) and Input(label) should have the same " "shape, but received input shape [%s] != label shape [%s]", pred_dims, @@ -44,13 +44,13 @@ class HingeLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( pred_dims.size(), 2, - platform::errors::InvalidArgument("Input(input) rank should be 2, " - "but received input rank(%d) != 2", - pred_dims.size())); + phi::errors::InvalidArgument("Input(input) rank should be 2, " + "but received input rank(%d) != 2", + pred_dims.size())); PADDLE_ENFORCE_EQ(pred_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(input) should be 1, " "as each row of input contains a real value, " "but received second dimension of input (%d) != 1", @@ -112,7 +112,7 @@ class HingeLossGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(loss_grad_dims, pred_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of loss gradient should be the same as " "the shape of Input(input), but received the loss " "gradient shape [%s] != input shape [%s]", diff --git a/paddle/fluid/operators/im2sequence_op.cc b/paddle/fluid/operators/im2sequence_op.cc index 0486dd12c4519..d11734c1a6c99 100644 --- a/paddle/fluid/operators/im2sequence_op.cc +++ b/paddle/fluid/operators/im2sequence_op.cc @@ -27,19 +27,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "The input 'X' of Im2SequenceOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("The input 'X' of Im2SequenceOp is not found.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "The output 'Out' of Im2SequenceOp is not found.")); auto in_dim = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(in_dim.size(), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions size of input 'X' in Im2SequenceOp " "should be 4. 
But " "received dimensions size=[%d], dimensions=[%s].", @@ -159,13 +159,13 @@ class Im2SequenceGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "The input 'X' of Im2SequenceGradOp is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::NotFound( - "The input %s of Im2SequenceGradOp is not found.", - framework::GradVarName("Out"))); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::NotFound("The input %s of Im2SequenceGradOp is not found.", + framework::GradVarName("Out"))); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } }; diff --git a/paddle/fluid/operators/index_impl.cu.h b/paddle/fluid/operators/index_impl.cu.h index 629717f61933a..364eb9d574036 100644 --- a/paddle/fluid/operators/index_impl.cu.h +++ b/paddle/fluid/operators/index_impl.cu.h @@ -89,7 +89,7 @@ void IndexKernel(const KPDevice &dev_ctx, Tensor *out, Functor func) { <<>>(out_data, numel, main_offset, func); break; default: { - PADDLE_THROW(paddle::platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unsupported vectorized size: %d !", vec_size)); break; } diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index c06885633f348..8ac3fee1d0452 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -59,7 +59,7 @@ void IndexSelectInner(const framework::ExecutionContext& context, PADDLE_ENFORCE_GE( index_data[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (index) of OP(index_select) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -68,7 +68,7 @@ void IndexSelectInner(const framework::ExecutionContext& context, PADDLE_ENFORCE_LT( index_data[i], input_dim[dim], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (index) of OP(index_select) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 4d7730d687d8d..21cd6ad3e084a 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -17,7 +17,7 @@ #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -31,7 +31,7 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ("linear", interp_method, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Interpolation method can only be \"linear\" when" "Input(X) dimension is 3, but got method = %s .", interp_method)); @@ -44,7 +44,7 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( inputs_name.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(SizeTensor)'size of Op(interpolate) must be 1. 
" "Attr(out_shape)'s length must be 1 for 3-D input tensor, but got " "size = %d .", @@ -67,7 +67,7 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( scale_tensor.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Scale's dimension size must be 1, but got dimension = %d .", scale_tensor.size())); out_w = -1; @@ -90,13 +90,13 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's dimension size must be 1, but got dimension = %d .", out_size_dim.size())); PADDLE_ENFORCE_EQ( out_size_dim[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's 0-th dimension's value must be 1, but got value = %d .", out_size_dim[0])); ctx->ShareLoD("X", "Out"); @@ -119,7 +119,7 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ("bilinear" == interp_method || "nearest" == interp_method || "bicubic" == interp_method, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Interpolation method can only be \"bilinear\" " "or \"nearest\" or \"bicubic\" when " "Input(X) dimension is 4, but got method is %s.", @@ -133,7 +133,7 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( inputs_name.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(SizeTensor)'size of Op(interpolate) must be 2. " "Attr(out_shape)'s length must be 2 for 4-D input " "tensor, but got size = %d .", @@ -157,7 +157,7 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( scale_tensor.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Scale's dimension size must be 1, but got dimension = %d .", scale_tensor.size())); out_h = -1; @@ -186,13 +186,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument("OutSize's dimension size must be 1, " - "but got dimension size is %d .", - out_size_dim.size())); + phi::errors::InvalidArgument("OutSize's dimension size must be 1, " + "but got dimension size is %d .", + out_size_dim.size())); PADDLE_ENFORCE_EQ( out_size_dim[0], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's dimension[0] must be 2, but got dimension[0] is %d .", out_size_dim[0])); ctx->ShareLoD("X", "Out"); @@ -215,7 +215,7 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( "trilinear", interp_method, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Interpolation method can only be \"trilinear\" when Input(X) " "dimension is 5, but got method = %s .", interp_method)); @@ -228,7 +228,7 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( inputs_name.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(SizeTensor)'s size of Op(interpolate) must be 3. 
" "Attr(out_shape)'s length must be 3 for 5-D input " "tensor, but got size = %d .", @@ -253,7 +253,7 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( scale_tensor.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Scale's dimension size must be 1, but got size = %d .", scale_tensor.size())); out_d = -1; @@ -288,12 +288,12 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's dimension size must be 1, but got size is %d.", out_size_dim.size())); PADDLE_ENFORCE_EQ(out_size_dim[0], 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "OutSize's dim[0] must be 3, but got size is %d.", out_size_dim[0])); ctx->ShareLoD("X", "Out"); @@ -321,7 +321,7 @@ class InterpolateOp : public framework::OperatorWithKernel { auto dim_x = ctx->GetInputDim("X"); // NCHW format PADDLE_ENFORCE( dim_x.size() == 3 || dim_x.size() == 4 || dim_x.size() == 5, - platform::errors::Unimplemented( + phi::errors::Unimplemented( "Input(X) dimension must be 3, 4 or 5, but got dimension = %d .", dim_x.size())); if (dim_x.size() == 3) { diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index bfbb15b076448..8a71b6d96a055 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -953,7 +953,7 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); framework::DDim dim_out; @@ -1049,12 +1049,12 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_h, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_h in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); @@ -1205,17 +1205,17 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_d, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_d in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_h, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_h in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); @@ -1648,7 +1648,7 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::NotFound("This kernel only runs on GPU device.")); + phi::errors::NotFound("This kernel only runs on GPU device.")); auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); @@ -1670,7 +1670,7 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::NotFound("This kernel only runs on GPU device.")); + phi::errors::NotFound("This kernel only runs on GPU device.")); auto* input_grad = 
ctx.Output(framework::GradVarName("X")); auto* output_grad = diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 563879e301d12..793b5fa629ee1 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -37,7 +37,7 @@ inline std::vector get_new_shape( auto tensor = list_new_shape_tensor[i]; PADDLE_ENFORCE_EQ(tensor->dims(), common::make_ddim({1}), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of dimension tensor should be [1]," "but received d%.", tensor->dims())); @@ -890,7 +890,7 @@ static void Interpolate1DCPUFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); framework::DDim dim_out; @@ -969,12 +969,12 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_h, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_h in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); framework::DDim dim_out; @@ -1090,17 +1090,17 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, } PADDLE_ENFORCE_GT(out_d, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_d in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_h, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_h in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); PADDLE_ENFORCE_GT(out_w, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "out_w in Attr(out_shape) of Op(interpolate) " "should be greater than 0.")); diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc index 710cdaeb707b6..39a2b31fa6925 100644 --- a/paddle/fluid/operators/isfinite_op.cc +++ b/paddle/fluid/operators/isfinite_op.cc @@ -61,7 +61,7 @@ class OverflowOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( true, false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input type mismatch, the type of Input(X) must be Tensor or " "SelectedRows, please check your input.")); } diff --git a/paddle/fluid/operators/isfinite_op.cu b/paddle/fluid/operators/isfinite_op.cu index 300229cbeca66..71aaa66a5ad0d 100755 --- a/paddle/fluid/operators/isfinite_op.cu +++ b/paddle/fluid/operators/isfinite_op.cu @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
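Nearly every hunk in this section applies the same two mechanical rewrites: the error factories move from the paddle::platform namespace to phi (platform::errors::* -> phi::errors::*), and the fp16/bf16 scalar types move from paddle/fluid/platform headers to paddle/phi/common. A minimal before/after sketch of the pattern, assembled from the surrounding hunks (a fragment, not compilable on its own, since PADDLE_ENFORCE_GT and out_w come from the enclosing operator code):

// Before: error factory under paddle::platform, fp16 header under fluid.
#include "paddle/fluid/platform/float16.h"
PADDLE_ENFORCE_GT(out_w,
                  0,
                  platform::errors::InvalidArgument(
                      "out_w in Attr(out_shape) of Op(interpolate) "
                      "should be greater than 0."));

// After: the same check routed through the phi core library.
#include "paddle/phi/common/float16.h"
PADDLE_ENFORCE_GT(out_w,
                  0,
                  phi::errors::InvalidArgument(
                      "out_w in Attr(out_shape) of Op(interpolate) "
                      "should be greater than 0."));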
#include "paddle/fluid/operators/isfinite_op.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/float16.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -23,7 +23,9 @@ REGISTER_OP_CUDA_KERNEL( ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel, - ops::OverflowKernel, + ops::OverflowKernel, ops::OverflowKernel); REGISTER_OP_CUDA_KERNEL( @@ -31,5 +33,5 @@ REGISTER_OP_CUDA_KERNEL( ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel, - ops::OverflowKernel, + ops::OverflowKernel, ops::OverflowKernel); diff --git a/paddle/fluid/operators/isfinite_op.h b/paddle/fluid/operators/isfinite_op.h index 5352ccc99df92..0eb6243a31873 100644 --- a/paddle/fluid/operators/isfinite_op.h +++ b/paddle/fluid/operators/isfinite_op.h @@ -20,7 +20,7 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/common/transform.h" #include "paddle/phi/kernels/isfinite_kernel.h" #include "paddle/phi/kernels/reduce_all_kernel.h" @@ -89,7 +89,7 @@ inline void TensorContainsNAN(const phi::DenseTensor& tensor, return; } #endif - PADDLE_THROW(platform::errors::Unimplemented("Not supported on %s.", place)); + PADDLE_THROW(phi::errors::Unimplemented("Not supported on %s.", place)); } inline void TensorContainsInf(const phi::DenseTensor& tensor, phi::DenseTensor* out) { @@ -106,7 +106,7 @@ inline void TensorContainsInf(const phi::DenseTensor& tensor, return; } #endif - PADDLE_THROW(platform::errors::Unimplemented("Not supported on %s.", place)); + PADDLE_THROW(phi::errors::Unimplemented("Not supported on %s.", place)); } inline void TensorIsfinite(const phi::DenseTensor& tensor, phi::DenseTensor* out) { @@ -123,7 +123,7 @@ inline void TensorIsfinite(const phi::DenseTensor& tensor, return; } #endif - PADDLE_THROW(platform::errors::Unimplemented("Not supported on %s.", place)); + PADDLE_THROW(phi::errors::Unimplemented("Not supported on %s.", place)); } // copy the result bool to cpu @@ -173,7 +173,7 @@ class OverflowKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(true, false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input type mismatch, the type of Input(X) " "must be phi::DenseTensor or " "SelectedRows, please check your input.")); diff --git a/paddle/fluid/operators/l1_norm_op.h b/paddle/fluid/operators/l1_norm_op.h index c268a6c51fbc5..3cfcf1959a387 100644 --- a/paddle/fluid/operators/l1_norm_op.h +++ b/paddle/fluid/operators/l1_norm_op.h @@ -15,7 +15,7 @@ limitations under the License. 
*/ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" namespace paddle { namespace operators { @@ -34,7 +34,8 @@ class L1NormKernel : public framework::OpKernel { auto &place = *context.template device_context().eigen_device(); - EigenL1Norm, T>::Eval(place, out, x); + phi::funcs::EigenL1Norm, T>::Eval( + place, out, x); } }; @@ -49,7 +50,7 @@ class L1NormGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( d_out->numel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(GRAD@Out) of L1NormGradOP should be a scalar.")); phi::DenseTensor *dx = context.Output(framework::GradVarName("X")); @@ -62,7 +63,7 @@ class L1NormGradKernel : public framework::OpKernel { *context.template device_context().eigen_device(); Eigen::DSizes x_dsize(x->numel()); - EigenL1NormGrad, T>::Eval( + phi::funcs::EigenL1NormGrad, T>::Eval( place, dx_eigen, d_out_eigen, x_eigen, x_dsize); } }; diff --git a/paddle/fluid/operators/limit_by_capacity_op.cc b/paddle/fluid/operators/limit_by_capacity_op.cc index 387e30ae647c9..77c29a4cef9f1 100644 --- a/paddle/fluid/operators/limit_by_capacity_op.cc +++ b/paddle/fluid/operators/limit_by_capacity_op.cc @@ -52,14 +52,14 @@ class LimitByCapacityOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( expert_count_dtype, capacity_dtype, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the expert_count and capacity should be same")); PADDLE_ENFORCE_EQ( expert_count_dtype, framework::proto::VarType::INT64, - platform::errors::InvalidArgument("The dtype of the expert_count and " - "capacity should be same as int64")); + phi::errors::InvalidArgument("The dtype of the expert_count and " + "capacity should be same as int64")); return phi::KernelKey(expert_count_dtype, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc deleted file mode 100644 index e017e43d7db2d..0000000000000 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ /dev/null @@ -1,410 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/linear_chain_crf_op.h" - -#include - -namespace paddle { -namespace operators { - -class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Emission", - "(phi::DenseTensor). When a phi::DenseTensor " - "input,A 2-D phi::DenseTensor" - " with shape [N x D], where N is the size of the " - "mini-batch and D is the total tag number. The unscaled emission " - "weight matrix for the linear chain CRF. When a Tensor input," - "A Tensor with shape [N x S x D], where N is batch number," - "S is max length of sequences, D is the total tag number." 
- "A phi::DenseTensor with type float32, float64."); - AddInput("Transition", - "(Tensor, default Tensor) A 2-D Tensor with shape " - "[(D + 2) x D]. The learnable parameter for the linear_chain_crf " - "operator. See more details in the operator's comments."); - AddInput("Label", - "(phi::DenseTensor), when a phi::DenseTensor input, " - "[N x 1], where N is the total element number in a mini-batch. " - "when a Tensor input, [N x S], where N is batch number. " - "S is max length of sequences. The ground truth." - "A phi::DenseTensor with int64."); - AddInput("Length", - "(Tensor, default Tensor) A Tensor with shape " - "[M x 1], where M is the sequence number in a mini-batch." - "A Tensor with type int64.") - .AsDispensable(); - AddOutput( - "Alpha", - "(Tensor, default Tensor), the same shape with Emission. " - "The forward vectors for the entire batch. Denote it as $\alpha$. " - "$\alpha$ is a memo table used to calculate the normalization " - "factor in CRF. $\alpha[k, v]$ stores the unnormalized " - "probabilities of all possible unfinished sequences of tags that end " - "at position $k$ with tag $v$. For each $k$, " - "$\alpha[k, v]$ is a vector of length $D$ with a component for " - "each tag value $v$. This vector is called a forward vector and " - "will also be used in backward computations.") - .AsIntermediate(); - AddOutput( - "EmissionExps", - "(Tensor, default Tensor), the same shape with Emission. " - "The exponentials of Input(Emission). This is an intermediate " - "computational result in forward computation, and will be reused in " - "backward computation." - "A phi::DenseTensor with type float32, float64.") - .AsIntermediate(); - AddOutput( - "TransitionExps", - "(Tensor, default Tensor) A 2-D Tensor with shape " - "[(D + 2) x D]. The exponentials of Input(Transition). This is an " - "intermediate computational result in forward computation, and " - "will be reused in backward computation." - "A phi::DenseTensor with type float32, float64.") - .AsIntermediate(); - AddOutput( - "LogLikelihood", - "(Tensor, default Tensor) The logarithm of the conditional " - "likelihood of each training sample in a mini-batch. This is a 2-D " - "tensor with shape [S x 1], where S is the sequence number in a " - "mini-batch. Note: S is equal to the sequence number in a mini-batch. " - "A Tensor with type float32, float64."); - AddComment(R"DOC( -Conditional Random Field defines an undirected probabilistic graph with nodes -denoting random variables and edges denoting dependencies between these -variables. CRF learns the conditional probability $P(Y|X)$, where -$X = (x_1, x_2, ... , x_n)$ are structured inputs and -$Y = (y_1, y_2, ... , y_n)$ are labels for the inputs. - -Linear chain CRF is a special case of CRF that is useful for sequence labeling -task. Sequence labeling tasks do not assume a lot of conditional -independences among inputs. The only constraint they impose is that the input -and output must be linear sequences. Thus, the graph of such a CRF is a simple -chain or a line, which results in the linear chain CRF. - -This operator implements the Forward-Backward algorithm for the linear chain -CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and -http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details. - -Equation: - -1. Denote Input(Emission) to this operator as $x$ here. -2. The first D values of Input(Transition) to this operator are for starting -weights, denoted as $a$ here. -3. 
The next D values of Input(Transition) of this operator are for ending -weights, denoted as $b$ here. -4. The remaining values of Input(Transition) are for transition weights, -denoted as $w$ here. -5. Denote Input(Label) as $s$ here. - -The probability of a sequence $s$ of length $L$ is defined as: -$$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} - + \sum_{l=1}^L x_{s_l} - + \sum_{l=2}^L w_{s_{l-1},s_l})$$ - -where $Z$ is a normalization value so that the sum of $P(s)$ over -all possible sequences is 1, and $x$ is the emission feature weight -to the linear chain CRF. - -Finally, the linear chain CRF operator outputs the logarithm of the conditional -likelihood of each training sample in a mini-batch. - -NOTE: - -1. The feature function for a CRF is made up of the emission features and the -transition features. The emission feature weights are NOT computed in -this operator. They MUST be computed first before this operator is called. - -2. Because this operator performs global normalization over all possible -sequences internally, it expects UNSCALED emission feature weights. -Please do not call this op with the emission feature being output of any -nonlinear activation. - -3. The 2nd dimension of Input(Emission) MUST be equal to the tag number. - -)DOC"); - } -}; - -class LinearChainCRFOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("Emission"), "Input", "Emission", "LinearChainCRF"); - OP_INOUT_CHECK( - ctx->HasInput("Transition"), "Input", "Transition", "LinearChainCRF"); - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "LinearChainCRF"); - - OP_INOUT_CHECK( - ctx->HasOutput("Alpha"), "Output", "Alpha", "LinearChainCRF"); - OP_INOUT_CHECK(ctx->HasOutput("EmissionExps"), - "Output", - "EmissionExps", - "LinearChainCRF"); - OP_INOUT_CHECK(ctx->HasOutput("TransitionExps"), - "Output", - "TransitionExps", - "LinearChainCRF"); - OP_INOUT_CHECK(ctx->HasOutput("LogLikelihood"), - "Output", - "LogLikelihood", - "LinearChainCRF"); - - auto transition_dims = ctx->GetInputDim("Transition"); - PADDLE_ENFORCE_EQ(transition_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "The Input(Transition) should be a 2-D tensor. But " - "received: input rank %u, input shape [%s].", - transition_dims.size(), - transition_dims)); - bool check = true; - if ((!ctx->IsRuntime()) && - (transition_dims[0] <= 0 || transition_dims[1] <= 0)) { - check = false; - } - if (check) { - PADDLE_ENFORCE_EQ( - transition_dims[0] - 2, - transition_dims[1], - platform::errors::InvalidArgument( - "An invalid dimension for the Input(Transition), which should " - "be a 2-D tensor with shape [(D + 2) x D]. But received: input " - "rank %u, " - "input shape [%s].", - transition_dims.size(), - transition_dims)); - } - auto emission_dims = ctx->GetInputDim("Emission"); - if (ctx->HasInput("Length")) { - PADDLE_ENFORCE_EQ(emission_dims.size(), - 3, - platform::errors::InvalidArgument( - "The Input(Emission) should be a 3-D tensor. But " - "received: input rank %u, input shape [%s].", - emission_dims.size(), - emission_dims)); - auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ( - (label_dims.size() == 3UL && label_dims[2] == 1) || - (label_dims.size() == 2UL), - true, - platform::errors::InvalidArgument( - "The Input(Label) should be a 3-D tensor with last dimension " - "fixed to 1 or a 2-D tensor in padding mode. 
But received: input " - "rank %u, input shape [%s].", - label_dims.size(), - label_dims)); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(emission_dims[0], - label_dims[0], - platform::errors::InvalidArgument( - "The batch size of Input(Emission) " - "and Input(Label) should be the same. But " - "received Input(Emission): " - "rank %u, shape [%s]; received Input(Label): " - "rank %u, shape [%s].", - emission_dims.size(), - emission_dims, - label_dims.size(), - label_dims)); - PADDLE_ENFORCE_EQ(emission_dims[1], - label_dims[1], - platform::errors::InvalidArgument( - "The max length of Input(Emission) " - "and Input(Label) should be the same. But " - "received Input(Emission): " - "rank %u, shape [%s]; received Input(Label): " - "rank %u, shape [%s].", - emission_dims.size(), - emission_dims, - label_dims.size(), - label_dims)); - } - } else { - PADDLE_ENFORCE_EQ( - emission_dims.size(), - 2, - platform::errors::InvalidArgument( - "The Input(Emission) should be a 2-D tensor. But received: " - "input rank %u, input shape [%s].", - emission_dims.size(), - emission_dims)); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(emission_dims[1], - transition_dims[1], - platform::errors::InvalidArgument( - "The 2nd dimension of the Input(Emission) and " - "the Input(Transition) " - "should be equal to the tag number. But received " - "Input(Emission): rank " - "%u, shape [%s]; received Input(Transition): " - "rank %u, shape [%s].", - emission_dims.size(), - emission_dims, - transition_dims.size(), - transition_dims)); - } - - auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ( - label_dims.size(), - 2, - platform::errors::InvalidArgument( - "The Input(Label) should be a 2-D tensor with the 2nd " - "dimensions fixed to 1. But received: input rank %u, " - "input shape [%s].", - label_dims.size(), - label_dims)); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - emission_dims[0], - label_dims[0], - platform::errors::InvalidArgument( - "The first dimension of Input(Emission) and Input(Label) " - "should be the same. But received Input(Emission): rank %u, " - "shape " - "[%s]; received Input(Label): rank %u, shape [%s].", - emission_dims.size(), - emission_dims, - label_dims.size(), - label_dims)); - } - } - ctx->SetOutputDim("Alpha", emission_dims); - ctx->SetOutputDim("EmissionExps", emission_dims); - ctx->SetOutputDim("TransitionExps", transition_dims); - // TODO(caoying) This is tricky. The 1st dimension of Output(LogLikelihood) - // is the sequence number in a mini-batch. The dimension set here should be - // resized to its correct size in the function Compute. Fix this once we can - // get LoD information in the InferShape interface. - ctx->SetOutputDim("LogLikelihood", {emission_dims[0], 1}); - } - - protected: - // Explicitly set that the data type of computation kernel of linear_chain_crf - // is determined by its input "Emission". 
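// (So a float32 or float64 Emission selects the float or double CPU kernel
// registered at the bottom of this file; the int64 Label takes no part in
// kernel selection.)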
- phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Emission"), - platform::CPUPlace()); - } -}; - -class LinearChainCRFGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("EmissionExps"), - "Input", - "EmissionExps", - "LinearChainCRFGrad"); - OP_INOUT_CHECK(ctx->HasInput("TransitionExps"), - "Input", - "TransitionExps", - "LinearChainCRFGrad"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("LogLikelihood")), - "Input", - framework::GradVarName("LogLikelihood"), - "LinearChainCRFGrad"); - - auto transition_exps_dims = ctx->GetInputDim("TransitionExps"); - auto emission_exps_dims = ctx->GetInputDim("EmissionExps"); - if (ctx->HasOutput(framework::GradVarName("Emission"))) { - ctx->SetOutputDim(framework::GradVarName("Emission"), emission_exps_dims); - if (ctx->HasInput("Length") == false) { - ctx->ShareLoD("Emission", framework::GradVarName("Emission")); - } - } - - if (ctx->HasOutput(framework::GradVarName("Transition"))) { - ctx->SetOutputDim(framework::GradVarName("Transition"), - transition_exps_dims); - ctx->ShareLoD("Transition", framework::GradVarName("Transition")); - } - } - - protected: - // Explicitly set that the data type of output of the linear_chain_crf_grad - // operator is determined by its input: gradients of LogLikelihood. - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("LogLikelihood")), - platform::CPUPlace()); - } -}; - -template -class LinearChainCRFGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("linear_chain_crf_grad"); - op->SetAttrMap(this->Attrs()); - op->SetInput("Emission", this->Input("Emission")); - op->SetInput("Transition", this->Input("Transition")); - op->SetInput("Label", this->Input("Label")); - op->SetInput("Alpha", this->Output("Alpha")); - op->SetInput("EmissionExps", this->Output("EmissionExps")); - op->SetInput("TransitionExps", this->Output("TransitionExps")); - if (this->HasInput("Length")) { - op->SetInput("Length", this->Input("Length")); - } - op->SetInput(framework::GradVarName("LogLikelihood"), - this->OutputGrad("LogLikelihood")); - - op->SetOutput(framework::GradVarName("Emission"), - this->InputGrad("Emission")); - op->SetOutput(framework::GradVarName("Transition"), - this->InputGrad("Transition")); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(LinearChainCRFGradNoNeedBufferVarsInferer, - "Transition", - "Emission"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(linear_chain_crf, - ops::LinearChainCRFOp, - ops::LinearChainCRFOpMaker, - ops::LinearChainCRFGradMaker, - ops::LinearChainCRFGradMaker); -REGISTER_OPERATOR(linear_chain_crf_grad, - ops::LinearChainCRFGradOp, - ops::LinearChainCRFGradNoNeedBufferVarsInferer); - -PD_REGISTER_STRUCT_KERNEL(linear_chain_crf, - CPU, - ALL_LAYOUT, - ops::LinearChainCRFOpKernel, - float, - double) {} -PD_REGISTER_STRUCT_KERNEL(linear_chain_crf_grad, - CPU, - ALL_LAYOUT, - ops::LinearChainCRFGradOpKernel, - float, - double) {} diff --git 
a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h deleted file mode 100644 index 2891320506391..0000000000000 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ /dev/null @@ -1,457 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -static inline T NormalizeL1(T* x, size_t len) { - T sum = 0.; - for (size_t i = 0; i < len; ++i) sum += x[i]; - // (This comment is from the old LinearChainCRFLayer.) - // Right now, we just bet that sum won't be zero. If this really happens, we - // will figure out what should be done then. - PADDLE_ENFORCE_GT( - sum, - 0., - platform::errors::InvalidArgument( - "The unnormalized probabilities of all possible unfinished " - "sequences must be greater than 0.")); - T s = 1. / sum; - for (size_t i = 0; i < len; ++i) x[i] *= s; - return sum; -} - -template -struct ScalarMul { - explicit ScalarMul(const T& scalar) : scalar(scalar) {} - T operator()(const T& val) const { return val * scalar; } - - T scalar; -}; - -using framework::LoD; - -template -class LinearChainCRFOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* emission_weights = - ctx.Input("Emission"); - const phi::DenseTensor* transition_weights = - ctx.Input("Transition"); - - phi::DenseTensor* emission_exps = - ctx.Output("EmissionExps"); - phi::DenseTensor* transition_exps = - ctx.Output("TransitionExps"); - phi::DenseTensor* alpha = ctx.Output("Alpha"); - phi::DenseTensor* ll = ctx.Output("LogLikelihood"); - - // Because the computation codes only runs on CPU, here the memory for all - // the outputs is FIXED to be allocated on the CPU memory. - emission_exps->mutable_data(platform::CPUPlace()); - alpha->mutable_data(platform::CPUPlace()); - transition_exps->mutable_data(platform::CPUPlace()); - auto emission_dims = emission_weights->dims(); - - const phi::DenseTensor* label = ctx.Input("Label"); - phi::DenseTensor emission_weights_tmp = *emission_weights; - phi::DenseTensor label_tmp = *label; - phi::DenseTensor emission_exps_tmp = *emission_exps; - phi::DenseTensor alpha_tmp = *alpha; - int64_t seq_num = 0; - int64_t batch_size; - int64_t tag_num; - const int64_t* length_data = nullptr; - framework::LoD in_lod; - if (ctx.HasInput("Length")) { - const phi::DenseTensor* label_length = - ctx.Input("Length"); - length_data = label_length->data(); - seq_num = label_length->numel(); - PADDLE_ENFORCE_EQ( - seq_num, - emission_dims[0], - platform::errors::InvalidArgument( - "the size of Input(length) must be equal to " - "emission_dims[0]. 
But input_size = %d, emission_dims[0] = %d.", - seq_num, - emission_dims[0])); - auto label_dims = label->dims(); - PADDLE_ENFORCE_EQ( - seq_num, - label_dims[0], - platform::errors::InvalidArgument( - "the size of Input(length) must be equal to " - "label_dims[0]. But input_size = %d, label_dims[0] = %d.", - seq_num, - label_dims[0])); - - batch_size = emission_dims[0] * emission_dims[1]; - tag_num = emission_dims[2]; - emission_weights_tmp.Resize({batch_size, tag_num}); - label_tmp.Resize({batch_size, 1}); - alpha_tmp.Resize({batch_size, tag_num}); - emission_exps_tmp.Resize({batch_size, tag_num}); - phi::funcs::set_constant( - ctx.device_context(), emission_exps, static_cast(0.0)); - phi::funcs::set_constant( - ctx.device_context(), alpha, static_cast(0.0)); - } else { - in_lod = ctx.Input("Label")->lod(); - PADDLE_ENFORCE_NE(in_lod.size(), - 0, - platform::errors::InvalidArgument( - "Input(Label) must be a sequence.")); - seq_num = in_lod[0].size() - 1; - batch_size = emission_dims[0]; - tag_num = emission_dims[1]; - } - - // Resize the output tensor to its correct dimension. - ll->Resize({seq_num, 1}); - ll->mutable_data(platform::CPUPlace()); - // Now, all the inputs and outputs should be on the CPU memory. - phi::DenseTensor emission_row_max; - emission_row_max.mutable_data( - common::make_ddim({static_cast(batch_size), 1}), - platform::CPUPlace()); - auto& place = - *ctx.template device_context().eigen_device(); - auto x = framework::EigenMatrix::From(emission_weights_tmp); - auto x_row_max = framework::EigenMatrix::From(emission_row_max); - x_row_max.device(place) = - x.maximum(Eigen::DSizes(1)) - .reshape(Eigen::DSizes(static_cast(batch_size), 1)); - auto x_exps = framework::EigenMatrix::From(emission_exps_tmp); - x_exps.device(place) = - (x - x_row_max.broadcast(Eigen::DSizes(1, tag_num))).exp(); - auto w = framework::EigenMatrix::From(*transition_weights); - auto w_exps = framework::EigenMatrix::From(*transition_exps); - w_exps.device(place) = w.exp(); - T* log_likelihood = ll->data(); - for (int64_t i = 0; i < seq_num; ++i) { - int64_t start_pos = 0; - int64_t end_pos = 0; - if (ctx.HasInput("Length")) { - start_pos = i * emission_dims[1]; - end_pos = start_pos + length_data[i]; - } else { - start_pos = static_cast(in_lod[0][i]); - end_pos = static_cast(in_lod[0][i + 1]); - } - if (end_pos == start_pos) { - // If an empty input sequence is given, pad 0 for its cost. 
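// (end_pos == start_pos occurs when length_data[i] == 0 in padding mode, or
// when two adjacent LoD offsets coincide; writing 0 keeps LogLikelihood's
// [seq_num, 1] shape intact.)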
- log_likelihood[i] = 0.; - continue; - } - const phi::DenseTensor one_seq = - emission_weights_tmp.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_row_max = - emission_row_max.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_exps = - emission_exps_tmp.Slice(start_pos, end_pos); - const phi::DenseTensor one_seq_label = - label_tmp.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_alpha = alpha_tmp.Slice(start_pos, end_pos); - log_likelihood[i] = ForwardOneSequence(one_seq, - one_seq_row_max, - one_seq_exps, - *transition_weights, - *transition_exps, - one_seq_label, - &one_seq_alpha); - } - }; - - private: - T ForwardOneSequence(const phi::DenseTensor& emission, - const phi::DenseTensor& emission_row_max, - const phi::DenseTensor& emission_exps, - const phi::DenseTensor& trans_weights, - const phi::DenseTensor& trans_weight_exps, - const phi::DenseTensor& label, - phi::DenseTensor* alpha) const { - const T* x = emission.data(); - const T* x_row_max = emission_row_max.data(); - const T* x_exps = emission_exps.data(); - const T* w = trans_weights.data(); - const T* w_exps = trans_weight_exps.data(); - T* alpha_value = alpha->data(); - - auto x_dims = emission.dims(); - const size_t seq_length = x_dims[0]; - const size_t tag_num = x_dims[1]; - // The 1st row of w are transition weights for start mask. - // The 2nd row of w are transition weights for end mask. - // Transition weights between other tags begin from the 3rd row of w. - const size_t state_trans_base_idx = 2; - - for (size_t i = 0; i < tag_num; ++i) { - alpha_value[i] = w_exps[i] * x_exps[i]; - } - T ll = -x_row_max[0] - std::log(NormalizeL1(alpha_value, tag_num)); - - for (size_t k = 1; k < seq_length; ++k) { - for (size_t i = 0; i < tag_num; ++i) { - T sum = 0.; - for (size_t j = 0; j < tag_num; ++j) { - sum += alpha_value[(k - 1) * tag_num + j] * // (*) - w_exps[(j + state_trans_base_idx) * tag_num + i]; - } - alpha_value[k * tag_num + i] = x_exps[k * tag_num + i] * sum; - } - // NormalizeL1 is to avoid underflow or overflow at (*). - ll -= x_row_max[k] + - std::log(NormalizeL1(alpha_value + k * tag_num, tag_num)); - } - T sum = 0.; - for (size_t i = 0; i < tag_num; ++i) { - sum += alpha_value[(seq_length - 1) * tag_num + i] * w_exps[tag_num + i]; - } - ll -= std::log(sum); - // Now ll is equal to -log(Z). - - const int64_t* lbl = label.data(); - PADDLE_ENFORCE_LT( - static_cast(*std::max_element(lbl, lbl + seq_length)), - tag_num, - platform::errors::InvalidArgument( - "An invalid tag label that excesses the largest tag number.")); - - // Calculate the nominator part, which depends on the label sequence. 
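// (In the notation of the op comment above, this nominator is
// a_{s_1} + \sum_{l} x_{l, s_l} + \sum_{l >= 2} w_{s_{l-1}, s_l} + b_{s_L}.
// Since ll holds -log(Z) at this point, the -ll returned below equals
// log(Z) - score(s).)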
- ll += w[lbl[0]] /*start transition*/ + x[lbl[0]] + - w[tag_num + lbl[seq_length - 1]] /*end transition*/; - for (size_t k = 1; k < seq_length; ++k) { - ll += x[k * tag_num + lbl[k]] + - w[(lbl[k - 1] + state_trans_base_idx) * tag_num + lbl[k]]; - } - return -ll; - } -}; - -template -class LinearChainCRFGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* label = ctx.Input("Label"); - const phi::DenseTensor* emission_exps = - ctx.Input("EmissionExps"); - const phi::DenseTensor* transition_exps = - ctx.Input("TransitionExps"); - const phi::DenseTensor* alpha = ctx.Input("Alpha"); - const T* ll_grad = - ctx.Input(framework::GradVarName("LogLikelihood")) - ->data(); - phi::DenseTensor* emission_grad = - ctx.Output(framework::GradVarName("Emission")); - auto* emission_grad_data = - emission_grad->mutable_data(platform::CPUPlace()); - memset(emission_grad_data, 0, emission_grad->numel() * sizeof(T)); - phi::DenseTensor alpha_tmp = *alpha; - phi::DenseTensor label_tmp = *label; - phi::DenseTensor emission_exps_tmp = *emission_exps; - phi::DenseTensor emission_grad_tmp = *emission_grad; - // getting seq_num using padding or not - int64_t seq_num = 0; - framework::LoD in_lod; - const int64_t* length_data = nullptr; - if (ctx.HasInput("Length")) { - const phi::DenseTensor* label_length = - ctx.Input("Length"); - length_data = label_length->data(); - seq_num = label_length->numel(); - auto emission_dims = emission_grad->dims(); - auto label_dims = label->dims(); - emission_grad_tmp.Resize( - {emission_dims[0] * emission_dims[1], emission_dims[2]}); - label_tmp.Resize({label_dims[0] * label_dims[1], 1}); - alpha_tmp.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]}); - emission_exps_tmp.Resize( - {emission_dims[0] * emission_dims[1], emission_dims[2]}); - } else { - in_lod = ctx.Input("Label")->lod(); - PADDLE_ENFORCE_NE(in_lod.size(), - 0, - platform::errors::InvalidArgument( - "Input(Label) must be a sequence.")); - seq_num = static_cast(in_lod[0].size() - 1); - } - - phi::DenseTensor* transition_grad = - ctx.Output(framework::GradVarName("Transition")); - - // TODO(caoying) Fix this constraint. When the Input(Emission) is from the - // data reader operator, it can have no gradients. - if (transition_grad) { - transition_grad->mutable_data(platform::CPUPlace()); - phi::funcs::set_constant( - ctx.device_context(), transition_grad, static_cast(0.)); - } - // Now, all the inputs and outputs should be on the CPU memory. - auto emission_dims = emission_exps->dims(); - // Beta is the memo table used in dynamic programming to calculate the - // backward vectors. For a backward vector i (the i-th row of beta), it - // captures the unnormalized probabilities of partial sequences starting - // at position i. 
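Before the backward table declared just below, a worked toy version of the forward recursion deleted above may help. Everything here is invented for illustration (2 tags, one sequence of length 3, all weight exponentials set to 1), and the row-max subtraction the kernel performs for numerical stability is dropped because the toy values cannot overflow; the indexing scheme and the L1 normalization mirror ForwardOneSequence:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int tag_num = 2, seq_len = 3, base = 2;  // base = state_trans_base_idx
  // Rows 0 and 1 of w_exps hold start/end weights; transitions begin at row 2.
  std::vector<double> w_exps((tag_num + base) * tag_num, 1.0);
  std::vector<double> x_exps(seq_len * tag_num, 1.0);  // exp(emissions)
  std::vector<double> alpha(seq_len * tag_num, 0.0);

  // NormalizeL1: rescale a row to sum to 1 and return the original sum.
  auto normalize_l1 = [](double* v, int n) {
    double sum = 0.;
    for (int i = 0; i < n; ++i) sum += v[i];
    for (int i = 0; i < n; ++i) v[i] /= sum;
    return sum;
  };

  // Initial state: start weights times first emissions.
  for (int i = 0; i < tag_num; ++i) alpha[i] = w_exps[i] * x_exps[i];
  double ll = -std::log(normalize_l1(alpha.data(), tag_num));
  for (int k = 1; k < seq_len; ++k) {
    for (int i = 0; i < tag_num; ++i) {
      double sum = 0.;
      for (int j = 0; j < tag_num; ++j)
        sum += alpha[(k - 1) * tag_num + j] * w_exps[(j + base) * tag_num + i];
      alpha[k * tag_num + i] = x_exps[k * tag_num + i] * sum;
    }
    ll -= std::log(normalize_l1(alpha.data() + k * tag_num, tag_num));
  }
  double end_sum = 0.;  // close every path with its end-weight exponential
  for (int i = 0; i < tag_num; ++i)
    end_sum += alpha[(seq_len - 1) * tag_num + i] * w_exps[tag_num + i];
  ll -= std::log(end_sum);
  // With all exponentials equal to 1 there are 2^3 = 8 unit-score paths,
  // so Z = 8 and ll = -log(8) ~= -2.0794.
  std::printf("-log(Z) = %.4f\n", ll);
  return 0;
}

The beta table declared next plays the symmetric role for the backward direction.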
- phi::DenseTensor beta; - beta.mutable_data(emission_dims, platform::CPUPlace()); - if (ctx.HasInput("Length")) { - beta.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]}); - } - - for (int64_t i = 0; i < seq_num; ++i) { - int64_t start_pos = 0; - int64_t end_pos = 0; - if (ctx.HasInput("Length")) { - start_pos = i * emission_dims[1]; - end_pos = start_pos + length_data[i]; - } else { - start_pos = static_cast(in_lod[0][i]); - end_pos = static_cast(in_lod[0][i + 1]); - } - - if (end_pos == start_pos) { - continue; - } - const phi::DenseTensor one_seq_emission_exps = - emission_exps_tmp.Slice(start_pos, end_pos); - const phi::DenseTensor one_seq_label = - label_tmp.Slice(start_pos, end_pos); - const phi::DenseTensor one_seq_alpha = - alpha_tmp.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_beta = beta.Slice(start_pos, end_pos); - phi::DenseTensor one_seq_emission_grad = - emission_grad_tmp.Slice(start_pos, end_pos); - BackwardOneSequence(ctx.template device_context(), - ll_grad[i], - one_seq_emission_exps, - *transition_exps, - one_seq_alpha, - one_seq_label, - &one_seq_beta, - transition_grad, - &one_seq_emission_grad); - } - }; - - private: - void BackwardOneSequence(const phi::CPUContext& ctx, - const T ll_grad, - const phi::DenseTensor& emission_exps, - const phi::DenseTensor& transition_exps, - const phi::DenseTensor& alpha, - const phi::DenseTensor& label, - phi::DenseTensor* beta, - phi::DenseTensor* transition_grad, - phi::DenseTensor* emission_grad) const { - const T* w_exps = transition_exps.data(); - const T* x_exps = emission_exps.data(); - const int64_t* label_value = label.data(); - T* beta_value = beta->data(); - auto x_dims = emission_exps.dims(); - const size_t seq_length = x_dims[0]; - const size_t tag_num = x_dims[1]; - const size_t state_trans_base_idx = 2; - - // Calculate the backward vectors: beta. - // First, calculate the initial state. - for (size_t i = 0; i < tag_num; ++i) { - beta_value[(seq_length - 1) * tag_num + i] = w_exps[tag_num + i]; - } - NormalizeL1(beta_value + (seq_length - 1) * tag_num, tag_num); - for (int k = static_cast(seq_length) - 2; k >= 0; --k) { - for (size_t i = 0; i < tag_num; ++i) { - T sum = 0.; - for (size_t j = 0; j < tag_num; ++j) { - sum += w_exps[(i + state_trans_base_idx) * tag_num + j] * // (**) - x_exps[(k + 1) * tag_num + j] * - beta_value[(k + 1) * tag_num + j]; - } - beta_value[k * tag_num + i] = sum; - } - // NormalizeL1 is to avoid underflow or overflow at (**). - NormalizeL1(beta_value + k * tag_num, tag_num); - } - - auto x_grad_mat = framework::EigenMatrix::From(*emission_grad); - auto alpha_mat = framework::EigenMatrix::From(alpha); - auto beta_mat = framework::EigenMatrix::From(*beta); - - auto* place = ctx.eigen_device(); - auto prob = alpha_mat * beta_mat; - auto row_sum = prob.sum(Eigen::DSizes(1)) - .reshape(Eigen::DSizes(seq_length, 1)) - .broadcast(Eigen::DSizes(1, tag_num)); - x_grad_mat.device(*place) = - (prob / row_sum).unaryExpr(ScalarMul(ll_grad)); - - for (size_t k = 0; k < seq_length; ++k) { - x_grad_mat(k, label_value[k]) -= static_cast(ll_grad); - } - - if (transition_grad) { - T* trans_grad = transition_grad->data(); - for (size_t k = 0; k < tag_num; ++k) { - // Do not multiply by the output gradient here, because x_grad_mat has - // already done this. 
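// (x_grad_mat already holds (marginal probability - one-hot gold label) *
// ll_grad, which at the first and last time steps coincides with the start-
// and end-weight gradients being accumulated here.)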
- trans_grad[k] += x_grad_mat(/*from start state*/ 0, k); - trans_grad[tag_num + k] += - x_grad_mat(/*to end state*/ seq_length - 1, k); - } - - auto x_exps_mat = framework::EigenMatrix::From(emission_exps); - - // TODO(caoying): Fix this to avoid using this local variable if we can - // profile the training process. - phi::DenseTensor tmp; - tmp.mutable_data(beta->dims(), platform::CPUPlace()); - auto tmp_mat = framework::EigenMatrix::From(tmp); - auto prob = beta_mat * x_exps_mat; - auto row_sum = prob.sum(Eigen::DSizes(1)) - .reshape(Eigen::DSizes(seq_length, 1)) - .broadcast(Eigen::DSizes(1, tag_num)); - tmp_mat.device(*place) = prob / row_sum; - - for (size_t k = 1; k < seq_length; ++k) { - T sum = 0.; - for (size_t i = 0; i < tag_num; ++i) { - for (size_t j = 0; j < tag_num; ++j) { - sum += w_exps[(i + state_trans_base_idx) * tag_num + j] * // (**) - alpha_mat(k - 1, i) * tmp_mat(k, j); - } - } - sum = 1. / sum; - for (size_t i = 0; i < tag_num; ++i) { - for (size_t j = 0; j < tag_num; ++j) { - trans_grad[(i + state_trans_base_idx) * tag_num + j] += - sum * w_exps[(i + state_trans_base_idx) * tag_num + j] * - alpha_mat(k - 1, i) * tmp_mat(k, j) * ll_grad; - } - } - trans_grad[(label_value[k - 1] + state_trans_base_idx) * tag_num + - label_value[k]] -= static_cast(ll_grad); - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/lite/CMakeLists.txt b/paddle/fluid/operators/lite/CMakeLists.txt deleted file mode 100644 index ca3b62648378b..0000000000000 --- a/paddle/fluid/operators/lite/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -op_library(lite_engine_op DEPS lite_engine lite_tensor_utils) diff --git a/paddle/fluid/operators/lite/lite_engine_op.cc b/paddle/fluid/operators/lite/lite_engine_op.cc deleted file mode 100644 index 0ec1c55f7abee..0000000000000 --- a/paddle/fluid/operators/lite/lite_engine_op.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/lite/lite_engine_op.h" - -#include -#include - -namespace paddle { - -namespace operators { - -class LiteEngineOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Xs", "A list of inputs.").AsDuplicable(); - AddOutput("Ys", "A list of outputs.").AsDuplicable(); - AddAttr( - "engine_key", - "The engine_key here is used to distinguish different Lite Engines"); - AddComment("Lite engine operator."); - } -}; - -class LiteInferVarType : public framework::VarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override {} -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(lite_engine, ops::LiteEngineOp, ops::LiteEngineOpMaker); diff --git a/paddle/fluid/operators/lite/lite_engine_op.h b/paddle/fluid/operators/lite/lite_engine_op.h deleted file mode 100644 index 756fec24d9874..0000000000000 --- a/paddle/fluid/operators/lite/lite_engine_op.h +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/inference/analysis/helper.h" -#include "paddle/fluid/inference/lite/engine.h" -#include "paddle/fluid/inference/lite/tensor_utils.h" -#include "paddle/fluid/inference/utils/singleton.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" - -namespace paddle { -namespace operators { - -class LiteEngineOp : public framework::OperatorBase { - private: - std::vector in_names_; - std::vector out_names_; - paddle::lite_api::PaddlePredictor *engine_; - framework::proto::VarType::Type precision_; - bool use_gpu_; - bool zero_copy_; - - public: - LiteEngineOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) { - in_names_ = Inputs("Xs"); - out_names_ = Outputs("Ys"); - engine_ = - inference::Singleton::Global().Get( - Attr("engine_key")); - if (Attr("enable_int8")) { - precision_ = framework::proto::VarType_Type_INT8; - } else { - precision_ = framework::proto::VarType_Type_FP32; - } - use_gpu_ = Attr("use_gpu"); - zero_copy_ = Attr("zero_copy"); - } - - void SetEngine(paddle::lite_api::PaddlePredictor *engine) { - engine_ = engine; - } - - protected: - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - Execute(scope, dev_place); - } - - void Execute(const framework::Scope &scope, - const platform::Place &dev_place) const { - const platform::DeviceContext *ctx = - platform::DeviceContextPool::Instance().Get(dev_place); - for (size_t i = 0; i < in_names_.size(); i++) { - phi::DenseTensor src_t = - 
inference::analysis::GetFromScope(scope, - in_names_[i]); - paddle::lite_api::Tensor dst_t = *(engine_->GetInput(i)); - VLOG(3) << "== fluid -> lite (" << in_names_[i] << " -> " - << engine_->GetInputNames()[i] << ")"; - inference::lite::utils::TensorCopy(&dst_t, &src_t, *ctx, zero_copy_); - } - VLOG(3) << "lite engine run"; - engine_->Run(); - VLOG(3) << "lite engine run done"; - for (size_t i = 0; i < out_names_.size(); i++) { - paddle::lite_api::Tensor src_t = *(engine_->GetOutput(i)); - phi::DenseTensor *dst_t = - &inference::analysis::GetFromScope(scope, - out_names_[i]); - VLOG(3) << "== lite -> fluid (" << out_names_[i] << " -> " - << engine_->GetOutputNames()[i] << ")"; - inference::lite::utils::TensorCopy(dst_t, &src_t, *ctx, zero_copy_); - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/lite/ut_helper.h b/paddle/fluid/operators/lite/ut_helper.h index 3d574b1f844c8..ba55b7066da1e 100644 --- a/paddle/fluid/operators/lite/ut_helper.h +++ b/paddle/fluid/operators/lite/ut_helper.h @@ -60,8 +60,8 @@ void serialize_params(std::string* str, for (const auto& param : params) { PADDLE_ENFORCE_NOT_NULL( scope->FindVar(param), - platform::errors::NotFound("Block should already have a '%s' variable", - param)); + phi::errors::NotFound("Block should already have a '%s' variable", + param)); auto* tensor = scope->FindVar(param)->GetMutable(); framework::SerializeToStream(os, *tensor, ctx); } @@ -81,7 +81,7 @@ void RandomizeTensor(phi::DenseTensor* tensor, const platform::Place& place) { size_t num_elements = analysis::AccuDims(dims, dims.size()); PADDLE_ENFORCE_GT(num_elements, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor dimension of the randomized tensor " "function should be greater than zero.")); platform::CPUPlace cpu_place; diff --git a/paddle/fluid/operators/load_combine_op.h b/paddle/fluid/operators/load_combine_op.h index 4641c39111fad..be94eab242491 100644 --- a/paddle/fluid/operators/load_combine_op.h +++ b/paddle/fluid/operators/load_combine_op.h @@ -40,7 +40,7 @@ class LoadCombineOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT(out_var_names.size(), 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of variables to be loaded is %d, expect " "it to be greater than 0.", out_var_names.size())); @@ -49,7 +49,7 @@ class LoadCombineOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( static_cast(fin), true, - platform::errors::Unavailable( + phi::errors::Unavailable( "LoadCombine operator fails to open file %s, please check " "whether the model file is complete or damaged.", filename)); @@ -58,7 +58,7 @@ class LoadCombineOpKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( filename.empty(), true, - platform::errors::Unavailable( + phi::errors::Unavailable( "LoadCombine operator fails to open file %s, please check " "whether the model file is complete or damaged.", filename)); @@ -81,14 +81,14 @@ class LoadCombineOpKernel : public framework::OpKernel { VLOG(4) << "loading tensor: " << out_var_names[i]; PADDLE_ENFORCE_NOT_NULL( out_vars[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The variable %s to be loaded cannot be found.", out_var_names[i])); // Error checking PADDLE_ENFORCE_EQ( static_cast(*buffer), true, - platform::errors::Unavailable( + phi::errors::Unavailable( "An error occurred while loading model parameters. 
" "Please check whether the model file is complete or damaged.")); if (out_vars[i]->IsType()) { @@ -142,7 +142,7 @@ class LoadCombineOpKernel : public framework::OpKernel { buffer->peek(); PADDLE_ENFORCE_EQ(buffer->eof(), true, - platform::errors::Unavailable( + phi::errors::Unavailable( "Not allowed to load partial data via " "load_combine_op, please use load_op instead.")); } diff --git a/paddle/fluid/operators/lod_rank_table_op.cc b/paddle/fluid/operators/lod_rank_table_op.cc index a399ad4527ff8..310fb619bcb01 100644 --- a/paddle/fluid/operators/lod_rank_table_op.cc +++ b/paddle/fluid/operators/lod_rank_table_op.cc @@ -72,10 +72,9 @@ output operators. class LoDRankTableInferShape : public framework::InferShapeBase { public: void operator()(framework::InferShapeContext *context) const override { - PADDLE_ENFORCE_EQ( - context->HasInput("X"), - true, - platform::errors::NotFound("LoDRankTable must have input X.")); + PADDLE_ENFORCE_EQ(context->HasInput("X"), + true, + phi::errors::NotFound("LoDRankTable must have input X.")); } }; diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc index ae464e7b47161..654bc669c7504 100644 --- a/paddle/fluid/operators/lod_reset_op.cc +++ b/paddle/fluid/operators/lod_reset_op.cc @@ -33,7 +33,7 @@ class LoDResetOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( static_cast(level0.size()), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If Input(Y) is not provided, the output's LoD should be " "specified by attribute 'target_lod'. But the size of " "'target_lod' is 0.")); @@ -252,7 +252,7 @@ PD_REGISTER_STRUCT_KERNEL(lod_reset, CPU, ALL_LAYOUT, ops::LoDResetKernel, - plat::float16, + phi::dtype::float16, float, double, int, @@ -263,7 +263,7 @@ PD_REGISTER_STRUCT_KERNEL(lod_reset, XPU, ALL_LAYOUT, ops::LoDResetKernel, - plat::float16, + phi::dtype::float16, float, double, int, @@ -274,7 +274,7 @@ PD_REGISTER_STRUCT_KERNEL(lod_reset_grad, CPU, ALL_LAYOUT, ops::LoDResetGradKernel, - plat::float16, + phi::dtype::float16, float, double, int, diff --git a/paddle/fluid/operators/lod_reset_op.h b/paddle/fluid/operators/lod_reset_op.h index a468577ab9aa1..acba05514226b 100644 --- a/paddle/fluid/operators/lod_reset_op.h +++ b/paddle/fluid/operators/lod_reset_op.h @@ -54,7 +54,7 @@ class LoDResetKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( static_cast(last_level.back()), in->dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last value of Input(Y)'s last level LoD should be equal " "to the first dimension of Input(X). But received the last " "value of Input(Y)'s last level LoD is %d, the first dimension " @@ -79,20 +79,20 @@ class LoDResetKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( level0.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of target LoD should be greater than 1. But received the " "size of target LoD is %d.", level0.size())); PADDLE_ENFORCE_EQ(static_cast(level0[0]), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Target LoD should be a vector starting from 0. But " "target LoD starts from %d.", static_cast(level0[0]))); PADDLE_ENFORCE_EQ( static_cast(level0.back()), in->dims()[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last value of 'Target LoD''s last level LoD should be equal " "to the first dimension of Input(X). 
But received the 'Target LoD' " "is %s, Input(X)'s shape is %s.", @@ -101,7 +101,7 @@ class LoDResetKernel : public framework::OpKernel { for (size_t i = 0; i < level0.size() - 1; ++i) { PADDLE_ENFORCE_GE(level0[i + 1], level0[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'Target LoD' should be an ascending " "vector. But received the Target LoD is %s.", common::make_ddim(level0))); diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index 94b0319729117..42f6a4786fb25 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -70,7 +70,7 @@ struct LoDTensorToArrayFunctor { Apply(static_cast(dev_ctx)); #else PADDLE_THROW( - platform::errors::Unavailable("Paddle is not compiled with CUDA.")); + phi::errors::Unavailable("Paddle is not compiled with CUDA.")); #endif } } @@ -126,11 +126,11 @@ class LoDTensorToArrayOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( rank_level, x.lod().size(), - platform::errors::InvalidArgument("Input should be a phi::DenseTensor, " - "and its lod_level should be at " - "least %d, but given is %d.", - rank_level + 1, - x.lod().size())); + phi::errors::InvalidArgument("Input should be a phi::DenseTensor, " + "and its lod_level should be at " + "least %d, but given is %d.", + rank_level + 1, + x.lod().size())); out.resize(max_seq_len); std::vector> copy_ranges(max_seq_len); @@ -215,18 +215,18 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( context->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of LoDTensorToArrayOp should not be null.")); PADDLE_ENFORCE_EQ( context->HasInput("RankTable"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(RankTable) of LoDTensorToArrayOp should not be null.")); PADDLE_ENFORCE_EQ( context->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Out) of LoDTensorToArrayOp should not be null.")); auto x_dim = context->GetInputDim("X"); diff --git a/paddle/fluid/operators/lookup_table_dequant_op.cc b/paddle/fluid/operators/lookup_table_dequant_op.cc index 93826aab0d573..6f780b946eae8 100644 --- a/paddle/fluid/operators/lookup_table_dequant_op.cc +++ b/paddle/fluid/operators/lookup_table_dequant_op.cc @@ -30,17 +30,17 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(W) of LookupTableDequantOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("Ids"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of LookupTableDequantOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of LookupTableDequantOp should not be null.")); auto table_dims = ctx->GetInputDim("W"); @@ -50,7 +50,7 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The dimensions of the 'lookup table' must be 2. 
" "But received lookup table's dimensions = %d, " "lookup table's shape = [%s].", @@ -59,7 +59,7 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The last dimensions of the 'Ids' tensor must be 1. " "But received Ids's last dimensions = %d, Ids's shape = [%s].", ids_dims[ids_rank - 1], @@ -69,7 +69,7 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); PADDLE_ENFORCE_GE(table_dims[1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the second dim of table_dims should be " "greater or equal to 2, but the actual shape " "is [%s]", diff --git a/paddle/fluid/operators/lookup_table_dequant_op.h b/paddle/fluid/operators/lookup_table_dequant_op.h index 2f5a3d0fd7a16..191f05597668c 100644 --- a/paddle/fluid/operators/lookup_table_dequant_op.h +++ b/paddle/fluid/operators/lookup_table_dequant_op.h @@ -65,7 +65,7 @@ class LookupTableDequantKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( table_var->Type(), framework::VarTypeTrait::kId, - platform::errors::InvalidArgument("lookup table must be LodTensor")); + phi::errors::InvalidArgument("lookup table must be LodTensor")); auto *table_t = context.Input("W"); int64_t row_number = table_t->dims()[0]; int64_t quant_number = table_t->dims()[1]; @@ -81,7 +81,7 @@ class LookupTableDequantKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids[i], row_number, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -90,7 +90,7 @@ class LookupTableDequantKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index a8185691c45aa..6818b363bc89a 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/var_type_inference.h" -#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/phi/common/bfloat16.h" namespace paddle { namespace operators { @@ -40,7 +40,7 @@ class LookupTableOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The dimensions of the 'lookup table' must be 2. " "But received lookup table's dimensions = %d, " "lookup table's shape = [%s].", @@ -49,7 +49,7 @@ class LookupTableOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The last dimensions of the 'Ids' tensor must be 1. 
" "But received Ids's last dimensions = %d, Ids's shape = [%s].", ids_dims[ids_rank - 1], @@ -239,11 +239,11 @@ REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel, ops::LookupTableKernel, ops::LookupTableKernel, - ops::LookupTableKernel); + ops::LookupTableKernel); REGISTER_OP_CPU_KERNEL(lookup_table_grad, ops::LookupTableGradKernel, ops::LookupTableGradKernel, - ops::LookupTableGradKernel); + ops::LookupTableGradKernel); /* ========================== register checkpoint ===========================*/ diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index ba8af995429a3..46ae30754a933 100644 --- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -15,8 +15,8 @@ limitations under the License. */ #include "paddle/fluid/operators/lookup_table_op.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -195,7 +195,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The shape of lookup_table@Grad and " "output@Grad should be same. " "But received lookup_table@Grad's shape = [%s], " @@ -252,10 +252,10 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(lookup_table, ops::LookupTableCUDAKernel, ops::LookupTableCUDAKernel, - ops::LookupTableCUDAKernel, + ops::LookupTableCUDAKernel, ops::LookupTableCUDAKernel, ops::LookupTableCUDAKernel); REGISTER_OP_CUDA_KERNEL(lookup_table_grad, ops::LookupTableGradCUDAKernel, ops::LookupTableGradCUDAKernel, - ops::LookupTableGradCUDAKernel); + ops::LookupTableGradCUDAKernel); diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index 21f0bf6a957ae..f4e48065742ca 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -64,7 +64,7 @@ class LookupTableKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids[i], row_number, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -73,7 +73,7 @@ class LookupTableKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -99,7 +99,7 @@ class LookupTableKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0. But received %ld", ids[i])); @@ -129,14 +129,14 @@ class LookupTableKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0. But received %ld", ids[i])); PADDLE_ENFORCE_GE( id_index, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the input key should be exists. 
But received %d.", id_index)); @@ -173,7 +173,7 @@ class LookupTableGradKernel : public framework::OpKernel { auto *table_t = context.Input("W"); table_dim = table_t->value().dims(); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The parameter W of a LookupTable " "must be either phi::DenseTensor or SelectedRows")); } @@ -210,7 +210,7 @@ class LookupTableGradKernel : public framework::OpKernel { common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The shape of lookup_table@Grad and " "output@Grad should be same. " "But received lookup_table@Grad's shape = [%s], " @@ -243,7 +243,7 @@ class LookupTableGradKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( ids_data[i], N, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -252,7 +252,7 @@ class LookupTableGradKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( ids_data[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input" "value.", diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu index edd8b20da160c..137d6bea417c3 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cu +++ b/paddle/fluid/operators/lookup_table_v2_op.cu @@ -15,8 +15,8 @@ limitations under the License. */ #include "paddle/fluid/operators/lookup_table_v2_op.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -190,7 +190,7 @@ struct LookupTableV2GradCUDAFunctor { common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The shape of lookup_table@Grad and " "output@Grad should be same. " "But received lookup_table@Grad's shape = [%s], " diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index 82dbac8b21dfc..cce29cb715563 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -78,7 +78,7 @@ struct LookupTableV2CPUFunctor { PADDLE_ENFORCE_LT( ids[i], row_number, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -87,7 +87,7 @@ struct LookupTableV2CPUFunctor { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -113,7 +113,7 @@ struct LookupTableV2CPUFunctor { PADDLE_ENFORCE_GE( ids[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0. 
But received %ld", ids[i])); @@ -121,7 +121,7 @@ struct LookupTableV2CPUFunctor { PADDLE_ENFORCE_GE( id_index, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "the input key should be exists. But received %d.", id_index)); @@ -173,7 +173,7 @@ struct LookupTableV2GradCPUFunctor { auto *table_t = context_.Input("W"); table_dim = table_t->value().dims(); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The parameter W of a LookupTableV2 " "must be either phi::DenseTensor or SelectedRows")); } @@ -209,7 +209,7 @@ struct LookupTableV2GradCPUFunctor { common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The shape of lookup_table@Grad and " "output@Grad should be same. " "But received lookup_table@Grad's shape = [%s], " @@ -242,7 +242,7 @@ struct LookupTableV2GradCPUFunctor { PADDLE_ENFORCE_LT( ids_data[i], N, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -251,7 +251,7 @@ struct LookupTableV2GradCPUFunctor { PADDLE_ENFORCE_GE( ids_data[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.embedding) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index bf4c72a2133b6..705af5f8d0587 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif namespace paddle { @@ -199,23 +199,23 @@ class LRNOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dim.size(), 4, - platform::errors::InvalidArgument("Input(input) rank should be 4, " - "but received input rank (%d) != 4", - x_dim.size())); + phi::errors::InvalidArgument("Input(input) rank should be 4, " + "but received input rank (%d) != 4", + x_dim.size())); int n = ctx->Attrs().Get("n"); - PADDLE_ENFORCE_GT(n, - 0UL, - platform::errors::InvalidArgument( - "Argument(n) should be positive, " - "but received n(%d) not greater than 0", - n)); - PADDLE_ENFORCE_EQ(n % 2, - 1UL, - platform::errors::InvalidArgument( - "Argument(n) should be odd value, " - "but received n(%d) is not an odd value", - n)); + PADDLE_ENFORCE_GT( + n, + 0UL, + phi::errors::InvalidArgument("Argument(n) should be positive, " + "but received n(%d) not greater than 0", + n)); + PADDLE_ENFORCE_EQ( + n % 2, + 1UL, + phi::errors::InvalidArgument("Argument(n) should be odd value, " + "but received n(%d) is not an odd value", + n)); ctx->SetOutputDim("Out", x_dim); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/lrn_op.h b/paddle/fluid/operators/lrn_op.h index 4d1cc268d48b6..063ec6e445044 100644 --- a/paddle/fluid/operators/lrn_op.h +++ b/paddle/fluid/operators/lrn_op.h @@ -78,21 +78,21 @@ class LRNKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( alpha, 0UL, - platform::errors::InvalidArgument("Argument(alpha) should >= 0.0, " - "but received alpha(%d) less than 0", - alpha)); + phi::errors::InvalidArgument("Argument(alpha) should >= 0.0, " + "but received alpha(%d) less than 0", + alpha)); PADDLE_ENFORCE_GE( beta, 0UL, - platform::errors::InvalidArgument("Argument(beta) should >= 0.0, " - "but received beta(%d) less than 0", - beta)); + phi::errors::InvalidArgument("Argument(beta) should >= 0.0, " + "but received beta(%d) less than 0", + beta)); PADDLE_ENFORCE_GE( k, 0UL, - platform::errors::InvalidArgument("Argument(k) should >= 0.0, " - "but received k(%d) less than 0", - k)); + phi::errors::InvalidArgument("Argument(k) should >= 0.0, " + "but received k(%d) less than 0", + k)); LRNFunctor f; f(ctx, x, out, mid, N, C, H, W, n, k, alpha, beta, data_layout); @@ -165,7 +165,7 @@ class LRNGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( !ctx.Attr("is_test"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "is_test attribute should be set to False in training phase. 
" "but received is_test == True in training phase.")); diff --git a/paddle/fluid/operators/lstm_op.cc b/paddle/fluid/operators/lstm_op.cc index 8bdb455375bee..a34fc82fe177c 100644 --- a/paddle/fluid/operators/lstm_op.cc +++ b/paddle/fluid/operators/lstm_op.cc @@ -46,20 +46,20 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X)'s rank must be 2, but received %d.", in_dims.size())); if (ctx->HasInput("H0")) { PADDLE_ENFORCE_EQ( ctx->HasInput("C0"), true, - platform::errors::NotFound("Input(Cell) and Input(Hidden) of LSTM " - "should not be null at the same time.")); + phi::errors::NotFound("Input(Cell) and Input(Hidden) of LSTM " + "should not be null at the same time.")); auto h_dims = ctx->GetInputDim("H0"); auto c_dims = ctx->GetInputDim("C0"); PADDLE_ENFORCE_EQ(h_dims, c_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of Input(H0) and Input(C0) should " "be the same, but received [%s] (H0) vs [%s] (C0).", h_dims, @@ -71,19 +71,19 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( w_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Weight) should be 2, but received %d.", w_dims.size())); PADDLE_ENFORCE_EQ(w_dims[0], frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Weight) should be %d, " "but received %d.", frame_size, w_dims[0])); PADDLE_ENFORCE_EQ(w_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Weight) should be 4 * " "%d, but received %d.", frame_size, @@ -93,13 +93,13 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( b_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(Bias) should be 2, but received %d.", b_dims.size())); PADDLE_ENFORCE_EQ( b_dims[0], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Bias) should be 1, but received %d.", b_dims[0])); @@ -107,7 +107,7 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( b_dims[1], 7 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be 7 * %d if enable " "peepholes connection, but received %d.", frame_size, @@ -116,7 +116,7 @@ class LSTMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( b_dims[1], 4 * frame_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(Bias) should be 4 * %d if disable " "peepholes connection, but received %d.", frame_size, diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index 0e068c47647e3..278fdbdb41761 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -252,7 +252,7 @@ class LSTMGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( frame_size, out_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(" + framework::GradVarName("Hidden") + ") should be %d, but received %d in LSTM@Grad operator.", diff --git a/paddle/fluid/operators/margin_cross_entropy_op.cu b/paddle/fluid/operators/margin_cross_entropy_op.cu index d2c026a9042c7..77cd6433c69e9 100644 --- a/paddle/fluid/operators/margin_cross_entropy_op.cu +++ 
b/paddle/fluid/operators/margin_cross_entropy_op.cu @@ -98,7 +98,7 @@ void GetClassInterval(const gpuStream_t& stream, if (FLAGS_dynamic_static_unified_comm) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(rid)), true, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -109,7 +109,7 @@ void GetClassInterval(const gpuStream_t& stream, comm_context_manager.Get(std::to_string(rid))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - paddle::platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); } else { @@ -287,7 +287,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, PADDLE_ENFORCE_EQ( comm_context_manager.Has(std::to_string(ring_id)), true, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -299,7 +299,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx, PADDLE_ENFORCE_NE( comm_ctx, nullptr, - paddle::platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); } else { diff --git a/paddle/fluid/operators/match_matrix_tensor_op.cc b/paddle/fluid/operators/match_matrix_tensor_op.cc index 746a28ed588d5..e790262a0fd78 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.cc +++ b/paddle/fluid/operators/match_matrix_tensor_op.cc @@ -36,7 +36,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X) should be equal to 2, " "but received %d.", x_dims.size())); @@ -44,7 +44,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { auto y_dims = ctx->GetInputDim("Y"); PADDLE_ENFORCE_EQ(y_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Y) should be equal to 2, " "but received %d.", y_dims.size())); @@ -52,7 +52,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { auto w_dims = ctx->GetInputDim("W"); PADDLE_ENFORCE_EQ(w_dims.size(), 3, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(W) should be equal to 3, " "but received %d.", w_dims.size())); @@ -61,7 +61,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[0], x_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(W) should be equal to the second " "dimension of Input(X). 
But received the first dimension of Input(W) " "is %d, the second dimension of Input(X) is %d.", @@ -70,7 +70,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[1], dim_t, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(W) should be equal to 'dim_t', but " "received the second dimension of Input(W) is %d, 'dim_t' is %d.", w_dims[1], @@ -78,7 +78,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[2], y_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last dimension of Input(W) should be equal to " "the second dimension of Input(Y). But received the last dimension " "of Input(W) is %d, the second dimension of Input(Y) is %d.", @@ -93,19 +93,19 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { const auto& x_lod = x_var->Get().lod(); PADDLE_ENFORCE_EQ(x_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) should hold LoD information, but " "received Input(X).lod() is empty.")); const auto& x_lod_0 = x_lod[0]; PADDLE_ENFORCE_GE(x_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X)'s LoD data should be " "equal to 2, but received %d.", x_lod_0.size())); PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last element of Input(X)'s LoD data should be " "equal to the first dimension of Input(X). " "But received the last element of Input(X)'s LoD " @@ -118,19 +118,19 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { const auto& y_lod = y_var->Get().lod(); PADDLE_ENFORCE_EQ(y_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Y) should hold LoD information, but " "received Input(Y).lod() is empty.")); const auto& y_lod_0 = y_lod[0]; PADDLE_ENFORCE_GE(y_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Y)'s LoD data should be " "equal to 2, but received %d.", y_lod_0.size())); PADDLE_ENFORCE_EQ(y_dims[0], static_cast(y_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last element of Input(Y)'s LoD data should be " "equal to the first dimension of Input(Y). " "But received the last element of Input(Y)'s LoD " @@ -140,7 +140,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(x_lod_0.size(), y_lod_0.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X)'s and Input(Y)'s LoD " "data should be equal. 
" "But received the dimensions of Input(X)'s LoD is " @@ -164,17 +164,17 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_GE( x_desc->GetLoDLevel(), 1, - platform::errors::InvalidArgument("The LoD level of Input(X) should be " - "greater than 1, but received %d.", - x_desc->GetLoDLevel())); + phi::errors::InvalidArgument("The LoD level of Input(X) should be " + "greater than 1, but received %d.", + x_desc->GetLoDLevel())); framework::VarDesc* y_desc = PADDLE_GET(framework::VarDesc*, ctx->GetInputVarPtrs("Y")[0]); PADDLE_ENFORCE_GE( y_desc->GetLoDLevel(), 1, - platform::errors::InvalidArgument("The LoD level of Input(Y) should be " - "greater than 1, but received %d.", - y_desc->GetLoDLevel())); + phi::errors::InvalidArgument("The LoD level of Input(Y) should be " + "greater than 1, but received %d.", + y_desc->GetLoDLevel())); ctx->ShareLoD("X", "Out"); } @@ -255,20 +255,20 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel { const auto& x_lod = x->lod(); PADDLE_ENFORCE_EQ(x_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) should hold LoD information, but " "received Input(X).lod() is empty.")); const auto& x_lod_0 = x_lod[0]; PADDLE_ENFORCE_GE(x_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X)'s LoD data should be " "equal to 2, but received %d.", x_lod_0.size())); auto x_dims = x->dims(); PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last element of Input(X)'s LoD data should be " "equal to the first dimension of Input(X). " "But received the last element of Input(X)'s LoD " @@ -278,20 +278,20 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel { const auto& y_lod = y->lod(); PADDLE_ENFORCE_EQ(y_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(Y) should hold LoD information, but " "received Input(Y).lod() is empty.")); const auto& y_lod_0 = y_lod[0]; PADDLE_ENFORCE_GE(y_lod_0.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(Y)'s LoD data should be " "equal to 2, but received %d.", y_lod_0.size())); auto y_dims = y->dims(); PADDLE_ENFORCE_EQ(y_dims[0], static_cast(y_lod_0.back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The last element of Input(Y)'s LoD data should be " "equal to the first dimension of Input(Y). " "But received the last element of Input(Y)'s LoD " @@ -301,7 +301,7 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(x_lod_0.size(), y_lod_0.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimensions of Input(X)'s and Input(Y)'s LoD " "data should be equal. 
" "But received the dimensions of Input(X)'s LoD is " diff --git a/paddle/fluid/operators/math/beam_search.cc b/paddle/fluid/operators/math/beam_search.cc index aeff6c394c429..974d5d5d5a3c8 100644 --- a/paddle/fluid/operators/math/beam_search.cc +++ b/paddle/fluid/operators/math/beam_search.cc @@ -100,10 +100,10 @@ class BeamSearchFunctor { lod[0].assign(high_level.begin(), high_level.end()); lod[1].assign(low_level.begin(), low_level.end()); if (!framework::CheckLoD(lod)) { - PADDLE_THROW(platform::errors::InvalidArgument( - "lod %s is not right in" - " beam_search, please check your code.", - framework::LoDToString(lod))); + PADDLE_THROW( + phi::errors::InvalidArgument("lod %s is not right in" + " beam_search, please check your code.", + framework::LoDToString(lod))); } selected_ids->set_lod(lod); selected_scores->set_lod(lod); diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu index 098f40ab526b1..702c34ce2161f 100644 --- a/paddle/fluid/operators/math/beam_search.cu +++ b/paddle/fluid/operators/math/beam_search.cu @@ -504,17 +504,17 @@ class BeamSearchFunctor { num_used_threads)); } } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Not implemented other number of sequences yet.")); } context.Wait(); mix_vector.CopyToCPU(); if (!framework::CheckLoD(selected_lod)) { - PADDLE_THROW(platform::errors::InvalidArgument( - "lod %s is not right in" - " beam_search, please check your code.", - framework::LoDToString(selected_lod))); + PADDLE_THROW( + phi::errors::InvalidArgument("lod %s is not right in" + " beam_search, please check your code.", + framework::LoDToString(selected_lod))); } selected_ids->set_lod(selected_lod); diff --git a/paddle/fluid/operators/math/beam_search_xpu.cc b/paddle/fluid/operators/math/beam_search_xpu.cc index 4ac0e3d886017..33484d139982c 100644 --- a/paddle/fluid/operators/math/beam_search_xpu.cc +++ b/paddle/fluid/operators/math/beam_search_xpu.cc @@ -41,7 +41,7 @@ void CopyDataByCondition(const T *x, T **y, int len, const Place &place) { PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External("Copy data form xpu to cpu failed")); + phi::errors::External("Copy data form xpu to cpu failed")); } } @@ -125,10 +125,10 @@ class BeamSearchFunctor { lod[0].assign(high_level.begin(), high_level.end()); lod[1].assign(low_level.begin(), low_level.end()); if (!framework::CheckLoD(lod)) { - PADDLE_THROW(platform::errors::InvalidArgument( - "lod %s is not right in" - " beam_search, please check your code.", - framework::LoDToString(lod))); + PADDLE_THROW( + phi::errors::InvalidArgument("lod %s is not right in" + " beam_search, please check your code.", + framework::LoDToString(lod))); } selected_ids->set_lod(lod); selected_scores->set_lod(lod); diff --git a/paddle/fluid/operators/math/bert_encoder_functor.h b/paddle/fluid/operators/math/bert_encoder_functor.h index 76e27380b90e2..32b36b9c1515e 100644 --- a/paddle/fluid/operators/math/bert_encoder_functor.h +++ b/paddle/fluid/operators/math/bert_encoder_functor.h @@ -28,7 +28,7 @@ namespace cub = hipcub; #endif #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -39,7 +39,7 @@ struct CUDATypeTraits; template <> struct CUDATypeTraits { - typedef platform::float16 TYPE; + typedef phi::dtype::float16 TYPE; }; template <> diff --git a/paddle/fluid/operators/math/concat_and_split.cc 
b/paddle/fluid/operators/math/concat_and_split.cc index 87b3695553356..7a37d929be71d 100644 --- a/paddle/fluid/operators/math/concat_and_split.cc +++ b/paddle/fluid/operators/math/concat_and_split.cc @@ -110,7 +110,7 @@ class ConcatFunctor { PADDLE_ENFORCE_EQ( r, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU API return wrong value[%d %s], please check whether " "Baidu Kunlun Card is properly installed.", r, @@ -169,7 +169,7 @@ class SplitFunctor { PADDLE_ENFORCE_EQ( r, XPU_SUCCESS, - platform::errors::External( + phi::errors::External( "XPU API return wrong value[%d %s], please check whether " "Baidu Kunlun Card is properly installed.", r, @@ -190,8 +190,8 @@ FOR_ALL_TYPES(DEFINE_FUNCTOR); template class SplitFunctor; DEFINE_XPU_FUNCTOR(float) -DEFINE_XPU_FUNCTOR(platform::float16) -DEFINE_XPU_FUNCTOR(platform::bfloat16) +DEFINE_XPU_FUNCTOR(phi::dtype::float16) +DEFINE_XPU_FUNCTOR(phi::dtype::bfloat16) #endif } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/context_project.h b/paddle/fluid/operators/math/context_project.h index 20211160b7e5e..f510034a7ea0c 100644 --- a/paddle/fluid/operators/math/context_project.h +++ b/paddle/fluid/operators/math/context_project.h @@ -144,7 +144,7 @@ class ContextProjectFunctor { if (padding_trainable) { PADDLE_ENFORCE_NOT_NULL( padding_data, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor 'padding_data' should not be NULL.")); for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { if (lod_level_0[i] == lod_level_0[i + 1]) continue; diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index 8d6b0b99f9d52..c4a22ece92b54 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -38,7 +38,7 @@ static void CheckEighResult(const int batch, const int info) { PADDLE_ENFORCE_LE( info, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "For batch [%d]: the [%d] off-diagonal elements of an intermediate" "tridiagonal form did not converge to zero", batch, @@ -46,7 +46,7 @@ static void CheckEighResult(const int batch, const int info) { PADDLE_ENFORCE_GE( info, 0, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "For batch [%d]: the [%d] argument had an illegal value", batch, info)); @@ -160,7 +160,7 @@ struct MatrixEighFunctor { } if (has_vectors) { PADDLE_ENFORCE_NOT_NULL(eigen_vectors, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When has_vectors is true," "the eigenvectors needs to be calculated, " "so the eigenvectors must be provided.")); @@ -293,7 +293,7 @@ struct MatrixEighFunctor { } if (has_vectors) { PADDLE_ENFORCE_NOT_NULL(eigen_vectors, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When has_vectors is true," "the eigenvectors needs to be calculated," "so the eigenvectors must be provided.")); diff --git a/paddle/fluid/operators/math/prelu.cu b/paddle/fluid/operators/math/prelu.cu index 9dc25e30ce9aa..eadfdf8cf39e4 100644 --- a/paddle/fluid/operators/math/prelu.cu +++ b/paddle/fluid/operators/math/prelu.cu @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
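prelu.cu below swaps the fluid float16/bfloat16 headers for their phi counterparts, the same substitution applied in every file of this diff that touches half-precision types. A hedged sketch of the new spellings (the Scale helper is hypothetical, not from this diff):

#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"

// phi::dtype::float16 / phi::dtype::bfloat16 replace the old
// platform::float16 / platform::bfloat16 spellings; arithmetic on these
// types promotes through float.
template <typename T>
T Scale(T x) {
  return static_cast<T>(x * static_cast<T>(2));
}

template phi::dtype::float16 Scale(phi::dtype::float16);
template phi::dtype::bfloat16 Scale(phi::dtype::bfloat16);
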
*/ #include "paddle/fluid/operators/math/prelu.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/float16.h" namespace paddle { namespace operators { @@ -135,18 +135,18 @@ void PreluScalarDirectCUDAFunctor::operator()(gpuStream_t stream, } template class PreluChannelWiseDirectCUDAFunctor; -template class PreluChannelWiseDirectCUDAFunctor; -template class PreluChannelWiseDirectCUDAFunctor; +template class PreluChannelWiseDirectCUDAFunctor; +template class PreluChannelWiseDirectCUDAFunctor; template class PreluChannelWiseDirectCUDAFunctor; template class PreluElementWiseDirectCUDAFunctor; -template class PreluElementWiseDirectCUDAFunctor; -template class PreluElementWiseDirectCUDAFunctor; +template class PreluElementWiseDirectCUDAFunctor; +template class PreluElementWiseDirectCUDAFunctor; template class PreluElementWiseDirectCUDAFunctor; template class PreluScalarDirectCUDAFunctor; -template class PreluScalarDirectCUDAFunctor; -template class PreluScalarDirectCUDAFunctor; +template class PreluScalarDirectCUDAFunctor; +template class PreluScalarDirectCUDAFunctor; template class PreluScalarDirectCUDAFunctor; } // namespace math diff --git a/paddle/fluid/operators/math/sampler.h b/paddle/fluid/operators/math/sampler.h index 9bca69edd1fea..e14e1ca572cab 100644 --- a/paddle/fluid/operators/math/sampler.h +++ b/paddle/fluid/operators/math/sampler.h @@ -36,7 +36,7 @@ class Sampler { PADDLE_ENFORCE_GT( range, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Range should be greater than 0, but received %d.", range)); if (seed == 0) { std::random_device r; diff --git a/paddle/fluid/operators/math/tree2col.cc b/paddle/fluid/operators/math/tree2col.cc index 41c131de0f392..f9950cd95de0b 100644 --- a/paddle/fluid/operators/math/tree2col.cc +++ b/paddle/fluid/operators/math/tree2col.cc @@ -58,7 +58,7 @@ void Tree2ColUtil::construct_tree(const phi::DenseTensor &EdgeSet, const auto &edge_set_dims = EdgeSet.dims(); PADDLE_ENFORCE_EQ(edge_set_dims[1], 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of the EdgeSet shall be 2, but " "got %ld != 2. Please check the input value.", edge_set_dims[1])); diff --git a/paddle/fluid/operators/math/unpooling.cc b/paddle/fluid/operators/math/unpooling.cc index 78c41f1b8387a..a4e64b4d84fc2 100644 --- a/paddle/fluid/operators/math/unpooling.cc +++ b/paddle/fluid/operators/math/unpooling.cc @@ -43,7 +43,7 @@ class Unpool2dMaxFunctor { PADDLE_ENFORCE_LT( index, output_feasize, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "index should less than output tensor height * output tensor " "width. Expected %ld < %ld, but got " "%ld >= %ld. Please check input value.", @@ -88,7 +88,7 @@ class Unpool2dMaxGradFunctor { PADDLE_ENFORCE_LT( index, output_feasize, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "index should less than output tensor height * output tensor " "width. Expected %ld < %ld, but got " "%ld >= %ld. Please check input value.", @@ -134,7 +134,7 @@ class Unpool3dMaxFunctor { PADDLE_ENFORCE_LT( index, output_feasize, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "index should less than output tensor depth * output tensor " "height " "* output tensor width. 
Expected %ld < %ld, but got " @@ -182,7 +182,7 @@ class Unpool3dMaxGradFunctor { PADDLE_ENFORCE_LT( index, output_feasize, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "index should less than output tensor depth * output tensor " "height " "* output tensor width. Expected %ld < %ld, but got " diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 895a427bae6e2..c55a1e6b14123 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -961,7 +961,7 @@ REGISTER_OP_CUDA_KERNEL( matmul, ops::MatMulKernel, ops::MatMulKernel, - ops::MatMulKernel); + ops::MatMulKernel); #endif #if defined(PADDLE_WITH_CUDA) @@ -971,13 +971,13 @@ REGISTER_OP_CUDA_KERNEL( ops::MatMulKernel, ops::MatMulKernel, ops::MatMulKernel, - ops::MatMulKernel); + ops::MatMulKernel); #else REGISTER_OP_CUDA_KERNEL( matmul, ops::MatMulKernel, ops::MatMulKernel, - ops::MatMulKernel); + ops::MatMulKernel); #endif #endif @@ -985,7 +985,7 @@ REGISTER_OP_CUDA_KERNEL( matmul_grad, ops::MatMulGradKernel, ops::MatMulGradKernel, - ops::MatMulGradKernel); + ops::MatMulGradKernel); REGISTER_OP_CUDA_KERNEL(matmul_grad_grad, ops::MatMulDoubleGradKernel, ops::MatMulDoubleGradKernel); diff --git a/paddle/fluid/operators/matmul_op_xpu.cc b/paddle/fluid/operators/matmul_op_xpu.cc index de2aa41d971df..095a90737f9ad 100644 --- a/paddle/fluid/operators/matmul_op_xpu.cc +++ b/paddle/fluid/operators/matmul_op_xpu.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/xpu_api_wrapper.h" +#include "paddle/phi/kernels/xpu/xpu_api_wrapper.h" namespace paddle { namespace operators { @@ -156,12 +156,13 @@ REGISTER_OP_XPU_KERNEL( matmul, ops::MatMulXPUKernel, ops::MatMulXPUKernel, - ops::MatMulXPUKernel); + ops::MatMulXPUKernel); REGISTER_OP_XPU_KERNEL( matmul_grad, ops::MatMulGradXPUKernel, ops::MatMulGradXPUKernel, ops::MatMulGradXPUKernel); + phi::dtype::float16>); #endif diff --git a/paddle/fluid/operators/memcpy_d2h_op.cc b/paddle/fluid/operators/memcpy_d2h_op.cc index 7233e437e147a..9c1087d42b2b3 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.cc +++ b/paddle/fluid/operators/memcpy_d2h_op.cc @@ -67,10 +67,10 @@ class MemcpyD2HKernel { if (x == nullptr) { return; } - PADDLE_ENFORCE_EQ(ctx.HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of memcpy_d2h_op is not found.")); + PADDLE_ENFORCE_EQ( + ctx.HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of memcpy_d2h_op is not found.")); auto *out = ctx.OutputVar("Out"); // Get dev_ctx from ExecutionContext, it's D2H stream auto &dev_ctx = ctx.device_context(); @@ -136,13 +136,13 @@ REGISTER_OP_IPU_KERNEL_FUNCTOR(memcpy_d2h, ops::MemcpyD2HKernel, bool, ops::MemcpyD2HKernel, - paddle::platform::bfloat16, + phi::dtype::bfloat16, ops::MemcpyD2HKernel, paddle::platform::complex, ops::MemcpyD2HKernel, paddle::platform::complex, ops::MemcpyD2HKernel, - plat::float16, + phi::dtype::float16, ops::MemcpyD2HKernel, int16_t, ops::MemcpyD2HKernel); diff --git a/paddle/fluid/operators/memcpy_d2h_op.h b/paddle/fluid/operators/memcpy_d2h_op.h index 4f948e4482f8a..2a69ae556adfd 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.h +++ b/paddle/fluid/operators/memcpy_d2h_op.h @@ -53,7 +53,7 @@ class MemcpyD2HFunctor { void operator()(const phi::SelectedRows &rows) const { // (JZ-LIANG) to support SelectedRows - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Memcpy for 
SelectedRows is NOT support yet.")); } @@ -62,7 +62,7 @@ class MemcpyD2HFunctor { PADDLE_ENFORCE_EQ( true, false, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Not support type for Memcpy op with type %s", typeid(T).name())); } @@ -76,7 +76,7 @@ class MemcpyD2HFunctor { } else if (dst_place_type_ == 0) { framework::TensorCopy(src, platform::CPUPlace(), dev_ctx_, &dst); } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "memcpy dst_place_type: %d is not supported yet.", dst_place_type_)); } // NOTE(Aurelius84): host <-> device memory copies of a memory block of 64 diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc index 457b629268659..85cd21831c9b1 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.cc +++ b/paddle/fluid/operators/memcpy_h2d_op.cc @@ -68,10 +68,10 @@ class MemcpyH2DKernel { if (x == nullptr) { return; } - PADDLE_ENFORCE_EQ(ctx.HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of memcpy_d2h_op is not found.")); + PADDLE_ENFORCE_EQ( + ctx.HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of memcpy_d2h_op is not found.")); auto *out = ctx.OutputVar("Out"); // Get dev_ctx from ExecutionContext, it's H2D stream auto &dev_ctx = ctx.device_context(); @@ -137,13 +137,13 @@ REGISTER_OP_IPU_KERNEL_FUNCTOR(memcpy_h2d, ops::MemcpyH2DKernel, bool, ops::MemcpyH2DKernel, - paddle::platform::bfloat16, + phi::dtype::bfloat16, ops::MemcpyH2DKernel, paddle::platform::complex, ops::MemcpyH2DKernel, paddle::platform::complex, ops::MemcpyH2DKernel, - plat::float16, + phi::dtype::float16, ops::MemcpyH2DKernel, int16_t, ops::MemcpyH2DKernel); diff --git a/paddle/fluid/operators/memcpy_h2d_op.h b/paddle/fluid/operators/memcpy_h2d_op.h index 5f480461d77cd..6b83ab1541976 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.h +++ b/paddle/fluid/operators/memcpy_h2d_op.h @@ -53,7 +53,7 @@ class MemcpyH2DFunctor { framework::TensorCopy( lod_tensor, dev_ctx_.GetPlace(), dev_ctx_, &out_tensor); } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "memcpy dst_place_type: %d is not supported yet.", dst_place_type_)); } out_tensor.set_lod(lod_tensor.lod()); @@ -61,7 +61,7 @@ class MemcpyH2DFunctor { void operator()(const phi::SelectedRows &rows) const { // (JZ-LIANG) to support SelectedRows - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Memcpy for SelectedRows is NOT support yet.")); } @@ -70,7 +70,7 @@ class MemcpyH2DFunctor { PADDLE_ENFORCE_EQ( true, false, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Not support type for Memcpy op with type %s", typeid(T).name())); } diff --git a/paddle/fluid/operators/memcpy_op.cc b/paddle/fluid/operators/memcpy_op.cc index bb3e29df16d53..8031e318f3af9 100644 --- a/paddle/fluid/operators/memcpy_op.cc +++ b/paddle/fluid/operators/memcpy_op.cc @@ -87,7 +87,7 @@ class MemcpyKernel { PADDLE_ENFORCE_EQ( ctx.HasOutput("Out"), true, - platform::errors::NotFound("Output(Out) of memcpy_op is not found.")); + phi::errors::NotFound("Output(Out) of memcpy_op is not found.")); auto *out = ctx.OutputVar("Out"); platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(ctx.GetPlace()); diff --git a/paddle/fluid/operators/memcpy_op.h b/paddle/fluid/operators/memcpy_op.h index bfdd43eaaa519..81432dcb30f6b 100644 --- a/paddle/fluid/operators/memcpy_op.h +++ 
b/paddle/fluid/operators/memcpy_op.h @@ -66,7 +66,7 @@ class MemcpyFunctor { lod_tensor, dev_ctx_.GetPlace(), dev_ctx_, &out_tensor); #endif } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "memcpy dst_place_type: %d is not supported yet.", dst_place_type_)); } out_tensor.set_lod(lod_tensor.lod()); @@ -74,7 +74,7 @@ class MemcpyFunctor { void operator()(const phi::SelectedRows &rows) const { // (JZ-LIANG) to support SelectedRows - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Memcpy for SelectedRows is NOT support yet.")); } @@ -83,7 +83,7 @@ class MemcpyFunctor { PADDLE_ENFORCE_EQ( true, false, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Not support type for Memcpy op with type %s", typeid(T).name())); } diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc deleted file mode 100644 index 3ed27460e16b6..0000000000000 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ /dev/null @@ -1,276 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/lod_utils.h" - -namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace framework { -class InferShapeContext; -class OpDesc; -class Scope; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -namespace paddle { -namespace operators { - -using LoD = framework::LoD; - -class MergeLoDTensorOp : public framework::OperatorBase { - public: - MergeLoDTensorOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - protected: - void RunBase(const framework::Scope &scope, - const platform::Place &dev_place) const { - // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(dev_place); - - auto &x = scope.FindVar(Input("X"))->Get(); - auto &mask = scope.FindVar(Input("Mask"))->Get(); - auto &in_true = scope.FindVar(Input("InTrue"))->Get(); - auto &in_false = scope.FindVar(Input("InFalse"))->Get(); - auto *out = scope.FindVar(Output("Out"))->GetMutable(); - auto level = static_cast(Attr("level")); - - PADDLE_ENFORCE_EQ( - in_true.numel() || in_false.numel(), - true, - platform::errors::InvalidArgument( - "Input(InTrue) or Input(InFalse) should be initialized.")); - - auto &mask_dim = mask.dims(); - std::unique_ptr cpu_mask{new phi::DenseTensor()}; - if (platform::is_cpu_place(mask.place())) { - cpu_mask->ShareDataWith(mask); - } else if (platform::is_gpu_place(mask.place())) { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - framework::TensorCopy( - mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); -#else - 
PADDLE_THROW(platform::errors::PreconditionNotMet( - "Not supported GPU, Please recompile or reinstall paddle with CUDA " - "support.")); -#endif - } - auto *mask_data = cpu_mask->data(); - - platform::Place place = dev_place; - int64_t batch_size = in_true.dims()[0] + in_false.dims()[0]; - auto data_type = in_true.IsInitialized() ? in_true.type() : in_false.type(); - int rank = 0; - framework::DDim in_dims; - if (in_true.IsInitialized()) { - rank = in_true.dims().size(); - in_dims = common::slice_ddim(in_true.dims(), 1, rank); - } else { - rank = in_false.dims().size(); - in_dims = common::slice_ddim(in_false.dims(), 1, rank); - } - - auto in_dim_vec = common::vectorize(in_dims); - in_dim_vec.insert(in_dim_vec.begin(), batch_size); - - framework::DDim out_dims = common::make_ddim(in_dim_vec); - out->Resize(out_dims); - - out->mutable_data(place, data_type); - - auto *out_lod = out->mutable_lod(); - out_lod->clear(); - size_t out_offset = 0; - - // Build phi::DenseTensor `out` - - size_t in_true_idx = 0; - size_t in_false_idx = 0; - for (size_t i = 0; i < static_cast(mask_dim[0]); i++) { - const phi::DenseTensor *input = nullptr; - size_t *in_idx = nullptr; - if (static_cast(mask_data[i]) == 0) { - input = &in_false; - in_idx = &in_false_idx; - } else { - input = &in_true; - in_idx = &in_true_idx; - } - auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( - input->lod(), *in_idx, (*in_idx) + 1, 0); - auto &lod_length = lod_and_offset.first; - - phi::AppendLoD(out_lod, lod_length); - - size_t start_offset = lod_and_offset.second.first; - size_t end_offset = lod_and_offset.second.second; - - PADDLE_ENFORCE_GE(end_offset, - start_offset, - platform::errors::InvalidArgument( - "The end offset less than start offset, end offset " - "is %d, start offset is %d.", - end_offset, - start_offset)); - size_t len = end_offset - start_offset; - if (len == 0) { - continue; - } - auto slice = out->Slice(out_offset, out_offset + len); // NOLINT - framework::TensorCopy(input->Slice(start_offset, end_offset), // NOLINT - place, - dev_ctx, - &slice); - out_offset += len; - (*in_idx) += 1; - } - - for (size_t i = 0; i < level; i++) { - out_lod->insert(out_lod->begin(), x.lod()[i]); - } - } - - private: - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - RunBase(scope, dev_place); - } -}; - -class MergeLoDTensorInferOp : public MergeLoDTensorOp { - public: - MergeLoDTensorInferOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : MergeLoDTensorOp(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - RunBase(scope, dev_place); - framework::Variable *in_true_var = scope.FindVar(Input("InTrue")); - framework::Variable *in_false_var = scope.FindVar(Input("InFalse")); - in_true_var->Clear(); - in_false_var->Clear(); - in_true_var->GetMutable(); - in_false_var->GetMutable(); - } -}; - -class MergeLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "The input phi::DenseTensor, contains complete lod information to " - "construct the output"); - AddInput("Mask", "A bool column vector which mask the input"); - AddInput("InTrue", "The True branch to be merged"); - AddInput("InFalse", "The False branch to be merged"); - AddOutput("Out", "The merged output phi::DenseTensor"); - AddAttr("level", "(int) the 
specific lod level to rank.") - .SetDefault(0) - .EqualGreaterThan(0); - AddComment( - R"DOC( - Merge True and False branches of phi::DenseTensor into a single Output, - with a mask at certain lod level. X is used to obtain complete - lod information. Please refer to SplitLoDTensorOp.)DOC"); - } -}; - -class MergeLoDTensorInferShape : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *context) const override { - OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "merge_lod_tensor"); - OP_INOUT_CHECK( - context->HasInput("Mask"), "Input", "Mask", "merge_lod_tensor"); - OP_INOUT_CHECK( - context->HasInput("InTrue"), "Input", "InTrue", "merge_lod_tensor"); - OP_INOUT_CHECK( - context->HasInput("InFalse"), "Input", "InFalse", "merge_lod_tensor"); - OP_INOUT_CHECK( - context->HasOutput("Out"), "Output", "Out", "merge_lod_tensor"); - auto mask_dim = context->GetInputDim("Mask"); - PADDLE_ENFORCE_EQ(mask_dim.size(), - 2, - platform::errors::InvalidArgument( - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond is a 2-D tensor and " - "the second dim size of cond is 1. " - "But now the cond's shape is [%s].\n", - mask_dim)); - if (context->IsRuntime() || mask_dim[1] > 0) { - PADDLE_ENFORCE_EQ(mask_dim[1], - 1, - platform::errors::InvalidArgument( - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond is a 2-D tensor " - "and the second dim size of cond is 1. " - "But now the cond's shape is [%s].\n", - mask_dim)); - } - - context->SetOutputDim("Out", context->GetInputDim("InTrue")); - } -}; - -template -class MergeLoDTensorGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr grad_op) const override { - grad_op->SetType("split_lod_tensor"); - grad_op->SetInput("X", this->OutputGrad("Out")); - grad_op->SetInput("Mask", this->Input("Mask")); - grad_op->SetOutput("OutTrue", this->InputGrad("InTrue")); - grad_op->SetOutput("OutFalse", this->InputGrad("InFalse")); - grad_op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(merge_lod_tensor, - ops::MergeLoDTensorOp, - ops::MergeLoDTensorOpProtoMaker, - ops::MergeLoDTensorInferShape, - ops::MergeLoDTensorGradMaker, - ops::MergeLoDTensorGradMaker); -REGISTER_OPERATOR( - merge_lod_tensor_infer, - ops::MergeLoDTensorInferOp, - ops::MergeLoDTensorOpProtoMaker, - ops::MergeLoDTensorInferShape, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/metrics/CMakeLists.txt b/paddle/fluid/operators/metrics/CMakeLists.txt deleted file mode 100644 index b968dbf288ee2..0000000000000 --- a/paddle/fluid/operators/metrics/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include(operators) -if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/metrics. - include(unity_build_rule.cmake) -endif() -register_operators() diff --git a/paddle/fluid/operators/metrics/precision_recall_op.cc b/paddle/fluid/operators/metrics/precision_recall_op.cc deleted file mode 100644 index 63385cb59171f..0000000000000 --- a/paddle/fluid/operators/metrics/precision_recall_op.cc +++ /dev/null @@ -1,250 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/metrics/precision_recall_op.h" - -namespace paddle { -namespace operators { - -class PrecisionRecallOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("MaxProbs"), - true, - platform::errors::NotFound( - "PrecisionRecallOp Input(MaxProbs) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Indices"), - true, - platform::errors::NotFound( - "PrecisionRecallOp Input(Indices) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Labels"), - true, - platform::errors::NotFound( - "PrecisionRecallOp Input(Labels) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("BatchMetrics"), - true, - platform::errors::NotFound( - "PrecisionRecallOp Output(BatchMetrics) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("AccumMetrics"), - true, - platform::errors::NotFound( - "PrecisionRecallOp Output(AccumMetrics) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("AccumStatesInfo"), - true, - platform::errors::NotFound( - "PrecisionRecallOp Output(AccumStatesInfo) should not be null.")); - - int64_t cls_num = - static_cast(ctx->Attrs().Get("class_number")); - auto max_probs_dims = ctx->GetInputDim("MaxProbs"); - auto labels_dims = ctx->GetInputDim("Labels"); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(max_probs_dims[1], - 1, - platform::errors::InvalidArgument( - "Each instance of PrecisionRecallOp " - "Input(MaxProbs) contains one max probability, " - "the shape of Input(MaxProbs) should be " - "[batch_size, 1], the 2nd dimension of " - "Input(MaxProbs) should be 1. But the 2nd " - "dimension we received is %d", - max_probs_dims[1])); - PADDLE_ENFORCE_EQ( - ctx->GetInputDim("Indices"), - max_probs_dims, - platform::errors::InvalidArgument( - "The shape of PrecisionRecallOp Input(Indices) should be same " - "with " - "max_probs_dims. But received the shape of Input(Indices) is " - "[%d, %d], max_probs_dims is [%d, %d]", - ctx->GetInputDim("Indices")[0], - ctx->GetInputDim("Indices")[1], - max_probs_dims[0], - max_probs_dims[1])); - PADDLE_ENFORCE_EQ( - max_probs_dims[0], - labels_dims[0], - platform::errors::InvalidArgument( - "The 1st dimension of PrecisionRecallOp Input(MaxProbs) and " - "Input(Labels) both should be batch_size" - "But the 1st dimension we received max_probs_dims[0] = %d, " - "labels_dims[0] = %d", - max_probs_dims[0], - labels_dims[0])); - PADDLE_ENFORCE_EQ(labels_dims[1], - 1, - platform::errors::InvalidArgument( - "The 2nd dimension of PrecisionRecallOp " - "Input(Labels) contains instance label and " - "the shape should be equal to 1. 
But the 2nd " - "dimension we received is %d", - labels_dims[1])); - } - if (ctx->HasInput("Weights")) { - auto weights_dims = ctx->GetInputDim("Weights"); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - weights_dims, - common::make_ddim({max_probs_dims[0], 1}), - platform::errors::InvalidArgument( - "The shape of PrecisionRecallOp Input(Weights) should be " - "[batch_size, 1]. But the shape we received is [%d, %d]", - weights_dims[0], - weights_dims[1])); - } - } - if (ctx->HasInput("StatesInfo")) { - auto states_dims = ctx->GetInputDim("StatesInfo"); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - states_dims, - common::make_ddim({cls_num, 4}), - platform::errors::InvalidArgument( - "The shape of PrecisionRecallOp Input(StatesInfo) should be " - "[class_number, 4]. But the shape we received is [%d, %d]", - states_dims[0], - states_dims[1])); - } - } - - // Layouts of BatchMetrics and AccumMetrics both are: - // [ - // macro average precision, macro average recall, macro average F1 score, - // micro average precision, micro average recall, micro average F1 score - // ] - ctx->SetOutputDim("BatchMetrics", {6}); - ctx->SetOutputDim("AccumMetrics", {6}); - // Shape of AccumStatesInfo is [class_number, 4] - // The layout of each row is: - // [ TP, FP, TN, FN ] - ctx->SetOutputDim("AccumStatesInfo", {cls_num, 4}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "MaxProbs"), - ctx.GetPlace()); - } -}; - -class PrecisionRecallOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("MaxProbs", - "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " - "where N is the batch size. Each row contains the max probability " - "of an instance which computed by the previous top_k (k=1) " - "operator."); - AddInput("Indices", - "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " - "where N is the batch size. Each row contains the corresponding " - "index which computed by the previous top_k (k=1) operator."); - AddInput("Labels", - "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " - "where N is the batch size. Each element is a label and the " - "value should be in [0, class_number - 1]."); - AddInput("Weights", - "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " - "where N is the batch size. This input is optional. If provided, " - "weight of instance would be considered when computing metrics.") - .AsDispensable(); - AddInput("StatesInfo", - "(Tensor, default Tensor) A 2-D tensor with shape D x 4, " - "where D is the number of classes. This input is optional. If " - "provided, current state will be accumulated to this state and " - "the accumulation state will be the output state.") - .AsDispensable(); - AddOutput("BatchMetrics", - "(Tensor, default Tensor) A 1-D tensor with shape {6}. " - "This output tensor contains metrics for current batch data. " - "The layout is [macro average precision, macro average recall, " - "macro f1 score, micro average precision, micro average recall, " - "micro f1 score]."); - AddOutput("AccumMetrics", - "(Tensor, default Tensor) A 1-D tensor with shape {6}. " - "This output tensor contains metrics for accumulated data. 
" - "The layout is [macro average precision, macro average recall, " - "macro f1 score, micro average precision, micro average recall, " - "micro f1 score]."); - AddOutput("AccumStatesInfo", - "(Tensor, default Tensor) A 2-D tensor with shape D x 4, " - "where D is equal to class number. This output tensor contains " - "accumulated state variables used to compute metrics. The layout " - "for each class is [true positives, false positives, " - "true negatives, false negatives]."); - AddAttr("class_number", "(int) Number of classes to be evaluated."); - AddComment(R"DOC( -Precision Recall Operator. - -When given Input(Indices) and Input(Labels), this operator can be used -to compute various metrics including: -1. macro average precision -2. macro average recall -3. macro f1 score -4. micro average precision -5. micro average recall -6. micro f1 score - -To compute the above metrics, we need to do statistics for true positives, -false positives and false negatives. Here the count of true negatives is not -necessary, but counting it may provide potential usage and the cost is -trivial, so the operator also provides the count of true negatives. - -We define state as a 2-D tensor with shape [class_number, 4]. Each row of a -state contains statistic variables for corresponding class. Layout of each row -is: TP(true positives), FP(false positives), TN(true negatives), -FN(false negatives). If Input(Weights) is provided, TP, FP, TN, FN will be -calculated by given weight instead of the instance count. - -This operator also supports metrics computing for cross-batch situation. To -achieve this, Input(StatesInfo) should be provided. State of current batch -data will be accumulated to Input(StatesInfo) and Output(AccumStatesInfo) -is the accumulation state. - -Output(BatchMetrics) is metrics of current batch data while -Output(AccumStatesInfo) is metrics of accumulation data. - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - precision_recall, - ops::PrecisionRecallOp, - ops::PrecisionRecallOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(precision_recall, - CPU, - ALL_LAYOUT, - ops::PrecisionRecallKernel, - float, - double) {} diff --git a/paddle/fluid/operators/metrics/precision_recall_op.h b/paddle/fluid/operators/metrics/precision_recall_op.h deleted file mode 100644 index 6eef5658c5c00..0000000000000 --- a/paddle/fluid/operators/metrics/precision_recall_op.h +++ /dev/null @@ -1,186 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -using EigenMatrix = framework::EigenMatrix; - -enum StateVariable { TP = 0, FP, TN, FN }; - -template -class PrecisionRecallKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in0 = ctx.Input("Indices"); - auto* in1 = ctx.Input("Labels"); - auto* in2 = ctx.Input("Weights"); - auto* in3 = ctx.Input("StatesInfo"); - auto* out0 = ctx.Output("BatchMetrics"); - auto* out1 = ctx.Output("AccumMetrics"); - auto* out2 = ctx.Output("AccumStatesInfo"); - - const int* ids_data = in0->data(); - const int* labels_data = in1->data(); - size_t cls_num = static_cast(ctx.Attr("class_number")); - const T* weights_data = in2 ? in2->data() : nullptr; - const T* states_data = in3 ? in3->data() : nullptr; - double* batch_metrics_data = out0->mutable_data(ctx.GetPlace()); - double* accum_metrics_data = out1->mutable_data(ctx.GetPlace()); - out2->mutable_data(ctx.GetPlace()); - auto accum_states = EigenMatrix::From(*out2); - accum_states.setZero(); - T* accum_states_data = out2->data(); - - size_t sample_num = in0->dims()[0]; - size_t state_var_num = 4; // TP FP TN FN - - // get states info for current batch - for (size_t i = 0; i < sample_num; ++i) { - size_t idx = ids_data[i]; - size_t label = labels_data[i]; - - PADDLE_ENFORCE_GE( - idx, - 0, - platform::errors::InvalidArgument( - "Class index of each instance should be " - "greater than or equal to 0, But the index we received is %d", - idx)); - PADDLE_ENFORCE_LT(idx, - cls_num, - platform::errors::InvalidArgument( - "Class index of each instance should be less than " - "cls_num = %d, But the index we received is %d", - cls_num, - idx)); - - PADDLE_ENFORCE_GE(label, - 0, - platform::errors::InvalidArgument( - "Label of each instance should be greater than or " - "equal to 0, But the label we received is %d", - label)); - PADDLE_ENFORCE_LT(label, - cls_num, - platform::errors::InvalidArgument( - "Label of each instance should be less than " - "cls_num = %d, But the label we received is %d", - cls_num, - label)); - - T w = weights_data ? 
weights_data[i] : 1.0; - if (idx == label) { - accum_states_data[idx * state_var_num + TP] += w; - for (size_t j = 0; j < cls_num; ++j) { - accum_states_data[j * state_var_num + TN] += w; - } - accum_states_data[idx * state_var_num + TN] -= w; - } else { - accum_states_data[label * state_var_num + FN] += w; - accum_states_data[idx * state_var_num + FP] += w; - for (size_t j = 0; j < cls_num; ++j) { - accum_states_data[j * state_var_num + TN] += w; - } - accum_states_data[idx * state_var_num + TN] -= w; - accum_states_data[label * state_var_num + TN] -= w; - } - } - - ComputeMetrics( - accum_states_data, batch_metrics_data, state_var_num, cls_num); - - if (states_data) { - for (size_t i = 0; i < cls_num; ++i) { - for (size_t j = 0; j < state_var_num; ++j) { - size_t idx = i * state_var_num + j; - accum_states_data[idx] += states_data[idx]; - } - } - } - - ComputeMetrics( - accum_states_data, accum_metrics_data, state_var_num, cls_num); - } - - // expose to be reused - static inline T CalcPrecision(T tp_count, T fp_count) { - if (tp_count > 0.0 || fp_count > 0.0) { - return tp_count / (tp_count + fp_count); - } - return 1.0; - } - - static inline T CalcRecall(T tp_count, T fn_count) { - if (tp_count > 0.0 || fn_count > 0.0) { - return tp_count / (tp_count + fn_count); - } - return 1.0; - } - - static inline T CalcF1Score(T precision, T recall) { - if (precision > 0.0 || recall > 0.0) { - return 2 * precision * recall / (precision + recall); - } - return 0.0; - } - - protected: - void ComputeMetrics(const T* states_data, - double* metrics_data, - size_t state_var_num, - size_t cls_num) const { - T total_tp_count = 0; - T total_fp_count = 0; - T total_fn_count = 0; - T macro_avg_precision = 0.0; - T macro_avg_recall = 0.0; - - for (size_t i = 0; i < cls_num; ++i) { - T tp_count = states_data[i * state_var_num + TP]; - T fp_count = states_data[i * state_var_num + FP]; - T fn_count = states_data[i * state_var_num + FN]; - total_tp_count += tp_count; - total_fp_count += fp_count; - total_fn_count += fn_count; - macro_avg_precision += CalcPrecision(tp_count, fp_count); - macro_avg_recall += CalcRecall(tp_count, fn_count); - } - macro_avg_precision /= cls_num; - macro_avg_recall /= cls_num; - T macro_f1_score = CalcF1Score(macro_avg_precision, macro_avg_recall); - - T micro_avg_precision = CalcPrecision(total_tp_count, total_fp_count); - T micro_avg_recall = CalcRecall(total_tp_count, total_fn_count); - T micro_f1_score = CalcF1Score(micro_avg_precision, micro_avg_recall); - - // fill metrics data - metrics_data[0] = macro_avg_precision; - metrics_data[1] = macro_avg_recall; - metrics_data[2] = macro_f1_score; - metrics_data[3] = micro_avg_precision; - metrics_data[4] = micro_avg_recall; - metrics_data[5] = micro_f1_score; - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/metrics/unity_build_rule.cmake b/paddle/fluid/operators/metrics/unity_build_rule.cmake deleted file mode 100644 index 58acbc3b1e62f..0000000000000 --- a/paddle/fluid/operators/metrics/unity_build_rule.cmake +++ /dev/null @@ -1,8 +0,0 @@ -# This file records the Unity Build compilation rules. -# The source files in a `register_unity_group` called are compiled in a unity -# file. -# Generally, the combination rules in this file do not need to be modified. -# If there are some redefined error in compiling with the source file which -# in combination rule, you can remove the source file from the following rules. 
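For reference, the macro/micro averaging performed by ComputeMetrics above can be checked with a small standalone sketch. This is a minimal illustration with hypothetical names and made-up counts; it only mirrors the CalcPrecision/CalcRecall/CalcF1Score conventions of the deleted kernel, including their degenerate-case defaults.

#include <cstdio>

// Per-class counts, laid out like one AccumStatesInfo row: [TP, FP, TN, FN].
struct ClassState { double tp, fp, tn, fn; };

double Precision(double tp, double fp) { return (tp > 0 || fp > 0) ? tp / (tp + fp) : 1.0; }
double Recall(double tp, double fn) { return (tp > 0 || fn > 0) ? tp / (tp + fn) : 1.0; }
double F1(double p, double r) { return (p > 0 || r > 0) ? 2 * p * r / (p + r) : 0.0; }

int main() {
  const ClassState s[2] = {{3, 1, 5, 1}, {4, 2, 3, 1}};  // two classes, made-up counts
  double macro_p = 0, macro_r = 0, tp = 0, fp = 0, fn = 0;
  for (const auto& c : s) {
    macro_p += Precision(c.tp, c.fp);    // macro: average the per-class metrics
    macro_r += Recall(c.tp, c.fn);
    tp += c.tp; fp += c.fp; fn += c.fn;  // micro: pool the raw counts first
  }
  macro_p /= 2; macro_r /= 2;
  std::printf("macro F1 = %.3f, micro F1 = %.3f\n",
              F1(macro_p, macro_r), F1(Precision(tp, fp), Recall(tp, fn)));
  return 0;
}

The deleted unity-build grouping for these metric ops continues below.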
-register_unity_group(cc accuracy_op.cc auc_op.cc precision_recall_op.cc) -register_unity_group(cu accuracy_op.cu auc_op.cu) diff --git a/paddle/fluid/operators/minus_op.cc b/paddle/fluid/operators/minus_op.cc deleted file mode 100644 index 64bc176d97149..0000000000000 --- a/paddle/fluid/operators/minus_op.cc +++ /dev/null @@ -1,162 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/minus_op.h" - -#include -#include -#include -#include - -namespace paddle { -namespace operators { - -class MinusOp : public framework::OperatorWithKernel { - public: - MinusOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - platform::errors::NotFound("Input(X) of MinusOp is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Y"), - true, - platform::errors::NotFound("Input(Y) of MinusOp is not found.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Out"), - true, - platform::errors::NotFound("Output(Out) of MinusOp is not found.")); - - auto x_dims = ctx->GetInputDim("X"); - auto y_dims = ctx->GetInputDim("Y"); - - if (ctx->IsRuntime() || - (common::product(x_dims) > 0 && common::product(y_dims) > 0)) { - PADDLE_ENFORCE_EQ( - x_dims, - y_dims, - platform::errors::InvalidArgument( - "Minus operator must take two tensor with same dim, but received " - "input X dim is:[%s], Y dim is:[%s]", - x_dims, - y_dims)); - } - ctx->SetOutputDim("Out", x_dims); - ctx->ShareLoD("X", /*->*/ "Out"); - } -}; - -class MinusOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The left tensor of minus operator."); - AddInput("Y", "The right tensor of minus operator."); - AddOutput("Out", "The output tensor of minus operator."); - - AddComment(R"DOC( -Minus Operator. - -Equation: - - $Out = X - Y$ - -Both the input `X` and `Y` can carry the LoD (Level of Details) information, -or not. But the output only shares the LoD information with input `X`. 
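-A short worked example (illustrative values only): for X = [3, 1] and
-Y = [1, 4], Out = X - Y = [2, -3]. In the backward pass, assembled by the
-gradient makers below, both gradients reuse the existing `scale` op:
-dX = 1.0 * dOut and dY = -1.0 * dOut.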
- -)DOC"); - } -}; - -class MinusGradDescMaker : public framework::GradOpDescMakerBase { - public: - using framework::GradOpDescMakerBase::GradOpDescMakerBase; - - std::vector> operator()() const override { - std::vector> ops; - auto x_g = this->InputGrad("X"); - if (!x_g.empty()) { - auto *x_g_op = new framework::OpDesc(); - x_g_op->SetType("scale"); - x_g_op->SetInput("X", this->OutputGrad("Out")); - x_g_op->SetOutput("Out", x_g); - x_g_op->SetAttr("scale", 1.0f); - ops.emplace_back(x_g_op); - } - - auto y_g = this->InputGrad("Y"); - if (!y_g.empty()) { - auto *y_g_op = new framework::OpDesc(); - y_g_op->SetType("scale"); - y_g_op->SetInput("X", this->OutputGrad("Out")); - y_g_op->SetOutput("Out", y_g); - y_g_op->SetAttr("scale", -1.0f); - ops.emplace_back(y_g_op); - } - - return ops; - } -}; - -class MinusGradMaker : public imperative::GradOpBaseMakerBase { - public: - using imperative::GradOpBaseMakerBase::GradOpBaseMakerBase; - - std::shared_ptr operator()() const override { - auto x_g = this->InputGrad("X"); - auto y_g = this->InputGrad("Y"); - - auto node = this->NewGradNode(); - - if (!x_g.empty()) { - imperative::TracedGradOp op(node); - op.SetType("scale"); - op.SetInput("X", this->OutputGrad("Out")); - op.SetOutput("Out", x_g); - op.SetAttr("scale", 1.0f); - op.SetDefaultAttrsMap(DefaultAttrsMap()); - } - - if (!y_g.empty()) { - imperative::TracedGradOp op(node); - op.SetType("scale"); - op.SetInput("X", this->OutputGrad("Out")); - op.SetOutput("Out", y_g); - op.SetAttr("scale", -1.0f); - op.SetDefaultAttrsMap(DefaultAttrsMap()); - } - - return node; - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(minus, - ops::MinusOp, - ops::MinusOpMaker, - ops::MinusGradDescMaker, - ops::MinusGradMaker); -PD_REGISTER_STRUCT_KERNEL(minus, CPU, ALL_LAYOUT, ops::MinusKernel, float) {} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_REGISTER_STRUCT_KERNEL(minus, GPU, ALL_LAYOUT, ops::MinusKernel, float) {} -#endif diff --git a/paddle/fluid/operators/minus_op.h b/paddle/fluid/operators/minus_op.h deleted file mode 100644 index 8cc18fe0c97ec..0000000000000 --- a/paddle/fluid/operators/minus_op.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" - -namespace paddle { -namespace operators { - -template <typename T, typename DeviceContext> -class MinusKernel : public framework::OpKernel<T> { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* left_tensor = context.Input<phi::DenseTensor>("X"); - auto* right_tensor = context.Input<phi::DenseTensor>("Y"); - auto* out_tensor = context.Output<phi::DenseTensor>("Out"); - - out_tensor->mutable_data<T>(context.GetPlace()); - auto& dev = - *context.template device_context<DeviceContext>().eigen_device(); - EigenSub<std::decay_t<decltype(dev)>, T>::Eval( - dev, - framework::EigenVector<T>::Flatten(*out_tensor), - framework::EigenVector<T>::Flatten(*left_tensor), - framework::EigenVector<T>::Flatten(*right_tensor)); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/miopen_rnn_cache.h b/paddle/fluid/operators/miopen_rnn_cache.h index 19255363259b5..2a8b38d38d577 100644 --- a/paddle/fluid/operators/miopen_rnn_cache.h +++ b/paddle/fluid/operators/miopen_rnn_cache.h @@ -92,10 +92,10 @@ struct CudnnRNNCache { const auto numDirections = is_bidirec_ ? 2 : 1; - PADDLE_ENFORCE_EQ(miopen_type, - miopenFloat, - platform::errors::InvalidArgument( - "MIOPEN do not support double datatype.")); + PADDLE_ENFORCE_EQ( + miopen_type, + miopenFloat, + phi::errors::InvalidArgument("MIOPEN does not support double datatype.")); auto miopen_size = sizeof(float); x_desc_ = new miopenTensorDescriptor_t[seq_length_]; @@ -259,7 +259,7 @@ struct CudnnRNNCache { PADDLE_ENFORCE_EQ( weights_size_, miopen_size * weight_numel, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The miopen lstm and setting weight size should be the same.")); int dim_w[3]; diff --git a/paddle/fluid/operators/modified_huber_loss_op.cc b/paddle/fluid/operators/modified_huber_loss_op.cc index c6d553865277e..4da376ce97487 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cc +++ b/paddle/fluid/operators/modified_huber_loss_op.cc @@ -33,16 +33,16 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument("Input(input) rank should be 2, " - "but received input rank(%d) != 2", - x_dims.size())); + phi::errors::InvalidArgument("Input(input) rank should be 2, " + "but received input rank(%d) != 2", + x_dims.size())); if (ctx->IsRuntime() || (common::product(x_dims) > 0 && common::product(y_dims) > 0)) { PADDLE_ENFORCE_EQ( x_dims, y_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(input) and Input(label) should have the same " "shape, but received input shape [%s] != label shape [%s]", x_dims, @@ -52,7 +52,7 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(x_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(input) should be 1, " "but received second dimension of input (%d) != 1", x_dims[1])); @@ -123,7 +123,7 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( intermediate_dims, y_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Intermediate variable which will be reused in " "backward processing should be the same as " "the shape of Input(label), but received Intermediate variable " @@ -134,7 +134,7 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( out_grad_dims,
y_dims, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of output gradient should be the same as " "the shape of Input(label), but received the output gradient " "shape [%s] != label shape [%s]", diff --git a/paddle/fluid/operators/modified_huber_loss_op.h b/paddle/fluid/operators/modified_huber_loss_op.h index 88cb91d454e72..d0fb4dd40a667 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.h +++ b/paddle/fluid/operators/modified_huber_loss_op.h @@ -32,7 +32,7 @@ struct CheckLabelValue { PADDLE_ENFORCE_EQ( val == static_cast(0) || val == static_cast(1), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(label) value of modified_huber_loss_op expected to be 0 " "or 1, but got %ld. Please check label value.", val)); diff --git a/paddle/fluid/operators/nccl/nccl_op.cc b/paddle/fluid/operators/nccl/nccl_op.cc index c5a1097e2f157..dd3fd52d3b24d 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cc +++ b/paddle/fluid/operators/nccl/nccl_op.cc @@ -34,13 +34,13 @@ class NCCLInitOp : public framework::OperatorBase { const platform::Place &place) const override { PADDLE_ENFORCE_NOT_NULL( scope.FindVar(Input(kParallelScopes)), - platform::errors::NotFound("Can not find variable '%s' in the scope.", - kParallelScopes)); + phi::errors::NotFound("Can not find variable '%s' in the scope.", + kParallelScopes)); const auto &name = Output("Communicator"); PADDLE_ENFORCE_NOT_NULL( scope.FindVar(name), - platform::errors::NotFound( - "Output(%s) is needed for ncclInit operator.", name)); + phi::errors::NotFound("Output(%s) is needed for ncclInit operator.", + name)); // A parallel do may not use all the gpus. For example, the batch size is 7 // in the last batch while we have 8 gpu. In this case, parallel_do will // create 7 parallel scopes, so should ncclInitOp create 7 gpu peers @@ -52,7 +52,7 @@ class NCCLInitOp : public framework::OperatorBase { } PADDLE_ENFORCE_EQ(!gpus.empty(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "gpus is empty, NCCL must init with gpus")); platform::Communicator *comm = @@ -104,11 +104,10 @@ class NCCLAllReduceOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLAllReduce"); std::string reduction = ctx->Attrs().Get("reduction"); - PADDLE_ENFORCE_EQ( - (reduction == "ncclSum" || reduction == "ncclProd" || - reduction == "ncclMin" || reduction == "ncclMax"), - true, - platform::errors::InvalidArgument("invalid nccl reduction.")); + PADDLE_ENFORCE_EQ((reduction == "ncclSum" || reduction == "ncclProd" || + reduction == "ncclMin" || reduction == "ncclMax"), + true, + phi::errors::InvalidArgument("invalid nccl reduction.")); auto x_dims = ctx->GetInputsDim("X"); ctx->SetOutputsDim("Out", x_dims); @@ -150,11 +149,10 @@ class NCCLReduceOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLReduce"); std::string reduction = ctx->Attrs().Get("reduction"); - PADDLE_ENFORCE_EQ( - (reduction == "ncclSum" || reduction == "ncclProd" || - reduction == "ncclMin" || reduction == "ncclMax"), - true, - platform::errors::InvalidArgument("invalid nccl reduction.")); + PADDLE_ENFORCE_EQ((reduction == "ncclSum" || reduction == "ncclProd" || + reduction == "ncclMin" || reduction == "ncclMax"), + true, + phi::errors::InvalidArgument("invalid nccl reduction.")); auto x_dims = ctx->GetInputsDim("X"); ctx->SetOutputsDim("Out", x_dims); @@ -201,10 +199,9 @@ class NCCLBcastOp : public 
framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NCCLBcast"); int root = ctx->Attrs().Get("root"); - PADDLE_ENFORCE_EQ( - root != platform::kInvalidGPUId, - true, - platform::errors::InvalidArgument("Bcast root must be set.")); + PADDLE_ENFORCE_EQ(root != platform::kInvalidGPUId, + true, + phi::errors::InvalidArgument("Bcast root must be set.")); auto x_dims = ctx->GetInputsDim("X"); ctx->SetOutputsDim("Out", x_dims); diff --git a/paddle/fluid/operators/nccl/nccl_op.cu.cc b/paddle/fluid/operators/nccl/nccl_op.cu.cc index abb24cc8cae10..f1d6073a37231 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cu.cc +++ b/paddle/fluid/operators/nccl/nccl_op.cu.cc @@ -46,7 +46,7 @@ static ncclRedOp_t str_to_nccl_red_type(std::string reduction) { auto it = str_to_type.find(reduction); PADDLE_ENFORCE_EQ(it != str_to_type.end(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid nccl reduction. Must be ncclMin | ncclMax | " "ncclProd | ncclSum")); return it->second; @@ -58,7 +58,7 @@ class NCCLAllReduceKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "This kernel only runs on GPU device.")); auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); @@ -91,10 +91,10 @@ template class NCCLReduceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "This kernel only runs on GPU device.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("This kernel only runs on GPU device.")); auto x = ctx.Input("X"); // x0, x1, x2 auto out = ctx.Output("Out"); auto* comm = ctx.Input("Communicator"); @@ -132,10 +132,10 @@ template class NCCLBcastKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "This kernel only runs on GPU device.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("This kernel only runs on GPU device.")); int root = ctx.Attr("root"); auto* comm = ctx.Input("Communicator"); // device id diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index 1b622b7571667..ac260615969b4 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -45,7 +45,7 @@ class NCEOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[0], label_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Input) and Input(Label) should be " "equal in runtime. But received: Input(Input)'s shape = [%s] " "with 1st dim = %d, Input(Label)'s shape = [%s] with 1st dim = " @@ -61,7 +61,7 @@ class NCEOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("Weight")[0], ctx->GetInputDim("Bias")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(Weight) and Input(Bias) " "should be equal. 
But received: Input(Weight)'s shape = [%s] " "with 1st dim = %d, and Input(Bias)'s shape = [%s] with 1st dim " @@ -78,7 +78,7 @@ class NCEOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( num_total_classes, ctx->GetInputDim("Weight")[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of total classes should be equal to the first " "dimension of Input(Weight). But received: Attr(num_total_classes) " "= %d, Input(Weight)'s shape = [%s] with 1st dim = %d.", @@ -89,7 +89,7 @@ class NCEOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( custom_neg_classes.size(), static_cast(num_neg_samples), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of Attr(custom_neg_classes) should be equal " "to the number of negative samples. But received: " "custom_neg_classes.size() = %d, num_neg_samples = %d.", diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index 41262dca6e53c..25a970a5fa6da 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -104,7 +104,7 @@ class NCEKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_probs->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in Input(CustomDistProbs) " "should be equal to the number of total classes. But Received: " "Input(CustomDistProbs).numel() = %d, Attr(num_total_classes) " @@ -114,7 +114,7 @@ class NCEKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_alias->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in Input(CustomDistAlias) " "should be equal to the number of total classes. But Received: " "Input(CustomDistAlias).numel() = %d, Attr(num_total_classes) " @@ -124,7 +124,7 @@ class NCEKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_alias_probs->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in " "Input(CustomDistAliasProbs) " "should be equal to the number of total classes. But Received: " @@ -144,7 +144,7 @@ class NCEKernel : public framework::OpKernel { break; } default: { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Unsupported SamplerType. SamplerType should be 0: Uniform, " "1: LogUniform or 2: CustomDist. Received SamplerType: %d", sampler_type)); @@ -180,7 +180,7 @@ class NCEKernel : public framework::OpKernel { for (int x = 0; x < sample_labels->numel(); x++) { PADDLE_ENFORCE_GE(sample_labels_data[x], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ValueError: Every sample label should be " "non-negative. But received: " "Input(SampleLabels)[%d] = %d", @@ -290,7 +290,7 @@ class NCEGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_probs->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in Input(CustomDistProbs) " "should be equal to the number of total classes. 
But Received: " "Input(CustomDistProbs).numel() = %d, Attr(num_total_classes) " @@ -300,7 +300,7 @@ class NCEGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_alias->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in Input(CustomDistAlias) " "should be equal to the number of total classes. But Received: " "Input(CustomDistAlias).numel() = %d, Attr(num_total_classes) " @@ -310,7 +310,7 @@ class NCEGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dist_alias_probs->numel(), num_total_classes, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The number of elements in " "Input(CustomDistAliasProbs) " "should be equal to the number of total classes. But Received: " @@ -330,7 +330,7 @@ class NCEGradKernel : public framework::OpKernel { break; } default: { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Unsupported SamplerType. SamplerType should be 0: Uniform, " "1: LogUniform or 2: CustomDist. Received SamplerType: %d", sampler_type)); @@ -399,7 +399,7 @@ class NCEGradKernel : public framework::OpKernel { auto *table_t = context.Input("Weight"); table_dim = table_t->value().dims(); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The parameter Weight of a NCE_OP " "must be either phi::DenseTensor or SelectedRows")); } diff --git a/paddle/fluid/operators/number_count_op.cc b/paddle/fluid/operators/number_count_op.cc index a67d6455bcf5f..7fb293891d3a5 100644 --- a/paddle/fluid/operators/number_count_op.cc +++ b/paddle/fluid/operators/number_count_op.cc @@ -35,7 +35,7 @@ class NumberCountOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(number_dtype, framework::proto::VarType::INT64, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the number_dtype should be int64")); return phi::KernelKey(number_dtype, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc b/paddle/fluid/operators/onednn/interpolate_onednn_op.cc similarity index 97% rename from paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc rename to paddle/fluid/operators/onednn/interpolate_onednn_op.cc index 34e9679b29bb6..eff574b5a577b 100644 --- a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc +++ b/paddle/fluid/operators/onednn/interpolate_onednn_op.cc @@ -115,9 +115,8 @@ class InterpolateOneDNNKernel : public framework::OpKernel { std::all_of( out_dims.begin(), out_dims.end(), [](int i) { return i > 0; }), 0, - platform::errors::InvalidArgument( - "out_d, out_h, out_w of Op(interpolate) " - "should be greater than 0.")); + phi::errors::InvalidArgument("out_d, out_h, out_w of Op(interpolate) " + "should be greater than 0.")); const std::vector nc_dims = {in_dims[0], in_dims[1]}; out_dims.insert(out_dims.begin(), nc_dims.begin(), nc_dims.end()); diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/onednn/lrn_onednn_op.cc similarity index 94% rename from paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc rename to paddle/fluid/operators/onednn/lrn_onednn_op.cc index 7b22d5d3c6ff0..77d76add0174e 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/onednn/lrn_onednn_op.cc @@ -69,7 +69,7 @@ class LRNOneDNNHandler PADDLE_ENFORCE_EQ( ctx.Attr("is_test"), false, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "is_test 
attribute should be set to False in training phase.")); const int n = ctx.Attr("n"); @@ -123,11 +123,11 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { PADDLE_ENFORCE_EQ( is_float_type, true, - platform::errors::PreconditionNotMet("DNNL LRN must use float data.")); - PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), - true, - paddle::platform::errors::PreconditionNotMet( - "Operator DNNL LRN must use CPUPlace")); + phi::errors::PreconditionNotMet("DNNL LRN must use float data.")); + PADDLE_ENFORCE_EQ( + platform::is_cpu_place(ctx.GetPlace()), + true, + phi::errors::PreconditionNotMet("Operator DNNL LRN must use CPUPlace")); auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); @@ -169,11 +169,11 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { const bool is_float_type = std::is_same::value; PADDLE_ENFORCE_EQ(is_float_type, true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "DNNL LRN GradOpKernel must use float data.")); PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true, - paddle::platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Operator DNNL LRNGrad must use CPUPlace")); auto in_x = ctx.Input("X"); diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/onednn/matmul_onednn_op.cc similarity index 97% rename from paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc rename to paddle/fluid/operators/onednn/matmul_onednn_op.cc index 80af1b00b743c..b501cec806069 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc +++ b/paddle/fluid/operators/onednn/matmul_onednn_op.cc @@ -400,15 +400,15 @@ class MatMulMKLDNNKernel : public paddle::framework::OpKernel { trans_y, out); } else if (is_bfloat16) { - ExecuteMatMulV1(ctx, - onednn_engine, - x, - x_bd_dims, - trans_x, - y, - y_bd_dims, - trans_y, - out); + ExecuteMatMulV1(ctx, + onednn_engine, + x, + x_bd_dims, + trans_x, + y, + y_bd_dims, + trans_y, + out); } else { ExecuteMatMulV1(ctx, onednn_engine, @@ -661,7 +661,7 @@ REGISTER_OP_KERNEL(matmul, MKLDNN, ::phi::CPUPlace, MatMulMKLDNNKernel, - MatMulMKLDNNKernel, + MatMulMKLDNNKernel, MatMulMKLDNNKernel, MatMulMKLDNNKernel); @@ -669,4 +669,4 @@ REGISTER_OP_KERNEL(matmul_grad, MKLDNN, ::phi::CPUPlace, MatMulGradMKLDNNKernel, - MatMulGradMKLDNNKernel); + MatMulGradMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/onednn/quantize_onednn_op.cc similarity index 96% rename from paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc rename to paddle/fluid/operators/onednn/quantize_onednn_op.cc index 9b1cff1008677..3ad56469c922c 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/onednn/quantize_onednn_op.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -39,10 +39,10 @@ class QuantOpKernel : public framework::OpKernel { PADDLE_ENFORCE_NE(quantization_scale, 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Quantization scale must be different from 0.0f")); PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Quantization shift must be lower than or equal to " "255 and greater than or equal to 0, but got %f", quantization_shift)); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/onednn/requantize_onednn_op.cc similarity index 92% rename from paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc rename to paddle/fluid/operators/onednn/requantize_onednn_op.cc index f467a9c57a8ca..2d277625dc34d 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/onednn/requantize_onednn_op.cc @@ -47,17 +47,17 @@ class ReQuantOpKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( scale_in, 0.0f, - platform::errors::InvalidArgument("Scale of input cannot be 0.0")); + phi::errors::InvalidArgument("Scale of input cannot be 0.0")); PADDLE_ENFORCE_NE( scale_out, 0.0f, - platform::errors::InvalidArgument("Scale of output cannot be 0.0")); + phi::errors::InvalidArgument("Scale of output cannot be 0.0")); if (shift_in != 0) { PADDLE_ENFORCE_EQ( input->dtype(), DataType::UINT8, - platform::errors::Unimplemented("Requantize does not support nonzero " - "shift for signed input.")); + phi::errors::Unimplemented("Requantize does not support nonzero " "shift for signed input.")); } auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); @@ -140,4 +140,4 @@ PD_REGISTER_STRUCT_KERNEL(requantize, ops::ReQuantOpKernel, int8_t, uint8_t, - paddle::platform::bfloat16) {} + phi::dtype::bfloat16) {} diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/onednn/reshape_onednn_op.cc similarity index 94% rename from paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc rename to paddle/fluid/operators/onednn/reshape_onednn_op.cc index 8632160b04ae0..7dba03ca6a799 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/onednn/reshape_onednn_op.cc @@ -37,7 +37,7 @@ static std::vector extract_shape( PADDLE_ENFORCE_EQ( tensor->dims(), common::make_ddim({1}), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If the element type of 'shape' in ReshapeOp is phi::DenseTensor, " "the element's shape must be [1]. But received the element's shape " "is [%s]", @@ -104,7 +104,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { break; case ReshapeKernelOpName::flatten: default: - PADDLE_THROW(paddle::platform::errors::OutOfRange( + PADDLE_THROW(phi::errors::OutOfRange( "Reshape kernel does not support that operator name")); } } @@ -180,7 +180,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( unk_dim_idx, -1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only one dimension value of 'shape' in ReshapeOp can " "be -1.
But received shape = [%s], shape[%d] is also -1.", common::make_ddim(shape), @@ -190,7 +190,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_LT( static_cast(i), in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The index of 0 in `shape` must be less than " "the input tensor X's dimensions. " "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " @@ -203,7 +203,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( shape[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each dimension value of 'shape' in ReshapeOp must not " "be negative except one unknown dimension. " "But received shape = [%s], shape[%d] = %d.", @@ -227,7 +227,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( output_shape[unk_dim_idx] * capacity, -in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' attribute in ReshapeOp is invalid. " "The input tensor X's size must be divisible by known " "capacity of 'shape'. " @@ -245,7 +245,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( capacity, in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' in ReshapeOp is invalid. " "The input tensor X's size must be equal to the capacity of " "'shape'. " @@ -319,7 +319,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { InferShapeFlattenGradOp(ctx, x_dims); break; default: - PADDLE_THROW(paddle::platform::errors::OutOfRange( + PADDLE_THROW(phi::errors::OutOfRange( "Reshape grad kernel does not support that operator name")); } } @@ -345,7 +345,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeMKLDNNKernel, - ops::ReshapeMKLDNNKernel); REGISTER_OP_KERNEL( @@ -353,7 +353,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, - ops::ReshapeGradMKLDNNKernel); REGISTER_OP_KERNEL( @@ -361,7 +361,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeMKLDNNKernel, - ops::ReshapeMKLDNNKernel); REGISTER_OP_KERNEL( @@ -369,7 +369,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, - ops::ReshapeGradMKLDNNKernel); REGISTER_OP_KERNEL( @@ -377,7 +377,7 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeMKLDNNKernel, - ops::ReshapeMKLDNNKernel); REGISTER_OP_KERNEL( @@ -385,5 +385,5 @@ REGISTER_OP_KERNEL( MKLDNN, phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, - ops::ReshapeGradMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc b/paddle/fluid/operators/onednn/shuffle_channel_onednn_op.cc similarity index 97% rename from paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc rename to paddle/fluid/operators/onednn/shuffle_channel_onednn_op.cc index 6f656c5f1a2a1..19396cbe489ce 100644 --- a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc +++ b/paddle/fluid/operators/onednn/shuffle_channel_onednn_op.cc @@ -75,4 +75,4 @@ REGISTER_OP_KERNEL(shuffle_channel, MKLDNN, phi::CPUPlace, ops::ShuffleChannelMKLDNNKernel, - ops::ShuffleChannelMKLDNNKernel); + ops::ShuffleChannelMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/onednn/transpose_onednn_op.cc similarity index 100% rename from paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc rename to paddle/fluid/operators/onednn/transpose_onednn_op.cc diff --git a/paddle/fluid/operators/ops_signature/elementwise_sig.cc b/paddle/fluid/operators/ops_signature/elementwise_sig.cc
index b1150268fbad1..82f891bb48a00 100644 --- a/paddle/fluid/operators/ops_signature/elementwise_sig.cc +++ b/paddle/fluid/operators/ops_signature/elementwise_sig.cc @@ -168,7 +168,7 @@ KernelSignature ElementwiseDivGradOpArgumentMapping( KernelSignature ElementwiseDivDoubleGradOpArgumentMapping( const ArgumentMappingContext& ctx UNUSED) { return KernelSignature("divide_double_grad", - {"Y", "Out", "DX", "DDX", "DDY"}, + {"Y", "Out", "Out@GRAD", "DX", "DDX", "DDY"}, {"axis"}, {"Y@GRAD", "DOut", "DDOut"}); } diff --git a/paddle/fluid/operators/ops_signature/pow2_decay_with_linear_warmup_sig.cc b/paddle/fluid/operators/ops_signature/pow2_decay_with_linear_warmup_sig.cc deleted file mode 100644 index cf35b6e998095..0000000000000 --- a/paddle/fluid/operators/ops_signature/pow2_decay_with_linear_warmup_sig.cc +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature Pow2DecayWithLinearWarmupOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("pow2_decay_with_linear_warmup", - {"LearningRate", "Step"}, - {"warmup_steps", "total_steps", "base_lr", "end_lr"}, - {"LearningRateOut", "StepOut"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(pow2_decay_with_linear_warmup, - phi::Pow2DecayWithLinearWarmupOpArgumentMapping); diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc index 6c64c6a1f72ff..23441206a55c1 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc @@ -36,14 +36,14 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { "DecayedAdagradOp"); PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, " "but the received is %s", ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front())); PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, " "but the received is %s", ctx->Inputs("Grad").front(), @@ -57,26 +57,26 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(common::product(lr_dims), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Maybe the Input variable LearningRate has not " "been initialized. 
You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.")); - PADDLE_ENFORCE_EQ(common::product(lr_dims), - 1, - platform::errors::InvalidArgument( - "LearningRate should have one element")); + PADDLE_ENFORCE_EQ( + common::product(lr_dims), + 1, + phi::errors::InvalidArgument("LearningRate should have one element")); auto param_dims = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Grad"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Param and Grad input of DecayedAdagradOp should have " "the same dimension.")); PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Moment"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Param and Moment input of DecayedAdagradOp should have " "the same dimension.")); diff --git a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu index b51d12c003e38..a54aebc3eba5e 100644 --- a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu +++ b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu @@ -60,7 +60,7 @@ static void CheckCommContextHasRingId( const distributed::CommContextManager &comm_context_manager, int ring_id) { PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)), true, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "You choose to use new communication library by " "setting environment " "variable FLAGS_dynamic_static_unified_comm True. " @@ -1773,7 +1773,7 @@ void DistributedFusedLambKernel( comm_context_manager.Get(std::to_string(ring_ids[0]))); PADDLE_ENFORCE_NE(comm_ctx, nullptr, - paddle::platform::errors::Unavailable( + phi::errors::Unavailable( "NCCLCommContext is nullptr, collective op should " "has ring_id attr.")); diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.cc b/paddle/fluid/operators/optimizers/dpsgd_op.cc index d8762b8bd719a..4c5b7bb369ad8 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.cc +++ b/paddle/fluid/operators/optimizers/dpsgd_op.cc @@ -22,41 +22,41 @@ class DpsgdOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), - true, - platform::errors::NotFound( - "Input(Param) of DpsgdOp should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), - true, - platform::errors::NotFound( - "Input(Grad) of DpsgdOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("Param"), + true, + phi::errors::NotFound("Input(Param) of DpsgdOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("Grad"), + true, + phi::errors::NotFound("Input(Grad) of DpsgdOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("LearningRate"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(LearningRate) of DpsgdOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, " "but the received is %s", ctx->GetInputsVarType("Param").front())); PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(), framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input var's type should be phi::DenseTensor, " "but the received is %s", 
ctx->GetInputsVarType("Grad").front())); PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(ParamOut) of DpsgdOp should not be null.")); auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_EQ(common::product(lr_dims), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Learning rate should have 1 dimension. But Received " "LearningRate's dims [%s].", common::product(lr_dims))); @@ -64,7 +64,7 @@ class DpsgdOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Grad"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Param and Grad input of DpsgdOp should have same dimension. But " "received Para's dim [%s] and Grad's dim [%s].", param_dims, diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.h b/paddle/fluid/operators/optimizers/dpsgd_op.h index 0f2980ff368f4..427dc15f74638 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.h +++ b/paddle/fluid/operators/optimizers/dpsgd_op.h @@ -31,7 +31,7 @@ class DpsgdOpKernel : public framework::OpKernel { const auto *param_var = ctx.InputVar("Param"); PADDLE_ENFORCE_EQ(param_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Var(%s)'s type should be phi::DenseTensor, " "but the received is %s", ctx.InputNames("Param").front(), @@ -40,7 +40,7 @@ class DpsgdOpKernel : public framework::OpKernel { const auto *grad_var = ctx.InputVar("Grad"); PADDLE_ENFORCE_EQ(grad_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Var(%s)'s type should be phi::DenseTensor, " "but the received is %s", ctx.InputNames("Grad").front(), @@ -56,12 +56,12 @@ class DpsgdOpKernel : public framework::OpKernel { auto sz = param_out->numel(); PADDLE_ENFORCE_EQ(param->numel(), sz, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input parameter's number of elements is error, " "expected %zu, but received %zu.")); PADDLE_ENFORCE_EQ(grad->numel(), sz, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input gradient's number of elements is error, " "expected %zu, but received %zu.")); diff --git a/paddle/fluid/operators/optimizers/ftrl_op.cc b/paddle/fluid/operators/optimizers/ftrl_op.cc index e6eadadc17b6c..37edf5b8f8aa8 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.cc +++ b/paddle/fluid/operators/optimizers/ftrl_op.cc @@ -45,7 +45,7 @@ class FTRLOp : public framework::OperatorWithKernel { auto param_dim = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Two input of FTRL Op's dimension must be same, but " "param_dim is %d, Grad is %d", param_dim, @@ -54,14 +54,14 @@ class FTRLOp : public framework::OperatorWithKernel { auto lr_dim = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(common::product(lr_dim), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Maybe the Input variable LearningRate has not " "been initialized. 
You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ(common::product(lr_dim), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Learning Rate should be a scalar, but got %d", common::product(lr_dim))); diff --git a/paddle/fluid/operators/optimizers/ftrl_op.h b/paddle/fluid/operators/optimizers/ftrl_op.h index d563b84b8d5c6..347dcbafa38d5 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.h +++ b/paddle/fluid/operators/optimizers/ftrl_op.h @@ -221,8 +221,8 @@ class FTRLOpKernel : public framework::OpKernel { lin_accum_out->mutable_data(ctx.GetPlace())); for_range(functor); } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Unsupported Variable Type of Grad")); + PADDLE_THROW( + phi::errors::InvalidArgument("Unsupported Variable Type of Grad")); } } }; diff --git a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc deleted file mode 100644 index 0c5a9721e279b..0000000000000 --- a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" - -namespace paddle { -namespace operators { - -class Pow2DecayWithLinearWarmupOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - auto dim = common::make_ddim({1}); - ctx->SetOutputDim("LearningRateOut", dim); - ctx->SetOutputDim("StepOut", dim); - } - - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - auto data_type = - OperatorWithKernel::IndicateVarDataType(ctx, "LearningRate"); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; - -class Pow2DecayWithLinearWarmupOpMaker - : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("LearningRate", "(Tensor) The input learning rate Tensor."); - AddInput("Step", "(Tensor) The input global step Tensor."); - AddOutput("LearningRateOut", - "(Tensor) The output learning rate Tensor. Same with " - "Input(LearningRate)."); - AddOutput( - "StepOut", - "(Tensor) The output learning rate Tensor. Same with Input(Step)."); - AddAttr("warmup_steps", "(int64_t) The warmup steps."); - AddAttr( - "total_steps", - "(int64_t) The total steps for changing the learning rate."); - AddAttr("base_lr", - "(float) The final learning rate value after warmup."); - AddAttr("end_lr", - "(float) The final learning rate value after total_steps."); - AddComment(R"DOC( -The Pow2DecayWithLinearWarmup learning rate scheduler. 
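-As a minimal sketch, assuming only the piecewise rule spelled out below (the
-helper name is hypothetical; this is an illustration, not the registered
-kernel):
-
-#include <cstdint>
-
-// Hypothetical standalone version of the documented schedule.
-double Pow2DecayWithLinearWarmupLR(int64_t step_num, int64_t warmup_steps,
-                                   int64_t total_steps, double base_lr,
-                                   double end_lr) {
-  if (step_num < warmup_steps) {  // linear warmup
-    return base_lr * static_cast<double>(step_num) / warmup_steps;
-  }
-  if (step_num <= total_steps) {  // quadratic (pow2) decay
-    double factor = 1.0 - static_cast<double>(step_num - warmup_steps) /
-                              (total_steps - warmup_steps);
-    return (base_lr - end_lr) * factor * factor + end_lr;
-  }
-  return end_lr;  // flat tail after total_steps
-}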
- -When step_num < warmup_steps, lr = base_lr * step_num / warmup_steps - -When warmup_steps <= step_num <= total_steps, - factor = 1 - (step_num - warmup_steps) / (total_steps - warmup_steps) - lr = (base_lr - end_lr) * factor * factor + end_lr - -When step_num > total_steps, lr = end_lr - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(pow2_decay_with_linear_warmup, - ops::Pow2DecayWithLinearWarmupOp, - ops::Pow2DecayWithLinearWarmupOpMaker); diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.cc b/paddle/fluid/operators/optimizers/proximal_gd_op.cc index 074cc26c994e3..bc842d03a3c44 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.cc @@ -34,7 +34,7 @@ class ProximalGDOp : public framework::OperatorWithKernel { auto param_dim = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Intput(Param) should be equal to the " "Input(Grad) of ProximalGD Op. But received " "Input(Param).dimensions=[%s], " @@ -46,7 +46,7 @@ class ProximalGDOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( common::product(lr_dim), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Learning Rate should be a scalar. But received dimensions:[%s]", lr_dim)); diff --git a/paddle/fluid/operators/optimizers/sgd_op.cu b/paddle/fluid/operators/optimizers/sgd_op.cu index f6d2435590f9e..a489454ff12a9 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cu +++ b/paddle/fluid/operators/optimizers/sgd_op.cu @@ -71,7 +71,7 @@ class SGDOpKernel : public framework::OpKernel { const auto* param_var = ctx.InputVar("Param"); PADDLE_ENFORCE_EQ(param_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Var(%s)'s type should be phi::DenseTensor, " "but the received is %s", ctx.InputNames("Param").front(), @@ -93,7 +93,7 @@ class SGDOpKernel : public framework::OpKernel { ctx.HasInput("MasterParam") && ctx.HasOutput("MasterParamOut"); PADDLE_ENFORCE_EQ(has_master, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(MasterParam) and Output(MasterParamOut) " "should not be null when " "the attr `multi_precision` is true")); @@ -131,7 +131,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( param, param_out, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Param of SgdOp should be equal with ParamOut " "if variable's type is SelectedRows.")); auto* grad = ctx.Input("Grad"); @@ -140,7 +140,7 @@ class SGDOpKernel : public framework::OpKernel { auto out_dims = param_out->dims(); PADDLE_ENFORCE_EQ(in_height, out_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Grad's height of SgdOp should be " "equal with ParamOut's dims. 
But received Grad's " "height [%s] and ParamOut's dims [%s]", @@ -153,7 +153,7 @@ class SGDOpKernel : public framework::OpKernel { int64_t in_row_numel = in_value.numel() / in_rows.size(); PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The in_row_numel of SgdOp should be equal with " "param_out's numel / in_height.")); @@ -179,7 +179,7 @@ class SGDOpKernel : public framework::OpKernel { } else { PADDLE_ENFORCE_EQ(false, true, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Unsupported Variable Type of Grad " "in SgdOp. Excepted LodTensor or " "SelectedRows, But received [%s]", diff --git a/paddle/fluid/operators/optimizers/sgd_op.h b/paddle/fluid/operators/optimizers/sgd_op.h index 66ba8de469fee..ced04109e10bc 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.h +++ b/paddle/fluid/operators/optimizers/sgd_op.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/framework/var_type_traits.h" -#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/phi/common/bfloat16.h" #include "paddle/phi/kernels/funcs/jit/kernels.h" namespace paddle { @@ -92,7 +92,7 @@ struct sgd_dense_param_kernel -struct sgd_dense_param_kernel::kId> { void operator()(const framework::ExecutionContext &ctx) const { VLOG(4) << "[CPU]: sgd_dense_param_kernel"; @@ -100,12 +100,12 @@ struct sgd_dense_param_kernel("Param"); auto *param_out = ctx.Output("ParamOut"); const auto *grad = ctx.Input("Grad"); - param_out->mutable_data(ctx.GetPlace()); + param_out->mutable_data(ctx.GetPlace()); - auto p = framework::EigenVector::Flatten(*param); - auto g = framework::EigenVector::Flatten(*grad); - auto o = framework::EigenVector::Flatten(*param_out); - const auto *lr = learning_rate->data(); + auto p = framework::EigenVector::Flatten(*param); + auto g = framework::EigenVector::Flatten(*grad); + auto o = framework::EigenVector::Flatten(*param_out); + const auto *lr = learning_rate->data(); o = p - lr[0] * g; } @@ -113,7 +113,7 @@ struct sgd_dense_param_kernel -struct sgd_dense_param_kernel::kId> { void operator()(const framework::ExecutionContext &ctx) const { VLOG(4) << "[CPU]: sgd_dense_param_kernel"; @@ -127,15 +127,15 @@ struct sgd_dense_param_kernel(grad_rows.size()); const auto grad_width = grad_value.numel() / grad_val_height; - const auto *grad_data = grad_value.data(); - auto *out_data = param_out->data(); - const auto *lr = learning_rate->data(); + const auto *grad_data = grad_value.data(); + auto *out_data = param_out->data(); + const auto *lr = learning_rate->data(); for (size_t i = 0; i < grad_rows.size(); ++i) { PADDLE_ENFORCE_LT( grad_rows[i], grad_height, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Grad rows index value should be less than grad height." "Got [%s], but expected less than [%s]", grad_rows[i], @@ -170,7 +170,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( false, true, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Unsupported Variable Type of Parameter in SgdOp. 
Excepted " "LodTensor or SelectedRows, But received [%s]", paddle::framework::ToTypeName(param_var->Type()))); @@ -188,22 +188,22 @@ class SGDOpKernel : public framework::OpKernel { const auto sz = param_out->numel(); PADDLE_ENFORCE_EQ(param->numel(), sz, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Param's numel of SgdOp " "should be equal with ParamOut's numel. " "But received Param's " "numel = [%s], ParamOut's numel = [%s]", param->numel(), sz)); - PADDLE_ENFORCE_EQ(grad->numel(), - sz, - platform::errors::InvalidArgument( - "The input tensor Grad's numel of SgdOp " - "should be equal with ParamOut's numel. " - "But received Grad's " - "numel = [%s], ParamOut's numel = [%s]", - grad->numel(), - sz)); + PADDLE_ENFORCE_EQ( + grad->numel(), + sz, + phi::errors::InvalidArgument("The input tensor Grad's numel of SgdOp " + "should be equal with ParamOut's numel. " + "But received Grad's " + "numel = [%s], ParamOut's numel = [%s]", + grad->numel(), + sz)); dense_param_and_grad_kernel(ctx); } else if (grad_var->IsType()) { @@ -212,7 +212,7 @@ class SGDOpKernel : public framework::OpKernel { // It's better to find a more elegant solution. PADDLE_ENFORCE_EQ(param, param_out, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Param of SgdOp " "should be equal with ParamOut if variable's " "type is SelectedRows. ")); @@ -228,7 +228,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( grad->height(), out_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input tensor Grad's height of SgdOp " "should be equal with ParamOut's dims. But received Grad's " "height [%s] and ParamOut's dims [%s]", @@ -246,7 +246,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( grad_width, param_width, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The grad_value's numel of SgdOp " "should be equal with param_out's numel. But received " "grad_value's numel [%s] and param_out's numel [%s]", @@ -258,7 +258,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( false, true, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Unsupported Variable Type of Grad in SgdOp. Excepted " "LodTensor or SelectedRows, But received [%s]", paddle::framework::ToTypeName(grad_var->Type()))); @@ -273,7 +273,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(grad_var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When param is SelectedRows, gradient should also " "be SelectedRows")); const auto ¶m = param_var->Get(); @@ -291,7 +291,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( param_row_width, grad_row_width, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The param_row in SgdOP should have the same size with grad_row. " "But received param_row's width is [%s], and grad_row's width is " "[%s]", @@ -306,7 +306,7 @@ class SGDOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( id_index, static_cast(0), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The id in SgdOp should be >= 0. 
But received id_index is [%s]", id_index)); for (int64_t j = 0; j < grad_row_width; j++) { diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc index c9f9181664e51..7ef426cedad19 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc @@ -25,7 +25,7 @@ class SparseMomentumOpInferVarType : public framework::VarTypeInference { auto in_var_type = ctx->GetInputType("Param"); PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::LOD_TENSOR, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support LodTensor, Unexpected Input Type.")); ctx->SetOutputType("ParamOut", in_var_type, framework::ALL_ELEMENTS); diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.cu b/paddle/fluid/operators/optimizers/sparse_momentum_op.cu index a0df85e1453da..0a98ee4b3e5de 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.cu +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.cu @@ -14,7 +14,7 @@ #include "paddle/fluid/operators/optimizers/sparse_momentum_op.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -24,4 +24,4 @@ PD_REGISTER_STRUCT_KERNEL(sparse_momentum, ops::SparseMomentumOpKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.h b/paddle/fluid/operators/optimizers/sparse_momentum_op.h index 4c47fd2b62178..6f1a9712115af 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.h +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.h @@ -21,9 +21,9 @@ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/common/float16.h" #ifdef __NVCC__ #include "cub/cub.cuh" @@ -154,7 +154,7 @@ class SparseMomentumOp : public framework::OperatorWithKernel { auto lr_dims = common::product(ctx->GetInputDim("LearningRate")); PADDLE_ENFORCE_EQ(lr_dims != 0 && lr_dims == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Learning_rate should be a scalar. But Received " "LearningRate's dim [%s]", lr_dims)); @@ -163,7 +163,7 @@ class SparseMomentumOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( param_dim, ctx->GetInputDim("Velocity"), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Param and Velocity of SparseMomentumOp should have the same " "dimension. 
But received Param's dim [%s] and Velocity [%s].", param_dim, @@ -384,8 +384,8 @@ class SparseMomentumOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( axis == 0 || axis == 1, true, - platform::errors::InvalidArgument("The axis of sparse_momentum_op only " - "support axis=0 or axis=1 now.")); + phi::errors::InvalidArgument("The axis of sparse_momentum_op only " + "support axis=0 or axis=1 now.")); auto learning_rate = ctx.Input("LearningRate"); auto param = ctx.Input("Param"); @@ -400,13 +400,13 @@ class SparseMomentumOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( index->dims()[0], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The index of sparse_momentum_op should not be empty" "when the index's rank is 1.")); } else if (index->dims().size() == 2) { PADDLE_ENFORCE_EQ(index->dims()[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If the index's rank of sparse_momentum_op is 2," " the second dimension should be 1.")); } @@ -418,7 +418,7 @@ class SparseMomentumOpKernel : public framework::OpKernel { ctx.HasInput("MasterParam") && ctx.HasOutput("MasterParamOut"); PADDLE_ENFORCE_EQ(has_master, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(MasterParam) and Output(MasterParamOut) " "should not be null when " "the attr `multi_precision` is true")); @@ -443,16 +443,16 @@ class SparseMomentumOpKernel : public framework::OpKernel { auto param_dims = param->dims(); auto grad_dims = grad->dims(); - PADDLE_ENFORCE_EQ(param_dims.size(), - 2, - platform::errors::InvalidArgument( - "The Param's rank of sparse_momentum_op" - " must be 2 now.")); - PADDLE_ENFORCE_EQ(grad_dims.size(), - 2, - platform::errors::InvalidArgument( - "The Grad's rank of sparse_momentum_op" - " must be 2 now.")); + PADDLE_ENFORCE_EQ( + param_dims.size(), + 2, + phi::errors::InvalidArgument("The Param's rank of sparse_momentum_op" + " must be 2 now.")); + PADDLE_ENFORCE_EQ( + grad_dims.size(), + 2, + phi::errors::InvalidArgument("The Grad's rank of sparse_momentum_op" + " must be 2 now.")); phi::DenseTensor sorted_index, grad_index, sort_value; auto sorted_index_ptr = @@ -511,7 +511,7 @@ class SparseMomentumOpKernel : public framework::OpKernel { grad_index_ptr[i] = vec_tosort[i].second; } } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "sparse_momentum %s is not supported.", ctx.GetPlace())); } diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc deleted file mode 100644 index 6529bbc29fcfe..0000000000000 --- a/paddle/fluid/operators/pad2d_op.cc +++ /dev/null @@ -1,649 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
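On CPU, the sparse_momentum kernel above prepares the update by sorting the index values together with their original positions, so gathered gradient rows can later be matched back to parameter rows (the `vec_tosort` / `grad_index_ptr` code further down in this file). A reduced sketch of just that pairing step, assuming an int64 index and plain vectors in place of DenseTensor:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Sort index values while remembering where each came from: sorted_index
// holds the values ascending, and grad_index[i] is the original position
// of sorted_index[i].
void SortIndexWithPermutation(const std::vector<int64_t>& index,
                              std::vector<int64_t>* sorted_index,
                              std::vector<int64_t>* grad_index) {
  std::vector<std::pair<int64_t, int64_t>> vec_tosort;
  vec_tosort.reserve(index.size());
  for (int64_t i = 0; i < static_cast<int64_t>(index.size()); ++i) {
    vec_tosort.emplace_back(index[i], i);
  }
  std::sort(vec_tosort.begin(), vec_tosort.end());
  sorted_index->resize(index.size());
  grad_index->resize(index.size());
  for (std::size_t i = 0; i < vec_tosort.size(); ++i) {
    (*sorted_index)[i] = vec_tosort[i].first;
    (*grad_index)[i] = vec_tosort[i].second;
  }
}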
*/ - -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -void Pad2DConstNCHW(const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T value, - T* out_data) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - out_data[out_h * out_width + out_w] = - (in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width) - ? value - : in_data[in_h * in_width + in_w]; - } - } - in_data += in_height * in_width; - out_data += out_height * out_width; - } - } -} - -template -void Pad2DConstNHWC(const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T value, - T* out_data) { - for (int n = 0; n < num; ++n) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - const int out_index = (out_h * out_width + out_w) * channels; - if (in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width) { - for (int c = 0; c < channels; ++c) { - out_data[out_index + c] = value; - } - } else { - const int in_index = (in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - out_data[out_index + c] = in_data[in_index + c]; - } - } - } - } - in_data += in_height * in_width * channels; - out_data += out_height * out_width * channels; - } -} - -template -void Pad2DReflectNCHW(const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T* out_data) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - in_h = std::max(in_h, -in_h); // reflect by 0 - in_h = - std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height - in_w = std::max(in_w, -in_w); // reflect by 0 - in_w = - std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width - out_data[out_h * out_width + out_w] = in_data[in_h * in_width + in_w]; - } - } - in_data += in_height * in_width; - out_data += out_height * out_width; - } - } -} - -template -void Pad2DReflectNHWC(const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T* out_data) { - for (int n = 0; n < num; ++n) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - const int out_index = (out_h * out_width + out_w) * channels; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - in_h = std::max(in_h, -in_h); - in_h = std::min(in_h, 2 * in_height - in_h - 2); - in_w = std::max(in_w, -in_w); - in_w = std::min(in_w, 2 * in_width - in_w - 2); - const int in_index = (in_h * in_width + in_w) * channels; - - for (int c = 0; c < channels; ++c) 
{ - out_data[out_index + c] = in_data[in_index + c]; - } - } - } - in_data += in_height * in_width * channels; - out_data += out_height * out_width * channels; - } -} - -template -void Pad2DEdgeNCHW(const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T* out_data) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); - int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); - out_data[out_h * out_width + out_w] = in_data[in_h * in_width + in_w]; - } - } - in_data += in_height * in_width; - out_data += out_height * out_width; - } - } -} - -template -void Pad2DEdgeNHWC(const T* in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - T* out_data) { - for (int n = 0; n < num; ++n) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - const int out_index = (out_h * out_width + out_w) * channels; - int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); - int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); - const int in_index = (in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - out_data[out_index + c] = in_data[in_index + c]; - } - } - } - in_data += in_height * in_width * channels; - out_data += out_height * out_width * channels; - } -} - -template -void Pad2DGradConstNCHW(T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - if (!(in_h < 0 || in_w < 0 || in_h >= in_height || - in_w >= in_width)) { - d_in_data[in_h * in_width + in_w] = - d_out_data[out_h * out_width + out_w]; - } - } - } - d_in_data += in_height * in_width; - d_out_data += out_height * out_width; - } - } -} - -template -void Pad2DGradConstNHWC(T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - for (int n = 0; n < num; ++n) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - const int out_index = (out_h * out_width + out_w) * channels; - if (!(in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width)) { - const int in_index = (in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - d_in_data[in_index + c] = d_out_data[out_index + c]; - } - } - } - } - d_in_data += in_height * in_width * channels; - d_out_data += out_height * out_width * channels; - } -} - -template -void Pad2DGradReflectNCHW(T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const 
int pad_left, - const T* d_out_data) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - in_h = std::max(in_h, -in_h); // reflect over 0 - in_h = std::min(in_h, - 2 * in_height - in_h - 2); // reflect over in_height - in_w = std::max(in_w, -in_w); // reflect over 0 - in_w = - std::min(in_w, 2 * in_width - in_w - 2); // reflect over in_width - d_in_data[in_h * in_width + in_w] += - d_out_data[out_h * out_width + out_w]; - } - } - d_in_data += in_height * in_width; - d_out_data += out_height * out_width; - } - } -} - -template -void Pad2DGradReflectNHWC(T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - for (int n = 0; n < num; ++n) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - const int out_index = (out_h * out_width + out_w) * channels; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - in_h = std::max(in_h, -in_h); - in_h = std::min(in_h, 2 * in_height - in_h - 2); - in_w = std::max(in_w, -in_w); - in_w = std::min(in_w, 2 * in_width - in_w - 2); - const int in_index = (in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - d_in_data[in_index + c] += d_out_data[out_index + c]; - } - } - } - d_in_data += in_height * in_width * channels; - d_out_data += out_height * out_width * channels; - } -} - -template -void Pad2DGradEdgeNCHW(T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); - int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); - d_in_data[in_h * in_width + in_w] += - d_out_data[out_h * out_width + out_w]; - } - } - d_in_data += in_height * in_width; - d_out_data += out_height * out_width; - } - } -} - -template -void Pad2DGradEdgeNHWC(T* d_in_data, - const int num, - const int channels, - const int in_height, - const int in_width, - const int out_height, - const int out_width, - const int pad_top, - const int pad_left, - const T* d_out_data) { - for (int n = 0; n < num; ++n) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - const int out_index = (out_h * out_width + out_w) * channels; - int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); - int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); - const int in_index = (in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - d_in_data[in_index + c] += d_out_data[out_index + c]; - } - } - } - d_in_data += in_height * in_width * channels; - d_out_data += out_height * out_width * channels; - } -} - -static inline void GetPaddings(int* paddings, - const framework::ExecutionContext& context) { - auto* paddings_t = context.Input("Paddings"); - if (paddings_t) { - auto paddings_data = paddings_t->data(); - paddings[0] = paddings_data[0]; - paddings[1] = paddings_data[1]; - paddings[2] 
= paddings_data[2]; - paddings[3] = paddings_data[3]; - } else { - auto pads = context.Attr>("paddings"); - std::copy(pads.begin(), pads.end(), paddings); - } -} -class Pad2dOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Pad2d"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Pad2d"); - - auto x_dim = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(x_dim.size(), - 4, - platform::errors::InvalidArgument( - "The size of Input(X)'s dimension should be equal to " - "4, but received %d. ", - x_dim.size())); - - std::vector out_dims(x_dim.size()); - auto data_format = ctx->Attrs().Get("data_format"); - out_dims[0] = x_dim[0]; - if (ctx->HasInput("Paddings")) { - auto paddings_dim = ctx->GetInputDim("Paddings"); - PADDLE_ENFORCE_EQ(paddings_dim.size(), - 1, - platform::errors::InvalidArgument( - "Size of Input(Paddings)'s dimension should be " - "equal to 1, but received %d.", - paddings_dim.size())); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(paddings_dim[0], - 4, - platform::errors::InvalidArgument( - "Shape of Input(Paddings) should be equal to " - "[4], but received [%d].", - paddings_dim[0])); - } - out_dims[1] = x_dim[1]; - out_dims[2] = x_dim[2]; - out_dims[3] = x_dim[3]; - } else { - auto paddings = ctx->Attrs().Get>("paddings"); - PADDLE_ENFORCE_EQ( - paddings.size(), - 4, - platform::errors::InvalidArgument( - "Size of paddings should be equal to 4, but received %d.", - static_cast(paddings.size()))); - if (data_format == "NCHW") { - out_dims[1] = x_dim[1]; // channel - out_dims[2] = ((!ctx->IsRuntime()) && (x_dim[2] < 0)) - ? x_dim[2] - : (x_dim[2] + paddings[0] + paddings[1]); // height - out_dims[3] = ((!ctx->IsRuntime()) && (x_dim[3] < 0)) - ? x_dim[3] - : (x_dim[3] + paddings[2] + paddings[3]); // width - } else { // NHWC - out_dims[3] = x_dim[3]; // channel - out_dims[1] = ((!ctx->IsRuntime()) && (x_dim[1] < 0)) - ? x_dim[1] - : (x_dim[1] + paddings[0] + paddings[1]); // height - out_dims[2] = ((!ctx->IsRuntime()) && (x_dim[2] < 0)) - ? 
x_dim[2] - : (x_dim[2] + paddings[2] + paddings[3]); // width - } - } - - ctx->SetOutputDim("Out", common::make_ddim(out_dims)); - ctx->ShareLoD("X", /*->*/ "Out"); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); -#ifdef PADDLE_WITH_DNNL - // only constant mode and non-blocked layouts are supported for oneDNN - if (this->CanMKLDNNBeUsed(ctx, input_data_type) && - ctx.Attr("mode") == "constant" && - ctx.Input("X")->mem_desc().get_inner_nblks() == 0) { - return phi::KernelKey(phi::Backend::ONEDNN, - phi::DataLayout::ONEDNN, - phi::TransToPhiDataType(input_data_type)); - } -#endif - return phi::KernelKey(input_data_type, ctx.GetPlace()); - } - - phi::KernelKey GetKernelTypeForVar( - const std::string& var_name, - const phi::DenseTensor& tensor, - const phi::KernelKey& expected_kernel_type) const override { -#ifdef PADDLE_WITH_DNNL - if ((expected_kernel_type.layout() == phi::DataLayout::ONEDNN) && - (tensor.layout() != phi::DataLayout::ONEDNN)) { - auto attrs = Attrs(); - auto ar = paddle::framework::AttrReader(attrs); - const std::string data_format = ar.Get("data_format"); - return phi::KernelKey(tensor.place(), - common::StringToDataLayout(data_format), - expected_kernel_type.dtype()); - } -#endif - return phi::KernelKey( - tensor.place(), tensor.layout(), expected_kernel_type.dtype()); - } -}; - -class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "The input of pad2d op. " - "The input should be a 4-D tensor with formate NCHW or NHWC."); - AddOutput("Out", - "The output of pad2d op. " - "A tensor with the same shape as X."); - AddInput("Paddings", - "A 1-D tensor to describe the padding rules." - "paddings=[0, 1, 2, 3] means " - "padding 0 row to top, 1 row to bottom, 2 columns to left " - "and 3 columns to right. Size of paddings must be 4.") - .AsDispensable(); - AddAttr>( - "paddings", - "(vector) " - "A list to describe the padding rules." - "paddings=[0, 1, 2, 3] means " - "padding 0 row to top, 1 row to bottom, 2 columns to left " - "and 3 columns to right. Size of paddings must be 4."); - AddAttr("pad_value", - "(float, default 0.0) " - "The value to fill the padded areas in constant mode.") - .SetDefault(0.0f); - AddAttr("mode", - "(float, default constant) " - "Three modes: constant(default), reflect, edge.") - .SetDefault("constant"); - AddAttr( - "data_format", - "(string, default NCHW) Only used in " - "An optional string from: \"NHWC\", \"NCHW\". " - "Defaults to \"NHWC\". Specify the data format of the input data.") - .SetDefault("NCHW"); - AddComment(R"DOC( -Pad2d Operator. -Pad 2-d images according to 'paddings' and 'mode'. -If mode is 'reflect', paddings[0] and paddings[1] must be no greater -than height-1. And the width dimension has the same condition. 
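The reflect and edge modes implemented by the deleted kernels above boil down to a small amount of per-coordinate index arithmetic (the same max/min expressions appear in Pad2DReflectNCHW and Pad2DEdgeNCHW). A sketch of that mapping lifted out of the loops, with hypothetical helper names:

#include <algorithm>

// Map a padded output coordinate back to an input coordinate.
// Reflect mode mirrors across both borders without repeating the edge:
// e.g. for in_size = 3, raw coords -2,-1,0,1,2,3,4 map to 2,1,0,1,2,1,0.
int ReflectIndex(int out_coord, int pad, int in_size) {
  int in_coord = out_coord - pad;
  in_coord = std::max(in_coord, -in_coord);                   // reflect by 0
  in_coord = std::min(in_coord, 2 * in_size - in_coord - 2);  // by in_size
  return in_coord;
}

// Edge mode clamps to the nearest valid coordinate.
int EdgeIndex(int out_coord, int pad, int in_size) {
  return std::min(in_size - 1, std::max(out_coord - pad, 0));
}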
- -Given that X is a channel of image from input: - -X = [[1, 2, 3], - [4, 5, 6]] - -Case 0: - -paddings = [0, 1, 2, 3], -mode = 'constant' -pad_value = 0 - -Out = [[0, 0, 1, 2, 3, 0, 0, 0] - [0, 0, 4, 5, 6, 0, 0, 0] - [0, 0, 0, 0, 0, 0, 0, 0]] - -Case 1: - -paddings = [0, 1, 2, 1], -mode = 'reflect' - -Out = [[3, 2, 1, 2, 3, 2] - [6, 5, 4, 5, 6, 5] - [3, 2, 1, 2, 3, 2]] - -Case 2: - -paddings = [0, 1, 2, 1], -mode = 'edge' - -Out = [[1, 1, 1, 2, 3, 3] - [4, 4, 4, 5, 6, 6] - [4, 4, 4, 5, 6, 6]] -)DOC"); - } -}; - -class Pad2dOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Pad2d@Grad"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), - "Input", - framework::GradVarName("Out"), - "Pad2d@Grad"); - - auto x_dims = ctx->GetInputDim("X"); - auto x_grad_name = framework::GradVarName("X"); - if (ctx->HasOutput(x_grad_name)) { - ctx->SetOutputDim(x_grad_name, x_dims); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); - } -}; - -template -class Pad2dOpGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr bind) const override { - bind->SetInput("X", this->Input("X")); - if (this->HasInput("Paddings")) { - bind->SetInput("Paddings", this->Input("Paddings")); - } - bind->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - bind->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - bind->SetAttrMap(this->Attrs()); - bind->SetType("pad2d_grad"); - } -}; - -// TODO(zjl): Paddings can also be skipped! -DECLARE_NO_NEED_BUFFER_VARS_INFERER(Pad2dOpGradNoNeedBufferVarsInferer, "X"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR(pad2d, - ops::Pad2dOp, - ops::Pad2dOpMaker, - ops::Pad2dOpGradMaker, - ops::Pad2dOpGradMaker); -REGISTER_OPERATOR(pad2d_grad, - ops::Pad2dOpGrad, - ops::Pad2dOpGradNoNeedBufferVarsInferer); diff --git a/paddle/fluid/operators/partial_concat_op.cc b/paddle/fluid/operators/partial_concat_op.cc index 7fca2eea27c45..518da44d1a08e 100644 --- a/paddle/fluid/operators/partial_concat_op.cc +++ b/paddle/fluid/operators/partial_concat_op.cc @@ -26,26 +26,26 @@ class PartialConcatOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of Partial ConcatOp should not be empty.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of Partial ConcatOp should not be null.")); auto inputs_dims = ctx->GetInputsDim("X"); PADDLE_ENFORCE_EQ(inputs_dims[0].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only supports 2-D array with batch size in the 1st " "dimension and data in the 2nd.")); const size_t inputs_num = inputs_dims.size(); PADDLE_ENFORCE_GT(inputs_num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: Input tensors count should > 0. 
But " "received inputs' length is 0.")); if (inputs_num == 1) { @@ -57,7 +57,7 @@ class PartialConcatOp : public framework::OperatorWithKernel { for (size_t i = 0; i < inputs_num; ++i) { PADDLE_ENFORCE_EQ(inputs_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "It only supports two dimensions input now.")); if (i == 0) { batch_size = inputs_dims[0][0]; @@ -65,11 +65,11 @@ class PartialConcatOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(inputs_dims[i][0], batch_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The batch size of all inputs must be same")); PADDLE_ENFORCE_EQ(inputs_dims[i][1], input_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input length of all inputs must be same")); } } @@ -101,10 +101,10 @@ class PartialConcatOp : public framework::OperatorWithKernel { break; } } - PADDLE_ENFORCE_EQ(flag, - 1, - platform::errors::InvalidArgument( - "All Inputs of PartialSum OP are Empty!")); + PADDLE_ENFORCE_EQ( + flag, + 1, + phi::errors::InvalidArgument("All Inputs of PartialSum OP are Empty!")); return phi::KernelKey(input_data_type, ctx.GetPlace()); } }; @@ -124,7 +124,7 @@ class PartialConcatGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_names.size(), out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of arguments in %s[%d] and %s[%d] is not equal.", in_x, in_names.size(), diff --git a/paddle/fluid/operators/partial_concat_op.cu b/paddle/fluid/operators/partial_concat_op.cu index fb746b2944acc..a597cb11f08ff 100644 --- a/paddle/fluid/operators/partial_concat_op.cu +++ b/paddle/fluid/operators/partial_concat_op.cu @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/partial_concat_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace plat = paddle::platform; @@ -73,13 +73,13 @@ class PartialConcatOpCUDAKernel : public framework::OpKernel { phi::DenseTensor *out = ctx.Output("Out"); PADDLE_ENFORCE_EQ(in_vars[0] != nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of partial concat should not be null.")); auto input_dim = in_vars[0]->dims(); PADDLE_ENFORCE_EQ(input_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only supports 2-D array with batch size in the 1st " "dimension and data in the 2nd.")); auto in_size = input_dim[1]; @@ -156,7 +156,7 @@ class PartialConcatGradOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of partial concat should not be null.")); // all parameters auto batch_size = ins[0]->dims()[0]; @@ -240,7 +240,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_concat, double, int, int64_t, - plat::float16, + phi::dtype::float16, phi::dtype::complex, phi::dtype::complex) {} PD_REGISTER_STRUCT_KERNEL(partial_concat_grad, @@ -251,6 +251,6 @@ PD_REGISTER_STRUCT_KERNEL(partial_concat_grad, double, int, int64_t, - plat::float16, + phi::dtype::float16, phi::dtype::complex, phi::dtype::complex) {} diff --git a/paddle/fluid/operators/partial_concat_op.h b/paddle/fluid/operators/partial_concat_op.h index fb0d17aa97b84..16dca9c8c8050 100644 --- a/paddle/fluid/operators/partial_concat_op.h +++ b/paddle/fluid/operators/partial_concat_op.h @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/utils.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/strided_memcpy.h" namespace paddle { @@ -28,7 +28,7 @@ static inline int64_t ComputeStartIndex(int64_t start_index, int64_t size) { PADDLE_ENFORCE_EQ( start_index >= -size && start_index < size, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The start_index is expected to be in range of [%d, %d), but got %d", -size, size, @@ -47,13 +47,13 @@ class PartialConcatKernel : public framework::OpKernel { phi::DenseTensor* out = ctx.Output("Out"); PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of partial concat should not be null.")); auto input_dim = ins[0]->dims(); PADDLE_ENFORCE_EQ(input_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only supports 2-D array with batch size in the 1st " "dimension and data in the 2nd.")); auto in_size = input_dim[1]; @@ -94,7 +94,7 @@ class PartialConcatGradientOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input of partial concat should not be null.")); // all parameters auto batch_size = ins[0]->dims()[0]; diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index 0ac288069f11a..b0c97b4fcc914 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -25,12 +25,12 @@ class PartialSumOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of PartialSumOp should not be empty.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of PartialSumOp should not be null.")); auto inputs_dims = ctx->GetInputsDim("X"); @@ -38,7 +38,7 @@ class PartialSumOp : public framework::OperatorWithKernel { const size_t inputs_num = inputs_dims.size(); PADDLE_ENFORCE_GT(inputs_num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: Input tensors count should > 0. 
But " "received inputs' length is 0.")); if (inputs_num == 1) { @@ -55,7 +55,7 @@ class PartialSumOp : public framework::OperatorWithKernel { for (size_t i = 0; i < inputs_num; ++i) { PADDLE_ENFORCE_EQ(inputs_dims[i].size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support two dimensions input now.")); if (i == 0) { batch_size = inputs_dims[0][0]; @@ -63,23 +63,23 @@ class PartialSumOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(inputs_dims[i][0], batch_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The batch size of all inputs must be same")); PADDLE_ENFORCE_EQ(inputs_dims[i][1], input_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input len of all inputs must be same")); } } - PADDLE_ENFORCE_GT(input_len, - start_index, - platform::errors::OutOfRange( - "start_index must be less than input len")); + PADDLE_ENFORCE_GT( + input_len, + start_index, + phi::errors::OutOfRange("start_index must be less than input len")); if (length > 0) { PADDLE_ENFORCE_GE( input_len, start_index + length, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "start_index + length is larger than input length")); } @@ -104,10 +104,10 @@ class PartialSumOp : public framework::OperatorWithKernel { } } - PADDLE_ENFORCE_EQ(flag, - 1, - platform::errors::InvalidArgument( - "All Inputs of PartialSum OP are Empty!")); + PADDLE_ENFORCE_EQ( + flag, + 1, + phi::errors::InvalidArgument("All Inputs of PartialSum OP are Empty!")); return phi::KernelKey(input_data_type, platform::CPUPlace()); } }; @@ -127,7 +127,7 @@ class PartialSumGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_names.size(), out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of arguments in %s[%d] and %s[%d] is not equal.", in_x, in_names.size(), diff --git a/paddle/fluid/operators/partial_sum_op.cu b/paddle/fluid/operators/partial_sum_op.cu index a38ec4c839469..25758cfde4870 100644 --- a/paddle/fluid/operators/partial_sum_op.cu +++ b/paddle/fluid/operators/partial_sum_op.cu @@ -14,7 +14,7 @@ limitations under the License. 
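Both partial ops in this area operate on a column slice [start_index, start_index + length) of each 2-D input, which is exactly what the range checks above enforce; partial_sum adds the slices elementwise, while partial_concat joins them along axis 1. A minimal sketch of the partial_sum forward on flat buffers (my own simplification; the real kernels work on lists of DenseTensor):

#include <cstdint>
#include <vector>

// Sum the column slice [start, start + length) of every input, each of
// shape [batch_size, input_len], into an output of shape [batch_size, length].
std::vector<double> PartialSumForward(
    const std::vector<std::vector<double>>& inputs,
    int64_t batch_size, int64_t input_len,
    int64_t start, int64_t length) {
  std::vector<double> out(batch_size * length, 0.0);
  for (const auto& in : inputs) {
    for (int64_t b = 0; b < batch_size; ++b) {
      for (int64_t j = 0; j < length; ++j) {
        out[b * length + j] += in[b * input_len + start + j];
      }
    }
  }
  return out;
}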
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/partial_sum_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace plat = paddle::platform; @@ -80,7 +80,7 @@ class PartialSumOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( in_vars[0] != nullptr, true, - platform::errors::InvalidArgument("The input should not be null.")); + phi::errors::InvalidArgument("The input should not be null.")); auto place = ctx.GetPlace(); // GPUPlace only now auto start_index = ctx.Attr("start_index"); @@ -156,7 +156,7 @@ class PartialSumGradOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ins[0] != nullptr, true, - platform::errors::InvalidArgument("The input should not be null.")); + phi::errors::InvalidArgument("The input should not be null.")); auto start_index = ctx.Attr("start_index"); auto length = ctx.Attr("length"); if (length == -1) { diff --git a/paddle/fluid/operators/partial_sum_op.h b/paddle/fluid/operators/partial_sum_op.h index 1b88eafae77db..f0b55728efbc6 100644 --- a/paddle/fluid/operators/partial_sum_op.h +++ b/paddle/fluid/operators/partial_sum_op.h @@ -30,7 +30,7 @@ class PartialSumKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ins[0] != nullptr, true, - platform::errors::InvalidArgument("The input should not be null.")); + phi::errors::InvalidArgument("The input should not be null.")); auto place = ctx.GetPlace(); // CPUPlace only now @@ -68,7 +68,7 @@ class PartialSumGradientOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( ins[0] != nullptr, true, - platform::errors::InvalidArgument("The input should not be null.")); + phi::errors::InvalidArgument("The input should not be null.")); auto start_index = ctx.Attr("start_index"); auto length = ctx.Attr("length"); auto batch_size = ins[0]->dims()[0]; diff --git a/paddle/fluid/operators/positive_negative_pair_op.cc b/paddle/fluid/operators/positive_negative_pair_op.cc index 96d8bbaa6f772..2974b38ffb5ba 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.cc +++ b/paddle/fluid/operators/positive_negative_pair_op.cc @@ -49,7 +49,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { ctx->HasInput("AccumulateNegativePair") && ctx->HasInput("AccumulateNeutralPair"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "All optional inputs(AccumulatePositivePair, " "AccumulateNegativePair, AccumulateNeutralPair) of " "PositiveNegativePairOp are required if one of them " @@ -57,21 +57,21 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("AccumulatePositivePair"), scalar_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of Input(AccumulatePositivePair) should be [1]. Received " "shape of Input(AccumulatePositivePair): [%s].", ctx->GetInputDim("AccumulatePositivePair"))); PADDLE_ENFORCE_EQ( ctx->GetInputDim("AccumulateNegativePair"), scalar_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of Input(AccumulateNegativePair) should be [1]. Received " "shape of Input(AccumulateNegativePair): [%s].", ctx->GetInputDim("AccumulateNegativePair"))); PADDLE_ENFORCE_EQ( ctx->GetInputDim("AccumulateNeutralPair"), scalar_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of Input(AccumulateNeutralPair) should be [1]. 
Received " "shape of Input(AccumulateNeutralPair): [%s].", ctx->GetInputDim("AccumulateNeutralPair"))); @@ -82,13 +82,13 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { auto query_dim = ctx->GetInputDim("QueryID"); PADDLE_ENFORCE_EQ(score_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Score should be a 2-D tensor. Received shape of " "Input(Score): [%s].", score_dim)); PADDLE_ENFORCE_EQ(label_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Label should be a 2-D tensor. Received shape of " "Input(Label): [%s].", label_dim)); @@ -98,7 +98,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( label_dim[0], score_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Score) and Input(Label) should have the same " "height (batch size). Received: the shape of Input(Score) is " "[%s], while the shape of Input(Label) is [%s]. The first " @@ -109,7 +109,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( label_dim[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of Label should be 1, i.e. each item should " "have a scalar label. Received shape of Input(Label) is [%s]. " "The second dimension of it is %d, while the expected is %d.", @@ -120,7 +120,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( query_dim, label_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(QueryID) should have the same shape as Input(Label). " "Received: the shape of Input(QueryID) is [%s], " "while the shape of Input(Label) is [%s].", @@ -131,7 +131,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->GetInputDim("Weight"), label_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Weight) should have the same shape as Input(Label). " "Received: the shape of Input(Weight) is [%s] while the shape " "of Input(Label) is [%s].", @@ -144,7 +144,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( column, depth, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Attr(column) should be less than depth(the second " "dimension of Input(Score)). Received Attr(column): %d, while " "depth is %d.", @@ -153,7 +153,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( column, -depth, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Attr(column) should be greater than equal to negative " "depth, i.e. the second dimension of Input(Score). " "Received Attr(column): %d, while negative depth is %d.", diff --git a/paddle/fluid/operators/prim_ops/CMakeLists.txt b/paddle/fluid/operators/prim_ops/CMakeLists.txt deleted file mode 100644 index 7a1278219bb6d..0000000000000 --- a/paddle/fluid/operators/prim_ops/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include(operators) -if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/prim_ops. - include(unity_build_rule.cmake) -endif() -register_operators() diff --git a/paddle/fluid/operators/prim_ops/abs_p_op.cc b/paddle/fluid/operators/prim_ops/abs_p_op.cc deleted file mode 100644 index 87b5243d6afe7..0000000000000 --- a/paddle/fluid/operators/prim_ops/abs_p_op.cc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
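The shape checks above describe this op's contract: a 2-D Score, a width-1 Label, a QueryID per item, and a column attribute selecting which score column to rank by. For intuition only, the PN-pair statistic such an operator accumulates can be sketched as follows; this is my reading of the metric, not the shipped kernel. Among items sharing a QueryID, a pair with different labels counts as positive when the higher-labeled item also scores higher, negative when the order is inverted, and neutral when the scores tie:

#include <cstddef>
#include <cstdint>
#include <map>
#include <tuple>
#include <vector>

// Count (positive, negative, neutral) pairs per the description above.
// score holds the selected column, one value per item.
std::tuple<double, double, double> PnPairCount(
    const std::vector<double>& score,
    const std::vector<double>& label,
    const std::vector<int64_t>& query) {
  double pos = 0, neg = 0, neu = 0;
  std::map<int64_t, std::vector<std::size_t>> by_query;
  for (std::size_t i = 0; i < query.size(); ++i) {
    by_query[query[i]].push_back(i);
  }
  for (const auto& kv : by_query) {
    const auto& items = kv.second;
    for (std::size_t a = 0; a < items.size(); ++a) {
      for (std::size_t b = a + 1; b < items.size(); ++b) {
        const std::size_t i = items[a], j = items[b];
        if (label[i] == label[j]) continue;  // equal labels form no pair
        const double s = (score[i] - score[j]) * (label[i] - label[j]);
        if (s > 0) pos += 1;
        else if (s < 0) neg += 1;
        else neu += 1;  // tied scores
      }
    }
  }
  return std::make_tuple(pos, neg, neu);
}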
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class AbsPrimOp : public framework::OperatorBase { - public: - AbsPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator abs_p should not be executed directly")); - } -}; - -class AbsPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of abs_p op."); - AddOutput("Y", "(Tensor), The output tensor of abs_p op."); - AddComment(R"DOC(Autograd primitive abs_p operator.)DOC"); - } -}; - -class AbsPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class AbsPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(abs_p, - paddle::operators::AbsPrimOp, - paddle::operators::AbsPrimOpMaker, - paddle::operators::AbsPrimOpShapeInference, - paddle::operators::AbsPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/add_p_op.cc b/paddle/fluid/operators/prim_ops/add_p_op.cc deleted file mode 100644 index 7fbbdf136929c..0000000000000 --- a/paddle/fluid/operators/prim_ops/add_p_op.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class AddPrimOp : public framework::OperatorBase { - public: - AddPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator add_p should not be executed directly")); - } -}; - -class AddPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of add_p op."); - AddInput("Y", "(Tensor), The input tensor of add_p op."); - AddOutput("Z", "(Tensor), The output tensor of add_p op."); - AddComment(R"DOC( -Autograd primitive add_p operator. -)DOC"); - } -}; - -class AddPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class AddPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, x_dtype); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(add_p, - paddle::operators::AddPrimOp, - paddle::operators::AddPrimOpMaker, - paddle::operators::AddPrimOpShapeInference, - paddle::operators::AddPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/bernoulli_p_op.cc b/paddle/fluid/operators/prim_ops/bernoulli_p_op.cc deleted file mode 
100644 index 251cd9bff5400..0000000000000 --- a/paddle/fluid/operators/prim_ops/bernoulli_p_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class BernoulliPrimOp : public framework::OperatorBase { - public: - BernoulliPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator bernoulli_p should not be executed directly")); - } -}; - -class BernoulliPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddOutput("Y", "(Tensor), The output tensor of bernoulli_p op."); - AddAttr>( - "shape", "(std::vector) The shape of output tensor."); - AddAttr("dtype", "(int) The dtype of output tensor."); - AddAttr("p", "(float) The probability of bernoulli distribution."); - AddComment(R"DOC( -Autograd primitive bernoulli_p operator. -)DOC"); - } -}; - -class BernoulliPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - auto shape = ctx->Attrs().Get>("shape"); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(shape); - } -}; - -class BernoulliPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto y_name = Output(ctx, "Y")[0]; - auto data_type = static_cast( - PADDLE_GET_CONST(int, ctx->GetAttr("dtype"))); - SetDataType(ctx, y_name, data_type); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(bernoulli_p, - paddle::operators::BernoulliPrimOp, - paddle::operators::BernoulliPrimOpMaker, - paddle::operators::BernoulliPrimOpShapeInference, - paddle::operators::BernoulliPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/broadcast_p_op.cc b/paddle/fluid/operators/prim_ops/broadcast_p_op.cc deleted file mode 100644 index d2c391f7a9bc6..0000000000000 --- a/paddle/fluid/operators/prim_ops/broadcast_p_op.cc +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class BroadcastPrimOp : public framework::OperatorBase { - public: - BroadcastPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator broadcast_p should not be executed directly")); - } -}; - -class BroadcastPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of broadcast_p op."); - AddOutput("Y", "(Tensor), The output tensor of broadcast_p op."); - AddAttr>( - "shape", - "(std::vector) Target shape of broadcast_p operator."); - AddComment(R"DOC( -Autograd primitive broadcast_p operator. -)DOC"); - } -}; - -static void CheckShapeValid(const std::vector &x_shape, - const std::vector &target_shape) { - size_t x_rank = x_shape.size(); - size_t target_rank = target_shape.size(); - PADDLE_ENFORCE_GE(target_rank, - x_rank, - platform::errors::InvalidArgument( - "The rank of target shape should be greater than or " - "equal to input tensor's dimensions, " - "but received %d and %d", - target_rank, - x_rank)); - std::vector::const_iterator it = target_shape.begin(); - for (size_t i = 0; i < x_rank; i++, it++) { - if (x_shape[i] != 1) { - it = std::find(it, target_shape.end(), x_shape[i]); - } - PADDLE_ENFORCE_EQ( - it != target_shape.end(), - true, - platform::errors::InvalidArgument( - "Invalid shape, can not broadcast input tensor into target shape," - "the first dismatching shape %d is shape of input tensor at " - "dimension %d", - x_shape[i], - i)); - } -} - -class BroadcastPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - auto x_shape = x_var->GetShape(); - auto target_shape = ctx->Attrs().Get>("shape"); - CheckShapeValid(x_shape, target_shape); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(target_shape); - } -}; - -class BroadcastPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(broadcast_p, - paddle::operators::BroadcastPrimOp, 
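The deleted CheckShapeValid above scans the target shape left to right, skipping size-1 input dimensions, so it accepts broadcasting [3, 1] into [2, 3, 5] but rejects [3, 1] into [2, 4, 5]. A self-contained copy of the rule with those two cases as a driver (illustrative only; the original raises via PADDLE_ENFORCE instead of returning a bool):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Standalone restatement of the broadcast_p compatibility rule: every non-1
// input dimension must appear, in order, somewhere in the target shape.
bool BroadcastShapeValid(const std::vector<int64_t>& x_shape,
                         const std::vector<int64_t>& target_shape) {
  if (target_shape.size() < x_shape.size()) return false;
  auto it = target_shape.begin();
  for (std::size_t i = 0; i < x_shape.size(); ++i, ++it) {
    if (x_shape[i] != 1) {
      it = std::find(it, target_shape.end(), x_shape[i]);
    }
    if (it == target_shape.end()) return false;
  }
  return true;
}

int main() {
  std::cout << BroadcastShapeValid({3, 1}, {2, 3, 5}) << "\n";  // 1: accepted
  std::cout << BroadcastShapeValid({3, 1}, {2, 4, 5}) << "\n";  // 0: rejected
}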
- paddle::operators::BroadcastPrimOpMaker, - paddle::operators::BroadcastPrimOpShapeInference, - paddle::operators::BroadcastPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/cast_p_op.cc b/paddle/fluid/operators/prim_ops/cast_p_op.cc deleted file mode 100644 index ead6cc53ceea7..0000000000000 --- a/paddle/fluid/operators/prim_ops/cast_p_op.cc +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class CastPrimOp : public framework::OperatorBase { - public: - CastPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator cast_p should not be executed directly")); - } -}; - -class CastPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of cast_p op."); - AddOutput("Y", "(Tensor), The output tensor of cast_p op."); - AddAttr<int>("dtype", "output data type"); - AddComment(R"DOC(Autograd primitive cast_p operator.)DOC"); - } -}; - -class CastPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class CastPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto out_type = static_cast<framework::proto::VarType::Type>( - PADDLE_GET_CONST(int, ctx->GetAttr("dtype"))); - ctx->SetOutputDataType("Y", out_type); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(cast_p, - paddle::operators::CastPrimOp, - paddle::operators::CastPrimOpMaker, - paddle::operators::CastPrimOpShapeInference, - paddle::operators::CastPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/concat_p_op.cc b/paddle/fluid/operators/prim_ops/concat_p_op.cc deleted file mode 100644 index 6b8d6c0a3322a..0000000000000 --- a/paddle/fluid/operators/prim_ops/concat_p_op.cc +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class ConcatPrimOp : public framework::OperatorBase { - public: - ConcatPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator concat_p should not be executed directly")); - } -}; - -class ConcatPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("XS", "(Tensor), The input tensors of concat_p op.") - .AsDuplicable(); - AddOutput("Y", "(Tensor), The output tensor of concat_p op."); - AddAttr<int64_t>("axis", "(int64_t), The axis along which to concat."); - AddComment(R"DOC( -Autograd primitive concat_p operator. -)DOC"); - } -}; - -class ConcatPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - auto x_var_ptrs = ctx->GetInputVarPtrs("XS"); - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - auto axis = ctx->Attrs().Get<int64_t>("axis"); - int64_t cnt_along_axis = 0; - framework::VarDesc *first_x_var = - PADDLE_GET(framework::VarDesc *, x_var_ptrs[0]); - auto first_x_shape = first_x_var->GetShape(); - cnt_along_axis += first_x_shape[axis]; - size_t first_x_rank = first_x_shape.size(); - for (size_t i = 1; i < x_var_ptrs.size(); ++i) { - framework::VarDesc *x_var = - PADDLE_GET(framework::VarDesc *, x_var_ptrs[i]); - auto x_shape = x_var->GetShape(); - cnt_along_axis += x_shape[axis]; - size_t x_rank = x_shape.size(); - PADDLE_ENFORCE_EQ( - x_rank, - first_x_rank, - platform::errors::InvalidArgument("The dimensions of %d input tensor " - "should be same as the dimensions " - "of 1st input tensor's, " - "but get %d and %d", - i + 1, - x_rank, - first_x_rank)); - for (size_t j = 0; j < x_rank; ++j) { - if (j != size_t(axis)) { - PADDLE_ENFORCE_EQ(x_shape[j], - first_x_shape[j], - platform::errors::InvalidArgument( - "The shape of %d input tensor at dimension %d " - "should be same as the 1st input tensor's, " - "but get %d and %d", - i + 1, - j, - x_shape[j], - first_x_shape[j])); - } - } - } - - std::vector<int64_t> y_shape(first_x_shape); - y_shape[axis] = cnt_along_axis; - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(y_shape); - } -}; - -class ConcatPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_names = Input(ctx, "XS"); - auto y_name = Output(ctx, "Y")[0]; - auto first_x_name =
x_names[0]; - auto first_x_type = GetType(ctx, first_x_name); - auto first_x_dtype = GetDataType(ctx, first_x_name); - for (size_t i = 1; i < x_names.size(); ++i) { - auto x_name = x_names[i]; - auto x_type = GetType(ctx, x_name); - auto x_dtype = GetDataType(ctx, x_name); - PADDLE_ENFORCE_EQ(x_type, - first_x_type, - platform::errors::InvalidArgument( - "The type of %d input tensor should be same as the " - "first input tensor's, " - "but get %d and %d", - i + 1, - x_type, - first_x_type)); - PADDLE_ENFORCE_EQ(x_dtype, - first_x_dtype, - platform::errors::InvalidArgument( - "The datatype of %d input tensor should be same as " - "the first input tensor's, " - "but get %d and %d", - i + 1, - x_dtype, - first_x_dtype)); - } - SetType(ctx, y_name, GetType(ctx, first_x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, first_x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(concat_p, - paddle::operators::ConcatPrimOp, - paddle::operators::ConcatPrimOpMaker, - paddle::operators::ConcatPrimOpShapeInference, - paddle::operators::ConcatPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/cos_p_op.cc b/paddle/fluid/operators/prim_ops/cos_p_op.cc deleted file mode 100644 index c8acc30ba6107..0000000000000 --- a/paddle/fluid/operators/prim_ops/cos_p_op.cc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class CosPrimOp : public framework::OperatorBase { - public: - CosPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator cos_p should not be executed directly")); - } -}; - -class CosPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of cos_p op."); - AddOutput("Y", "(Tensor), The output tensor of cos_p op."); - AddComment(R"DOC(Autograd primitive cos_p operator.)DOC"); - } -}; - -class CosPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class CosPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(cos_p, - paddle::operators::CosPrimOp, - paddle::operators::CosPrimOpMaker, - paddle::operators::CosPrimOpShapeInference, - paddle::operators::CosPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/div_p_op.cc b/paddle/fluid/operators/prim_ops/div_p_op.cc deleted file mode 100644 index c046c63b8abad..0000000000000 --- a/paddle/fluid/operators/prim_ops/div_p_op.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class DivPrimOp : public framework::OperatorBase { - public: - DivPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator div_p should not be executed directly")); - } -}; - -class DivPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of div_p op."); - AddInput("Y", "(Tensor), The input tensor of div_p op."); - AddOutput("Z", "(Tensor), The output tensor of div_p op."); - AddComment(R"DOC( -Autograd primitive div_p operator. -)DOC"); - } -}; - -class DivPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class DivPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, x_dtype); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(div_p, - paddle::operators::DivPrimOp, - paddle::operators::DivPrimOpMaker, - paddle::operators::DivPrimOpShapeInference, - paddle::operators::DivPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/eq_p_op.cc b/paddle/fluid/operators/prim_ops/eq_p_op.cc deleted file mode 100644 index 
389fd548677d6..0000000000000 --- a/paddle/fluid/operators/prim_ops/eq_p_op.cc +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class EqPrimOp : public framework::OperatorBase { - public: - EqPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator eq_p should not be executed directly")); - } -}; - -class EqPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of eq_p op."); - AddInput("Y", "(Tensor), The input tensor of eq_p op."); - AddOutput("Z", "(Tensor), The output tensor of eq_p op."); - AddComment(R"DOC( -Autograd primitive eq_p operator. -)DOC"); - } -}; - -class EqPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class EqPrimOpVarTypeInference : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - 
platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, framework::proto::VarType::BOOL); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(eq_p, - paddle::operators::EqPrimOp, - paddle::operators::EqPrimOpMaker, - paddle::operators::EqPrimOpShapeInference, - paddle::operators::EqPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/erf_p_op.cc b/paddle/fluid/operators/prim_ops/erf_p_op.cc deleted file mode 100644 index 95bbeadfd6798..0000000000000 --- a/paddle/fluid/operators/prim_ops/erf_p_op.cc +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class ErfPrimOp : public framework::OperatorBase { - public: - ErfPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator erf_p should not be executed directly")); - } -}; - -class ErfPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of erf_p op."); - AddOutput("Y", "(Tensor), The output tensor of erf_p op."); - AddComment(R"DOC(Autograd primitive erf_p operator.)DOC"); - } -}; - -class ErfPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class ErfPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(erf_p, - paddle::operators::ErfPrimOp, - paddle::operators::ErfPrimOpMaker, - paddle::operators::ErfPrimOpShapeInference, - paddle::operators::ErfPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/exp_p_op.cc 
b/paddle/fluid/operators/prim_ops/exp_p_op.cc deleted file mode 100644 index 220ed7672ab25..0000000000000 --- a/paddle/fluid/operators/prim_ops/exp_p_op.cc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class ExpPrimOp : public framework::OperatorBase { - public: - ExpPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator exp_p should not be executed directly")); - } -}; - -class ExpPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of exp_p op."); - AddOutput("Y", "(Tensor), The output tensor of exp_p op."); - AddComment(R"DOC(Autograd primitive exp_p operator.)DOC"); - } -}; - -class ExpPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class ExpPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(exp_p, - paddle::operators::ExpPrimOp, - paddle::operators::ExpPrimOpMaker, - paddle::operators::ExpPrimOpShapeInference, - paddle::operators::ExpPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/fill_constant_p_op.cc b/paddle/fluid/operators/prim_ops/fill_constant_p_op.cc deleted file mode 100644 index a570ccd1cecba..0000000000000 --- a/paddle/fluid/operators/prim_ops/fill_constant_p_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class FillConstantPrimOp : public framework::OperatorBase { - public: - FillConstantPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator fill_constant_p should not be executed directly")); - } -}; - -class FillConstantPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddOutput("Y", "(Tensor), The output tensor of fill_constant_p op."); - AddAttr<float>("value", "(float) The value of output tensor."); - AddAttr<std::vector<int64_t>>( - "shape", "(std::vector<int64_t>) The shape of output tensor."); - AddAttr<int>("dtype", "(int) The dtype of output tensor."); - AddComment(R"DOC( -Autograd primitive fill_constant_p operator. -)DOC"); - } -}; - -class FillConstantPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - auto shape = ctx->Attrs().Get<std::vector<int64_t>>("shape"); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(shape); - } -}; - -class FillConstantPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto y_name = Output(ctx, "Y")[0]; - auto data_type = static_cast<framework::proto::VarType::Type>( - PADDLE_GET_CONST(int, ctx->GetAttr("dtype"))); - SetDataType(ctx, y_name, data_type); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(fill_constant_p, - paddle::operators::FillConstantPrimOp, - paddle::operators::FillConstantPrimOpMaker, - paddle::operators::FillConstantPrimOpShapeInference, - paddle::operators::FillConstantPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/gather_p_op.cc b/paddle/fluid/operators/prim_ops/gather_p_op.cc deleted file mode 100644 index 23d8349f22eee..0000000000000 --- a/paddle/fluid/operators/prim_ops/gather_p_op.cc +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class GatherPrimOp : public framework::OperatorBase { - public: - GatherPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator gather_p should not be executed directly")); - } -}; - -class GatherPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of gather_p op."); - AddInput("IndexTensor", - "(Tensor), The index tensor of gather_p op, which is a 1D tensor.") - .AsDispensable(); - AddOutput("Y", "(Tensor), The output tensor of gather_p op."); - AddAttr<int64_t>("axis", "(int64_t), The axis along which to gather."); - AddAttr<std::vector<int64_t>>( - "index", "(std::vector<int64_t>) The index of gather_p op") - .SetDefault({0}); - AddComment(R"DOC( -Autograd primitive gather_p operator. -)DOC"); - } -}; - -class GatherPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - int64_t num_index = 0; - if (ctx->HasInput("IndexTensor")) { - framework::InferShapeVarPtr index_var_ptr = - ctx->GetInputVarPtrs("IndexTensor")[0]; - framework::VarDesc *index_var = - PADDLE_GET(framework::VarDesc *, index_var_ptr); - auto index_shape = index_var->GetShape(); - PADDLE_ENFORCE_EQ(index_shape.size(), - 1, - platform::errors::InvalidArgument( - "The index tensor should be a 1D tensor," - "but get rank %d", - index_shape.size())); - num_index = index_shape[0]; - } else { - num_index = static_cast<int64_t>( - ctx->Attrs().Get<std::vector<int64_t>>("index").size()); - } - auto axis = ctx->Attrs().Get<int64_t>("axis"); - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - auto x_shape = x_var->GetShape(); - x_shape[axis] = num_index; - - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_shape); - } -}; - -class GatherPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - if (ctx->HasInput("IndexTensor")) { - auto index_name = Input(ctx, "IndexTensor")[0]; - auto index_dtype = GetDataType(ctx, index_name); - PADDLE_ENFORCE_EQ( - index_dtype, - framework::proto::VarType_Type_INT32, - platform::errors::InvalidArgument( - "The datatype of input tensor should be VarType_Type_INT32(%d), " - "but get %d", - framework::proto::VarType_Type_INT32, - index_dtype)); - } - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(gather_p, - paddle::operators::GatherPrimOp, - paddle::operators::GatherPrimOpMaker, - paddle::operators::GatherPrimOpShapeInference, - paddle::operators::GatherPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/ge_p_op.cc
b/paddle/fluid/operators/prim_ops/ge_p_op.cc deleted file mode 100644 index 20a6496158611..0000000000000 --- a/paddle/fluid/operators/prim_ops/ge_p_op.cc +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class GePrimOp : public framework::OperatorBase { - public: - GePrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator ge_p should not be executed directly")); - } -}; - -class GePrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of ge_p op."); - AddInput("Y", "(Tensor), The input tensor of ge_p op."); - AddOutput("Z", "(Tensor), The output tensor of ge_p op."); - AddComment(R"DOC( -Autograd primitive ge_p operator. 
-)DOC"); - } -}; - -class GePrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class GePrimOpVarTypeInference : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, framework::proto::VarType::BOOL); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(ge_p, - paddle::operators::GePrimOp, - paddle::operators::GePrimOpMaker, - paddle::operators::GePrimOpShapeInference, - paddle::operators::GePrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/gt_p_op.cc b/paddle/fluid/operators/prim_ops/gt_p_op.cc deleted file mode 100644 index 01e8c1612cc43..0000000000000 --- a/paddle/fluid/operators/prim_ops/gt_p_op.cc +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class GtPrimOp : public framework::OperatorBase { - public: - GtPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator gt_p should not be executed directly")); - } -}; - -class GtPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of gt_p op."); - AddInput("Y", "(Tensor), The input tensor of gt_p op."); - AddOutput("Z", "(Tensor), The output tensor of gt_p op."); - AddComment(R"DOC( -Autograd primitive gt_p operator. -)DOC"); - } -}; - -class GtPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class GtPrimOpVarTypeInference : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, framework::proto::VarType::BOOL); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(gt_p, - paddle::operators::GtPrimOp, - paddle::operators::GtPrimOpMaker, - paddle::operators::GtPrimOpShapeInference, - paddle::operators::GtPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/log_p_op.cc b/paddle/fluid/operators/prim_ops/log_p_op.cc deleted file mode 100644 index d077510fd5c46..0000000000000 --- a/paddle/fluid/operators/prim_ops/log_p_op.cc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2022 
PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class LogPrimOp : public framework::OperatorBase { - public: - LogPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator log_p should not be executed directly")); - } -}; - -class LogPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of log_p op."); - AddOutput("Y", "(Tensor), The output tensor of log_p op."); - AddComment(R"DOC( -Autograd primitive log_p operator. -)DOC"); - } -}; - -class LogPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class LogPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(log_p, - paddle::operators::LogPrimOp, - paddle::operators::LogPrimOpMaker, - paddle::operators::LogPrimOpShapeInference, - paddle::operators::LogPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/matmul_p_op.cc b/paddle/fluid/operators/prim_ops/matmul_p_op.cc deleted file mode 100644 index 6a53dda16f71c..0000000000000 --- a/paddle/fluid/operators/prim_ops/matmul_p_op.cc +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class MatmulPrimOp : public framework::OperatorBase { - public: - MatmulPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator matmul_p should not be executed directly")); - } -}; - -class MatmulPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of matmul_p op."); - AddInput("Y", "(Tensor), The input tensor of matmul_p op."); - AddOutput("Z", "(Tensor), The output tensor of matmul_p op."); - AddComment(R"DOC( -Autograd primitive matmul_p operator. -)DOC"); - } -}; - -class MatmulPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The two input tensor's dimension should be equal" - "But received first input tensor's dimension is %d, " - "and another input tensor's dimension is %d", - x_rank, - y_rank)); - - PADDLE_ENFORCE_EQ(x_rank == 2 || x_rank == 3, - true, - platform::errors::InvalidArgument( - "The input tensor's dimension should be 2 or 3" - "But received input tensor's dimension is %d", - x_rank)); - - PADDLE_ENFORCE_EQ( - x_shape[x_rank - 1], - y_shape[y_rank - 2], - platform::errors::InvalidArgument( - "Invalid shape for matmul, the last dimension of first input and " - "the penultimate dimension for the second input should be same." - "But received %d and %d.", - x_shape[x_rank - 1], - y_shape[y_rank - 2])); - if (x_rank == 2) { - std::vector<int64_t> z_shape{x_shape[x_rank - 2], y_shape[y_rank - 1]}; - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(z_shape); - } else { - PADDLE_ENFORCE_EQ(x_shape[0], - y_shape[0], - platform::errors::InvalidArgument( - "Invalid shape for matmul when input tensor's " - "dimension is 3, the first dimension of first " - "input and the second input should be same."
- "But received %d and %d.", - x_shape[0], - y_shape[0])); - - std::vector z_shape{ - x_shape[0], x_shape[x_rank - 2], y_shape[y_rank - 1]}; - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(z_shape); - } - } -}; - -class MatmulPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, x_dtype); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(matmul_p, - paddle::operators::MatmulPrimOp, - paddle::operators::MatmulPrimOpMaker, - paddle::operators::MatmulPrimOpShapeInference, - paddle::operators::MatmulPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/max_p_op.cc b/paddle/fluid/operators/prim_ops/max_p_op.cc deleted file mode 100644 index 782925b748eac..0000000000000 --- a/paddle/fluid/operators/prim_ops/max_p_op.cc +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class MaxPrimOp : public framework::OperatorBase { - public: - MaxPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator max_p should not be executed directly")); - } -}; - -class MaxPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of max_p op."); - AddInput("Y", "(Tensor), The input tensor of max_p op."); - AddOutput("Z", "(Tensor), The output tensor of max_p op."); - AddComment(R"DOC( -Autograd primitive max_p operator. 
-)DOC"); - } -}; - -class MaxPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class MaxPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, x_dtype); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(max_p, - paddle::operators::MaxPrimOp, - paddle::operators::MaxPrimOpMaker, - paddle::operators::MaxPrimOpShapeInference, - paddle::operators::MaxPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/mul_p_op.cc b/paddle/fluid/operators/prim_ops/mul_p_op.cc deleted file mode 100644 index fd655e887be90..0000000000000 --- a/paddle/fluid/operators/prim_ops/mul_p_op.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class MulPrimOp : public framework::OperatorBase { - public: - MulPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator mul_p should not be executed directly")); - } -}; - -class MulPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of mul_p op."); - AddInput("Y", "(Tensor), The input tensor of mul_p op."); - AddOutput("Z", "(Tensor), The output tensor of mul_p op."); - AddComment(R"DOC( -Autograd primitive mul_p operator. -)DOC"); - } -}; - -class MulPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class MulPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, x_dtype); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(mul_p, - paddle::operators::MulPrimOp, - paddle::operators::MulPrimOpMaker, - paddle::operators::MulPrimOpShapeInference, - paddle::operators::MulPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/ne_p_op.cc b/paddle/fluid/operators/prim_ops/ne_p_op.cc deleted file mode 100644 index 
0d65d1a7e33d9..0000000000000 --- a/paddle/fluid/operators/prim_ops/ne_p_op.cc +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class NePrimOp : public framework::OperatorBase { - public: - NePrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator ne_p should not be executed directly")); - } -}; - -class NePrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of ne_p op."); - AddInput("Y", "(Tensor), The input tensor of ne_p op."); - AddOutput("Z", "(Tensor), The output tensor of ne_p op."); - AddComment(R"DOC( -Autograd primitive ne_p operator. -)DOC"); - } -}; - -class NePrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class NePrimOpVarTypeInference : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - 
platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, framework::proto::VarType::BOOL); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(ne_p, - paddle::operators::NePrimOp, - paddle::operators::NePrimOpMaker, - paddle::operators::NePrimOpShapeInference, - paddle::operators::NePrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/pow_p_op.cc b/paddle/fluid/operators/prim_ops/pow_p_op.cc deleted file mode 100644 index 50e625a328e58..0000000000000 --- a/paddle/fluid/operators/prim_ops/pow_p_op.cc +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class PowPrimOp : public framework::OperatorBase { - public: - PowPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator pow_p should not be executed directly")); - } -}; - -class PowPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The base of pow_p op."); - AddInput("Y", "(Tensor), The exponents of pow_p op."); - AddOutput("Z", "(Tensor), The output tensor of pow_p op."); - AddComment(R"DOC( -Autograd primitive pow_p operator. 
-)DOC"); - } -}; - -class PowPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class PowPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, x_dtype); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(pow_p, - paddle::operators::PowPrimOp, - paddle::operators::PowPrimOpMaker, - paddle::operators::PowPrimOpShapeInference, - paddle::operators::PowPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/reduce_sum_p_op.cc b/paddle/fluid/operators/prim_ops/reduce_sum_p_op.cc deleted file mode 100644 index dbb33a98b108c..0000000000000 --- a/paddle/fluid/operators/prim_ops/reduce_sum_p_op.cc +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class ReduceSumPrimOp : public framework::OperatorBase { - public: - ReduceSumPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator reduce_sum_p should not be executed directly")); - } -}; - -class ReduceSumPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of reduce_sum_p op."); - AddOutput("Y", "(Tensor), The output tensor of reduce_sum_p op."); - AddAttr>( - "axis", - "(std::vector) The axis along which to reduce on. Must be in " - "range [-rank(input), rank(input)]. If `axis[i] < 0`, the axis[i] to " - "reduce is `rank + axis[i]`."); - AddAttr("keepdim", - "(bool, default false) " - "If true, retain the reduced axis with length 1.") - .SetDefault(false); - AddComment(R"DOC( -Autograd primitive reduce_sum_p operator. -)DOC"); - } -}; - -class ReduceSumPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - auto x_shape = x_var->GetShape(); - auto axis = ctx->Attrs().Get>("axis"); - auto keepdim = ctx->Attrs().Get("keepdim"); - if (keepdim) { - for (auto item : axis) { - x_shape[item] = 1; - } - } else { - const int kDelFlag = -2; - for (auto item : axis) { - x_shape[item] = kDelFlag; - } - x_shape.erase(remove(x_shape.begin(), x_shape.end(), kDelFlag), - x_shape.end()); - } - - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_shape); - } -}; - -class ReduceSumPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(reduce_sum_p, - paddle::operators::ReduceSumPrimOp, - paddle::operators::ReduceSumPrimOpMaker, - paddle::operators::ReduceSumPrimOpShapeInference, - paddle::operators::ReduceSumPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/reshape_p_op.cc b/paddle/fluid/operators/prim_ops/reshape_p_op.cc deleted file mode 100644 index 8137dfd629b01..0000000000000 --- a/paddle/fluid/operators/prim_ops/reshape_p_op.cc +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class ReshapePrimOp : public framework::OperatorBase { - public: - ReshapePrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator reshape_p should not be executed directly")); - } -}; - -class ReshapePrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of reshape_p op."); - AddOutput("Y", "(Tensor), The output tensor of reshape_p op."); - AddAttr>( - "shape", "(std::vector) Target shape of reshape_p operator."); - AddComment(R"DOC( -Autograd primitive reshape_p operator. -)DOC"); - } -}; - -static int64_t product(const std::vector &shape) { - int64_t rslt = 1; - for (auto item : shape) { - rslt *= item; - } - return rslt; -} - -class ReshapePrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - auto x_shape = x_var->GetShape(); - auto shape = ctx->Attrs().Get>("shape"); - PADDLE_ENFORCE_EQ(product(x_shape), - product(shape), - platform::errors::InvalidArgument( - "The input tensor can't be reshaped to target shape, " - "the input tensor has %d elements but target shape " - "contains %d elements", - product(x_shape), - product(shape))); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(shape); - } -}; - -class ReshapePrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(reshape_p, - paddle::operators::ReshapePrimOp, - paddle::operators::ReshapePrimOpMaker, - paddle::operators::ReshapePrimOpShapeInference, - paddle::operators::ReshapePrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/rsqrt_p_op.cc b/paddle/fluid/operators/prim_ops/rsqrt_p_op.cc deleted file mode 100644 index d2401c6d4e40f..0000000000000 --- a/paddle/fluid/operators/prim_ops/rsqrt_p_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
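Editor's note: the two shape rules just removed are plain arithmetic: reduce_sum_p collapses each reduced axis to 1 under keepdim and erases it otherwise, while reshape_p only demands that the element counts of the old and new shapes agree. A self-contained sketch of both computations (not Paddle API, just the arithmetic the deleted InferShapeBase classes performed):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// reduce_sum_p: output shape after reducing `axis`, with or without keepdim.
std::vector<int64_t> ReducedShape(std::vector<int64_t> shape,
                                  const std::vector<int64_t>& axis,
                                  bool keepdim) {
  const int64_t kDelFlag = -2;  // same erase-marker trick as the deleted code
  for (int64_t a : axis) shape[a] = keepdim ? 1 : kDelFlag;
  if (!keepdim)
    shape.erase(std::remove(shape.begin(), shape.end(), kDelFlag),
                shape.end());
  return shape;
}

// reshape_p: legal only when the element counts match.
bool CanReshape(const std::vector<int64_t>& from,
                const std::vector<int64_t>& to) {
  auto product = [](const std::vector<int64_t>& s) {
    return std::accumulate(s.begin(), s.end(), int64_t{1},
                           std::multiplies<int64_t>());
  };
  return product(from) == product(to);
}

int main() {
  assert((ReducedShape({2, 3, 4}, {1}, true) == std::vector<int64_t>{2, 1, 4}));
  assert((ReducedShape({2, 3, 4}, {1}, false) == std::vector<int64_t>{2, 4}));
  assert(CanReshape({2, 3, 4}, {6, 4}) && !CanReshape({2, 3}, {4, 2}));
  return 0;
}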
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class RsqrtPrimOp : public framework::OperatorBase { - public: - RsqrtPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator rsqrt_p should not be executed directly")); - } -}; - -class RsqrtPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of rsqrt_p op."); - AddOutput("Y", "(Tensor), The output tensor of rsqrt_p op."); - AddComment(R"DOC( -Autograd primitive rsqrt_p operator. -)DOC"); - } -}; - -class RsqrtPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class RsqrtPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(rsqrt_p, - paddle::operators::RsqrtPrimOp, - paddle::operators::RsqrtPrimOpMaker, - paddle::operators::RsqrtPrimOpShapeInference, - paddle::operators::RsqrtPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/scatter_add_p_op.cc b/paddle/fluid/operators/prim_ops/scatter_add_p_op.cc deleted file mode 100644 index 2b116d5224073..0000000000000 --- a/paddle/fluid/operators/prim_ops/scatter_add_p_op.cc +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class ScatterAddPrimOp : public framework::OperatorBase { - public: - ScatterAddPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator scatter_add_p should not be executed directly")); - } -}; - -class ScatterAddPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The tensor to apply scatter rule and add on."); - AddInput("Y", "(Tensor), The source tensor of scatter_add_p op."); - AddInput( - "IndexTensor", - "(Tensor), The index tensor of scatter_add_p op, which is a 1D tensor.") - .AsDispensable(); - AddOutput("Z", "(Tensor), The output tensor of scatter_add_p op."); - AddAttr<int64_t>("axis", - "(int64_t), The axis along which to scatter and add."); - AddAttr<std::vector<int64_t>>( - "index", "(std::vector<int64_t>) The index of scatter_add_p op") - .SetDefault({0}); - AddComment(R"DOC( -Autograd primitive scatter_add_p operator. -)DOC"); - } -}; - -class ScatterAddPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - int64_t num_index = 0; - if (ctx->HasInput("IndexTensor")) { - framework::InferShapeVarPtr index_var_ptr = - ctx->GetInputVarPtrs("IndexTensor")[0]; - framework::VarDesc *index_var = - PADDLE_GET(framework::VarDesc *, index_var_ptr); - auto index_shape = index_var->GetShape(); - PADDLE_ENFORCE_EQ(index_shape.size(), - 1, - platform::errors::InvalidArgument( - "The index tensor should be a 1D tensor, " - "but get rank %d", - index_shape.size())); - num_index = index_shape[0]; - } else { - num_index = static_cast<int64_t>( - ctx->Attrs().Get<std::vector<int64_t>>("index").size()); - } - auto axis = ctx->Attrs().Get<int64_t>("axis"); - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - PADDLE_ENFORCE_EQ(y_shape[axis], - num_index, - platform::errors::InvalidArgument( - "The shape of source input tensor at scatter axis " - "should be equal to num_index, " - "but get %d and %d", - y_shape[axis], - num_index)); - for (size_t i = 0; i < x_rank; ++i) { - if (i != size_t(axis)) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - }
- - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class ScatterAddPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - if (ctx->HasInput("IndexTensor")) { - auto index_name = Input(ctx, "IndexTensor")[0]; - auto index_dtype = GetDataType(ctx, index_name); - PADDLE_ENFORCE_EQ( - index_dtype, - framework::proto::VarType_Type_INT32, - platform::errors::InvalidArgument( - "The datatype of input tensor should be VarType_Type_INT32(%d), " - "but get %d", - framework::proto::VarType_Type_INT32, - index_dtype)); - } - SetType(ctx, z_name, GetType(ctx, x_name)); - SetDataType(ctx, z_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(scatter_add_p, - paddle::operators::ScatterAddPrimOp, - paddle::operators::ScatterAddPrimOpMaker, - paddle::operators::ScatterAddPrimOpShapeInference, - paddle::operators::ScatterAddPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/select_p_op.cc b/paddle/fluid/operators/prim_ops/select_p_op.cc deleted file mode 100644 index 69253da41d7d2..0000000000000 --- a/paddle/fluid/operators/prim_ops/select_p_op.cc +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
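Editor's note: scatter_add_p, deleted just above, constrains its inputs so that Y matches X at every dimension except the scatter axis, where Y's extent must equal the number of indices (the 1-D IndexTensor when present, otherwise the `index` attribute). A small dense sketch of the runtime semantics those shapes guarantee (illustrative only; the prim op itself is never executed):

#include <cassert>
#include <vector>

// Dense scatter-add along axis 0: x[index[i]][j] += y[i][j].
// x is n x m, y is k x m, and index holds k positions in [0, n).
void ScatterAdd(std::vector<std::vector<double>>* x,
                const std::vector<std::vector<double>>& y,
                const std::vector<int>& index) {
  assert(y.size() == index.size());  // y_shape[axis] == num_index
  for (size_t i = 0; i < index.size(); ++i)
    for (size_t j = 0; j < y[i].size(); ++j)
      (*x)[index[i]][j] += y[i][j];  // non-axis dimensions must line up
}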
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class SelectPrimOp : public framework::OperatorBase { - public: - SelectPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator select_p should not be executed directly")); - } -}; - -class SelectPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Condition", "(Tensor), The input condition of select_p op."); - AddInput("X", "(Tensor), The input tensor of select_p op."); - AddInput("Y", "(Tensor), The input tensor of select_p op."); - AddOutput("Z", "(Tensor), The output tensor of select_p op."); - AddComment(R"DOC( -Autograd primitive select_p operator. -)DOC"); - } -}; - -class SelectPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr condition_var_ptr = - ctx->GetInputVarPtrs("Condition")[0]; - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *condition_var = - PADDLE_GET(framework::VarDesc *, condition_var_ptr); - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - - auto condition_shape = condition_var->GetShape(); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - - size_t condition_rank = condition_shape.size(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - - PADDLE_ENFORCE_EQ( - condition_rank, - x_rank, - platform::errors::InvalidArgument( - "The dimensions of condtion and Inputs(X) should be same, " - "but get %d and %d", - condition_rank, - x_rank)); - PADDLE_ENFORCE_EQ( - x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of Inputs(X) and Inputs(Y) should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < condition_rank; ++i) { - PADDLE_ENFORCE_EQ(condition_shape[i], - x_shape[i], - platform::errors::InvalidArgument( - "The shape of condition and Inputs(X) at dimension " - "%d should be same, " - "but get %d and %d", - i, - condition_shape[i], - x_shape[i])); - } - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ(x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of Inputs(X) and Inputs(Y) at dimension " - "%d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(condition_shape); - } -}; - -class SelectPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - - PADDLE_ENFORCE_EQ(x_type, 
- y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, x_dtype); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(select_p, - paddle::operators::SelectPrimOp, - paddle::operators::SelectPrimOpMaker, - paddle::operators::SelectPrimOpShapeInference, - paddle::operators::SelectPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/sin_p_op.cc b/paddle/fluid/operators/prim_ops/sin_p_op.cc deleted file mode 100644 index 95b413acc77af..0000000000000 --- a/paddle/fluid/operators/prim_ops/sin_p_op.cc +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { -class SinPrimOp : public framework::OperatorBase { - public: - SinPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator sin_p should not be executed directly")); - } -}; - -class SinPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of sin_p op."); - AddOutput("Y", "(Tensor), The output tensor of sin_p op."); - AddComment(R"DOC(Autograd primitive sin_p operator.)DOC"); - } -}; - -class SinPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class SinPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(sin_p, - paddle::operators::SinPrimOp, - paddle::operators::SinPrimOpMaker, - paddle::operators::SinPrimOpShapeInference, - paddle::operators::SinPrimOpVarTypeInference); diff --git 
a/paddle/fluid/operators/prim_ops/slice_assign_p_op.cc b/paddle/fluid/operators/prim_ops/slice_assign_p_op.cc deleted file mode 100644 index 9485d621aa5d4..0000000000000 --- a/paddle/fluid/operators/prim_ops/slice_assign_p_op.cc +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class SliceAssignPrimOp : public framework::OperatorBase { - public: - SliceAssignPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator slice_assign_p should not be executed directly")); - } -}; - -class SliceAssignPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The tensor to slice from and assign on."); - AddInput("Y", "(Tensor), The source tensor of slice_assign_p op."); - AddOutput("Z", "(Tensor), The output tensor of slice_assign_p op."); - AddAttr>( - "axis", "(std::vector), The axis along which to gather."); - AddAttr>( - "starts", - "(std::vector) The slice starts of slice_assign_p op"); - AddAttr>( - "ends", "(std::vector) The slice ends of slice_assign_p op"); - AddAttr>( - "strides", - "(std::vector) The slice strides of slice_assign_p op"); - AddComment(R"DOC( -Autograd primitive slice_assign_p operator. 
-)DOC"); - } -}; - -class SliceAssignPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - auto axis = ctx->Attrs().Get>("axis"); - auto starts = ctx->Attrs().Get>("starts"); - auto ends = ctx->Attrs().Get>("ends"); - auto strides = ctx->Attrs().Get>("strides"); - PADDLE_ENFORCE_EQ( - starts.size(), - axis.size(), - platform::errors::InvalidArgument( - "Number of starts attribute and axis attribute should be same, " - "but get %d and %d", - starts.size(), - axis.size())); - PADDLE_ENFORCE_EQ( - ends.size(), - axis.size(), - platform::errors::InvalidArgument( - "Number of ends attribute and axis attribute should be same, " - "but get %d and %d", - ends.size(), - axis.size())); - PADDLE_ENFORCE_EQ( - strides.size(), - axis.size(), - platform::errors::InvalidArgument( - "Number of strides attribute and axis attribute should be same, " - "but get %d and %d", - strides.size(), - axis.size())); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - std::vector y_target_shape(x_shape); - for (size_t i = 0; i < axis.size(); ++i) { - y_target_shape[axis[i]] = - (ends[i] - starts[i] + strides[i] - 1) / strides[i]; - } - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ(y_target_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of source tensor of slice_assign_p op " - "at dimension %d should be %d, " - "but get %d", - i, - y_target_shape[i], - y_shape[i])); - } - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class SliceAssignPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, GetType(ctx, x_name)); - SetDataType(ctx, z_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(slice_assign_p, - paddle::operators::SliceAssignPrimOp, - paddle::operators::SliceAssignPrimOpMaker, - paddle::operators::SliceAssignPrimOpShapeInference, - paddle::operators::SliceAssignPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/slice_select_p_op.cc b/paddle/fluid/operators/prim_ops/slice_select_p_op.cc deleted file mode 100644 index 
dd2242368b739..0000000000000 --- a/paddle/fluid/operators/prim_ops/slice_select_p_op.cc +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class SliceSelectPrimOp : public framework::OperatorBase { - public: - SliceSelectPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator slice_select_p should not be executed directly")); - } -}; - -class SliceSelectPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of slice_select_p op."); - AddOutput("Y", "(Tensor), The output tensor of slice_select_p op."); - AddAttr>( - "axis", "(std::vector), The axis along which to gather."); - AddAttr>( - "starts", - "(std::vector) The slice starts of slice_select_p op"); - AddAttr>( - "ends", "(std::vector) The slice ends of slice_select_p op"); - AddAttr>( - "strides", - "(std::vector) The slice strides of slice_select_p op"); - AddComment(R"DOC( -Autograd primitive slice_select_p operator. 
-)DOC"); - } -}; - -class SliceSelectPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - auto x_shape = x_var->GetShape(); - auto axis = ctx->Attrs().Get>("axis"); - auto starts = ctx->Attrs().Get>("starts"); - auto ends = ctx->Attrs().Get>("ends"); - auto strides = ctx->Attrs().Get>("strides"); - PADDLE_ENFORCE_EQ( - starts.size(), - axis.size(), - platform::errors::InvalidArgument( - "Number of starts attribute and axis attribute should be same, " - "but get %d and %d", - starts.size(), - axis.size())); - PADDLE_ENFORCE_EQ( - ends.size(), - axis.size(), - platform::errors::InvalidArgument( - "Number of ends attribute and axis attribute should be same, " - "but get %d and %d", - ends.size(), - axis.size())); - PADDLE_ENFORCE_EQ( - strides.size(), - axis.size(), - platform::errors::InvalidArgument( - "Number of strides attribute and axis attribute should be same, " - "but get %d and %d", - strides.size(), - axis.size())); - for (size_t i = 0; i < axis.size(); ++i) { - x_shape[axis[i]] = (ends[i] - starts[i] + strides[i] - 1) / strides[i]; - } - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_shape); - } -}; - -class SliceSelectPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(slice_select_p, - paddle::operators::SliceSelectPrimOp, - paddle::operators::SliceSelectPrimOpMaker, - paddle::operators::SliceSelectPrimOpShapeInference, - paddle::operators::SliceSelectPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/split_p_op.cc b/paddle/fluid/operators/prim_ops/split_p_op.cc deleted file mode 100644 index bc0f8b8a31cda..0000000000000 --- a/paddle/fluid/operators/prim_ops/split_p_op.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class SplitPrimOp : public framework::OperatorBase { - public: - SplitPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator split_p should not be executed directly")); - } -}; - -class SplitPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of split_p op."); - AddOutput("YS", "(Tensor), The output tensors of split_p op.") - .AsDuplicable(); - AddAttr("axis", "(int64_t), The axis along which to split."); - AddAttr>( - "num_or_sections", - "(std::vector) If num_or_sections has only one element, then " - "num_or_sections indicates the number of equal sized sub-Tensors that " - "the input will be divided into. If num_or_sections has more then one " - "element, the length of it indicates the number of sub-Tensors and the " - "elements in it indicate the sizes of sub-Tensors' dimension orderly. " - "The length of the vector must not be larger than the input's size of " - "specified axis."); - AddComment(R"DOC( -Autograd primitive split_p operator. -)DOC"); - } -}; - -class SplitPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - auto y_var_ptrs = ctx->GetOutputVarPtrs("YS"); - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - auto x_shape = x_var->GetShape(); - auto axis = ctx->Attrs().Get("axis"); - auto num_or_sections = - ctx->Attrs().Get>("num_or_sections"); - std::vector y_shape(x_shape); - if (num_or_sections.size() == 1) { - PADDLE_ENFORCE_EQ(x_shape[axis] % num_or_sections[0], - 0, - platform::errors::InvalidArgument( - "The input tensor can't be devided equally into %d " - "parts equally along axis %d", - num_or_sections[0], - axis)); - y_shape[axis] = x_shape[axis] / num_or_sections[0]; - for (size_t i = 0; i < size_t(num_or_sections[0]); ++i) { - PADDLE_GET(framework::VarDesc *, y_var_ptrs[i])->SetShape(y_shape); - } - } else { - int64_t cnt_along_axis = 0; - for (size_t i = 0; i < num_or_sections.size(); ++i) { - y_shape[axis] = num_or_sections[i]; - cnt_along_axis += num_or_sections[i]; - PADDLE_GET(framework::VarDesc *, y_var_ptrs[i])->SetShape(y_shape); - } - PADDLE_ENFORCE_EQ( - x_shape[axis], - cnt_along_axis, - platform::errors::InvalidArgument( - "The input tensor has %d elements along axis %d, thus can't be " - "devided into %d tensor with %d elements totally.", - x_shape[axis], - axis, - num_or_sections.size(), - cnt_along_axis)); - } - } -}; - -class SplitPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_names = Output(ctx, "YS"); - for (auto const &y_name : y_names) { - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, 
x_name)); - } - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(split_p, - paddle::operators::SplitPrimOp, - paddle::operators::SplitPrimOpMaker, - paddle::operators::SplitPrimOpShapeInference, - paddle::operators::SplitPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/sqrt_p_op.cc b/paddle/fluid/operators/prim_ops/sqrt_p_op.cc deleted file mode 100644 index caebfd388f68f..0000000000000 --- a/paddle/fluid/operators/prim_ops/sqrt_p_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class SqrtPrimOp : public framework::OperatorBase { - public: - SqrtPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator sqrt_p should not be executed directly")); - } -}; - -class SqrtPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of sqrt_p op."); - AddOutput("Y", "(Tensor), The output tensor of sqrt_p op."); - AddComment(R"DOC( -Autograd primitive sqrt_p operator. 
-)DOC"); - } -}; - -class SqrtPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class SqrtPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(sqrt_p, - paddle::operators::SqrtPrimOp, - paddle::operators::SqrtPrimOpMaker, - paddle::operators::SqrtPrimOpShapeInference, - paddle::operators::SqrtPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/sub_p_op.cc b/paddle/fluid/operators/prim_ops/sub_p_op.cc deleted file mode 100644 index 4497978093f4f..0000000000000 --- a/paddle/fluid/operators/prim_ops/sub_p_op.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class SubPrimOp : public framework::OperatorBase { - public: - SubPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator sub_p should not be executed directly")); - } -}; - -class SubPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of sub_p op."); - AddInput("Y", "(Tensor), The input tensor of sub_p op."); - AddOutput("Z", "(Tensor), The output tensor of sub_p op."); - AddComment(R"DOC( -Autograd primitive sub_p operator. 
-)DOC"); - } -}; - -class SubPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetInputVarPtrs("Y")[0]; - framework::InferShapeVarPtr z_var_ptr = ctx->GetOutputVarPtrs("Z")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - framework::VarDesc *y_var = PADDLE_GET(framework::VarDesc *, y_var_ptr); - auto x_shape = x_var->GetShape(); - auto y_shape = y_var->GetShape(); - size_t x_rank = x_shape.size(); - size_t y_rank = y_shape.size(); - PADDLE_ENFORCE_EQ(x_rank, - y_rank, - platform::errors::InvalidArgument( - "The dimensions of two input tensor should be same, " - "but get %d and %d", - x_rank, - y_rank)); - for (size_t i = 0; i < x_rank; ++i) { - PADDLE_ENFORCE_EQ( - x_shape[i], - y_shape[i], - platform::errors::InvalidArgument( - "The shape of two input tensor at dimension %d should be same, " - "but get %d and %d", - i, - x_shape[i], - y_shape[i])); - } - - PADDLE_GET(framework::VarDesc *, z_var_ptr)->SetShape(x_shape); - } -}; - -class SubPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Input(ctx, "Y")[0]; - auto z_name = Output(ctx, "Z")[0]; - auto x_type = GetType(ctx, x_name); - auto y_type = GetType(ctx, y_name); - auto x_dtype = GetDataType(ctx, x_name); - auto y_dtype = GetDataType(ctx, y_name); - PADDLE_ENFORCE_EQ(x_type, - y_type, - platform::errors::InvalidArgument( - "The type of two input tensor should be same, " - "but get %d and %d", - x_type, - y_type)); - PADDLE_ENFORCE_EQ(x_dtype, - y_dtype, - platform::errors::InvalidArgument( - "The datatype of two input tensor should be same, " - "but get %d and %d", - x_dtype, - y_dtype)); - - SetType(ctx, z_name, x_type); - SetDataType(ctx, z_name, x_dtype); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(sub_p, - paddle::operators::SubPrimOp, - paddle::operators::SubPrimOpMaker, - paddle::operators::SubPrimOpShapeInference, - paddle::operators::SubPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/tanh_p_op.cc b/paddle/fluid/operators/prim_ops/tanh_p_op.cc deleted file mode 100644 index 042394aa15068..0000000000000 --- a/paddle/fluid/operators/prim_ops/tanh_p_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class TanhPrimOp : public framework::OperatorBase { - public: - TanhPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator tanh_p should not be executed directly")); - } -}; - -class TanhPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of tanh_p op."); - AddOutput("Y", "(Tensor), The output tensor of tanh_p op."); - AddComment(R"DOC( -Autograd primitive tanh_p operator. -)DOC"); - } -}; - -class TanhPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_var->GetShape()); - } -}; - -class TanhPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(tanh_p, - paddle::operators::TanhPrimOp, - paddle::operators::TanhPrimOpMaker, - paddle::operators::TanhPrimOpShapeInference, - paddle::operators::TanhPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/transpose_p_op.cc b/paddle/fluid/operators/prim_ops/transpose_p_op.cc deleted file mode 100644 index cb76f81ef0901..0000000000000 --- a/paddle/fluid/operators/prim_ops/transpose_p_op.cc +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class TransposePrimOp : public framework::OperatorBase { - public: - TransposePrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator transpose_p should not be executed directly")); - } -}; - -class TransposePrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of transpose_p op."); - AddOutput("Y", "(Tensor), The output tensor of transpose_p op."); - AddAttr>("axis", - "(std::vector) Tanspose axis."); - AddComment(R"DOC( -Autograd primitive transpose_p operator. -)DOC"); - } -}; - -class TransposePrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr x_var_ptr = ctx->GetInputVarPtrs("X")[0]; - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Y")[0]; - framework::VarDesc *x_var = PADDLE_GET(framework::VarDesc *, x_var_ptr); - auto x_shape = x_var->GetShape(); - auto axis = ctx->Attrs().Get>("axis"); - size_t x_rank = x_shape.size(); - size_t axis_size = axis.size(); - PADDLE_ENFORCE_EQ(x_rank, - axis_size, - platform::errors::InvalidArgument( - "The input tensor's dimension " - "should be equal to the axis's size. " - "But received input tensor's dimension is %d, " - "axis's size is %d", - x_rank, - axis_size)); - - std::vector count(axis_size, 0); - for (size_t i = 0; i < axis_size; i++) { - PADDLE_ENFORCE_GE(axis[i], - 0, - platform::errors::InvalidArgument( - "The axis should be greater than or equal to 0." - "But received %d of axis[%d]", - axis[i], - i)); - - PADDLE_ENFORCE_EQ( - axis[i] < static_cast(axis_size) && ++count[axis[i]] == 1, - true, - platform::errors::InvalidArgument( - "Each element of Attribute axis should " - "be a unique value range from 0 to (dims - 1), " - "where the dims is the axis's size, " - "unique value means this axis value can appear only once. 
" - "But received axis[%d] is %d, axis_size is %d, " - "count[axis[%d]] is %d", - i, - axis[i], - axis_size, - i, - count[axis[i]])); - } - std::vector y_shape(axis_size); - for (size_t i = 0; i < axis_size; i++) { - y_shape[i] = x_shape[axis[i]]; - } - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(y_shape); - } -}; - -class TransposePrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto x_name = Input(ctx, "X")[0]; - auto y_name = Output(ctx, "Y")[0]; - SetType(ctx, y_name, GetType(ctx, x_name)); - SetDataType(ctx, y_name, GetDataType(ctx, x_name)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(transpose_p, - paddle::operators::TransposePrimOp, - paddle::operators::TransposePrimOpMaker, - paddle::operators::TransposePrimOpShapeInference, - paddle::operators::TransposePrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/uniform_random_p_op.cc b/paddle/fluid/operators/prim_ops/uniform_random_p_op.cc deleted file mode 100644 index 3a06459d33798..0000000000000 --- a/paddle/fluid/operators/prim_ops/uniform_random_p_op.cc +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class VarDesc; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { -class UniformRandomPrimOp : public framework::OperatorBase { - public: - UniformRandomPrimOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : framework::OperatorBase(type, inputs, outputs, attrs) {} - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - PADDLE_THROW(platform::errors::Unimplemented( - "Prim operator uniform_randrom_p should not be executed directly")); - } -}; - -class UniformRandomPrimOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddOutput("Out", "(Tensor), The output tensor of uniform_random_p op."); - AddAttr>("shape", "The shape of the output tensor") - .SetDefault({}); - AddAttr("min", "Minimum value of uniform_random_p. [default -1.0]."); - AddAttr("max", "Maximun value of uniform_random_p. [default 1.0]."); - AddAttr("seed", - "Random seed used for generating samples. " - "0 means use a seed generated by the system." - "Note that if seed is not 0, this operator will always " - "generate the same random numbers every time. "); - AddAttr("dtype", "Output tensor data type. "); - AddComment(R"DOC( -Autograd primitive uniform_random_p operator. 
-)DOC"); - } -}; - -class UniformRandomPrimOpShapeInference : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *ctx) const override { - framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Out")[0]; - auto shape = ctx->Attrs().Get>("shape"); - PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(shape); - } -}; - -class UniformRandomPrimOpVarTypeInference - : public framework::StaticGraphVarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto y_name = Output(ctx, "Out")[0]; - auto data_type = static_cast( - PADDLE_GET_CONST(int, ctx->GetAttr("dtype"))); - SetDataType(ctx, y_name, data_type); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(uniform_random_p, - paddle::operators::UniformRandomPrimOp, - paddle::operators::UniformRandomPrimOpMaker, - paddle::operators::UniformRandomPrimOpShapeInference, - paddle::operators::UniformRandomPrimOpVarTypeInference); diff --git a/paddle/fluid/operators/prim_ops/unity_build_rule.cmake b/paddle/fluid/operators/prim_ops/unity_build_rule.cmake deleted file mode 100644 index 74b04d234fcde..0000000000000 --- a/paddle/fluid/operators/prim_ops/unity_build_rule.cmake +++ /dev/null @@ -1,20 +0,0 @@ -register_unity_group( - cc - reshape_p_op.cc - broadcast_p_op.cc - reduce_p_op.cc - transpose_p_op.cc - split_p_op.cc - concat_p_op.cc - slice_select_p_op.cc - slice_assign_p_op.cc - gather_p_op.cc - scatter_add_p_op.cc - add_p_op.cc - sub_p_op.cc - mul_p_op.cc - div_p_op.cc - sqrt_p_op.cc - tanh_p_op.cc - matmul_p_op.cc - fill_constant_p_op.cc) diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc index 26647e8f05c83..e521fc0ffcacf 100644 --- a/paddle/fluid/operators/print_op.cc +++ b/paddle/fluid/operators/print_op.cc @@ -58,12 +58,11 @@ class PrintOp : public framework::OperatorBase { PADDLE_ENFORCE_NOT_NULL( in_var, - platform::errors::NotFound("The input:%s not found in scope", - Input("In"))); + phi::errors::NotFound("The input:%s not found in scope", Input("In"))); PADDLE_ENFORCE_NOT_NULL( out_var, - platform::errors::NotFound("The output:%s not found in scope", - Output("Out"))); + phi::errors::NotFound("The output:%s not found in scope", + Output("Out"))); auto &in_tensor = in_var->Get(); phi::DenseTensor *out_tensor = out_var->GetMutable(); diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cc b/paddle/fluid/operators/prune_gate_by_capacity_op.cc index 365342fa7ea5f..4e4bc4d291d68 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cc +++ b/paddle/fluid/operators/prune_gate_by_capacity_op.cc @@ -51,7 +51,7 @@ class PruneGateByCapacityOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( expert_count_num_ele, n_expert * n_worker, - platform::errors::Unavailable( + phi::errors::Unavailable( "The number of elements for expert_count is ( %ld ) incorrect. " "Because the number of expert_count must equal the " "product of n_worker ( %ld ) and n_expert ( %ld ). 
" @@ -76,11 +76,11 @@ class PruneGateByCapacityOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( gate_idx_data_type, expert_count_data_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the gate_idx and expert_count should be same")); PADDLE_ENFORCE_EQ(gate_idx_data_type, framework::proto::VarType::INT64, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the gate_idx and expert_count should " "be same as int64")); return phi::KernelKey(gate_idx_data_type, ctx.GetPlace()); diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc index 9d5e3eb00d0ef..ff8f9931b0e06 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc @@ -29,15 +29,15 @@ class DistributedLookupTableOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInputs("Ids"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of LookupTableOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(W) of LookupTableOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutputs("Outputs"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Outs) of LookupTableOp should not be null.")); auto ids_dims = ctx->GetInputsDim("Ids"); @@ -46,13 +46,13 @@ class DistributedLookupTableOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( table_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only 2 dimensions of the 'Embedding' is supported.")); for (auto &ids_dim : ids_dims) { PADDLE_ENFORCE_EQ(ids_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of the 'Ids' tensor must be 2.")); } diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h index 414500c2faac3..258de211c482b 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h @@ -39,7 +39,7 @@ class DistributedLookupTableKernel : public framework::OpKernel { } else if (var->IsType()) { emb_dim = var->Get().value().dims()[1]; } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected type of `W` must be Tensor, SelectedRows.But got " "unsupport type: %s.", framework::ToTypeName(var->Type()))); diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc index 2a8b4f9be7698..cc9e0aeff1f01 100644 --- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc +++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc @@ -29,11 +29,11 @@ class DistributedPushSparseOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInputs("Ids"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of PushSparseOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutputs("Outputs"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Outs) of PushSparseOp should not be null.")); auto ids_dims = 
ctx->GetInputsDim("Ids"); @@ -41,7 +41,7 @@ class DistributedPushSparseOp : public framework::OperatorWithKernel { for (auto &ids_dim : ids_dims) { PADDLE_ENFORCE_EQ(ids_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of the 'Ids' tensor must be 2.")); } diff --git a/paddle/fluid/operators/pscore/fake_init_op.cc b/paddle/fluid/operators/pscore/fake_init_op.cc index cd919cb7ca0bf..1104b8bed673e 100644 --- a/paddle/fluid/operators/pscore/fake_init_op.cc +++ b/paddle/fluid/operators/pscore/fake_init_op.cc @@ -43,7 +43,7 @@ class FakeInitOp : public framework::OperatorBase { tensor = out_var.GetMutable()->mutable_value(); tensor->Resize(common::make_ddim(Attr>("shape"))); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "fake init op's output only" "supports SelectedRows and phi::DenseTensor")); } diff --git a/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc b/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc index 978981a6fcdf3..be1e6c64b2484 100644 --- a/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc +++ b/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc @@ -66,13 +66,13 @@ void HeterListenAndServOp::RunAsyncLoop(framework::ProgramDesc *program) const { VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1]; PADDLE_ENFORCE_EQ(pieces.size(), 2, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Invalid format of message_and_id argument. " "Expected \"message:block_id\". Received %s", grad_and_id.c_str())); PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0, - platform::errors::AlreadyExists( + phi::errors::AlreadyExists( "The message name %s has already existed in out_map", pieces[0].c_str())); @@ -87,7 +87,7 @@ void HeterListenAndServOp::RunAsyncLoop(framework::ProgramDesc *program) const { size_t num_blocks = program->Size(); PADDLE_ENFORCE_GE(num_blocks, 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Invalid number of blocks in server program. Expected " "equal or greater than 1. Received %zu", num_blocks)); @@ -136,7 +136,7 @@ void HeterListenAndServOp::RunImpl(const framework::Scope &scope, PADDLE_ENFORCE_EQ(heter_server_, nullptr, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "RPC service has been created unexpectedly.")); std::string endpoint = Attr("endpoint"); @@ -150,7 +150,7 @@ void HeterListenAndServOp::RunImpl(const framework::Scope &scope, Attr>("optimize_blocks"); PADDLE_ENFORCE_GE(optimize_blocks.size(), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "optimize blocks is less than 1. Optimize blocks " "should be 1 at least on the pserver side.")); diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.cc b/paddle/fluid/operators/pull_box_extended_sparse_op.cc deleted file mode 100644 index 75918b9ad62a4..0000000000000 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.cc +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/pull_box_extended_sparse_op.h" - -namespace paddle { -namespace operators { - -class PullBoxExtendedSparseOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_GE( - ctx->Inputs("Ids").size(), - 1UL, - platform::errors::InvalidArgument( - "Inputs(Ids) of PullBoxExtendedSparseOp should not be empty.")); - PADDLE_ENFORCE_GE( - ctx->Outputs("Out").size(), - 1UL, - platform::errors::InvalidArgument( - "Outputs(Out) of PullBoxExtendedSparseOp should not be empty.")); - PADDLE_ENFORCE_GE(ctx->Outputs("OutExtend").size(), - 1UL, - platform::errors::InvalidArgument( - "Outputs(OutExtend) of PullBoxExtendedSparseOp " - "should not be empty.")); - auto emb_size = static_cast<int64_t>(ctx->Attrs().Get<int>("emb_size")); - auto emb_extended_size = - static_cast<int64_t>(ctx->Attrs().Get<int>("emb_extended_size")); - auto all_ids_dim = ctx->GetInputsDim("Ids"); - const size_t n_ids = all_ids_dim.size(); - std::vector<framework::DDim> outs_dims; - std::vector<framework::DDim> outs_extended_dims; - outs_dims.resize(n_ids); - outs_extended_dims.resize(n_ids); - for (size_t i = 0; i < n_ids; ++i) { - const auto ids_dims = all_ids_dim[i]; - int ids_rank = ids_dims.size(); - PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], - 1, - platform::errors::InvalidArgument( - "Shape error in %lu id, the last dimension of the " - "'Ids' tensor must be 1.", - i)); - auto out_dim = - common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); - out_dim.push_back(emb_size); - outs_dims[i] = common::make_ddim(out_dim); - - auto out_extended_dim = - common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); - out_extended_dim.push_back(emb_extended_size); - outs_extended_dims[i] = common::make_ddim(out_extended_dim); - } - ctx->SetOutputsDim("Out", outs_dims); - ctx->SetOutputsDim("OutExtend", outs_extended_dims); - for (size_t i = 0; i < n_ids; ++i) { - ctx->ShareLoD("Ids", "Out", i, i); - ctx->ShareLoD("Ids", "OutExtend", i, i); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(framework::proto::VarType::FP32, ctx.GetPlace()); - } -}; - -class PullBoxExtendedSparseOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Ids", - "Input tensors with type int32 or int64 " - "contains the ids to be looked up in BoxPS. " - "The last dimension size must be 1.") - .AsDuplicable(); - AddOutput("Out", "The lookup results tensors.").AsDuplicable(); - AddOutput("OutExtend", "The lookup extended results tensors.") - .AsDuplicable(); - AddAttr<int>("emb_size", "(int, the embedding hidden size").SetDefault(1); - AddAttr<int>("emb_extended_size", - "(int, the extended_embedding hidden size") - .SetDefault(128); - AddComment(R"DOC( -Pull Box Extended Sparse Operator. - -This operator is used to perform lookups on the BoxPS, -then concatenated into a dense tensor. - -The input Ids can carry the LoD (Level of Details) information, -or not. 
And the output only shares the LoD information with input Ids. - -)DOC"); - } -}; - -template <typename T> -class PushBoxExtendedSparseOpMaker : public framework::SingleGradOpMaker<T> { - public: - using framework::SingleGradOpMaker<T>::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr<T> op) const override { - op->SetType("push_box_extended_sparse"); - op->SetInput("Ids", this->Input("Ids")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput(framework::GradVarName("OutExtend"), - this->OutputGrad("OutExtend")); - op->SetOutput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetAttrMap(this->Attrs()); - } -}; - -class PushBoxExtendedSparseOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override {} - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - pull_box_extended_sparse, - ops::PullBoxExtendedSparseOp, - ops::PullBoxExtendedSparseOpMaker, - ops::PushBoxExtendedSparseOpMaker<paddle::framework::OpDesc>, - ops::PushBoxExtendedSparseOpMaker<paddle::imperative::OpBase>); - -REGISTER_OPERATOR(push_box_extended_sparse, ops::PushBoxExtendedSparseOp); - -PD_REGISTER_STRUCT_KERNEL(pull_box_extended_sparse, - CPU, - ALL_LAYOUT, - ops::PullBoxExtendedSparseCPUKernel, - float, - double) {} -PD_REGISTER_STRUCT_KERNEL(push_box_extended_sparse, - CPU, - ALL_LAYOUT, - ops::PushBoxExtendedSparseCPUKernel, - float, - double) {} diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.cu b/paddle/fluid/operators/pull_box_extended_sparse_op.cu deleted file mode 100644 index 570c367c93182..0000000000000 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.cu +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/operators/pull_box_extended_sparse_op.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/phi/backends/gpu/gpu_primitives.h" - -namespace paddle { -namespace operators { - -template -class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - PullBoxExtendedSparseFunctor(ctx); - } -}; - -template -class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - PushBoxExtendedSparseFunctor(ctx); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(pull_box_extended_sparse, - GPU, - ALL_LAYOUT, - ops::PullBoxExtendedSparseCUDAKernel, - float, - double) {} -PD_REGISTER_STRUCT_KERNEL(push_box_extended_sparse, - GPU, - ALL_LAYOUT, - ops::PushBoxExtendedSparseCUDAKernel, - float, - double) {} diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.h b/paddle/fluid/operators/pull_box_extended_sparse_op.h deleted file mode 100644 index 76e570f10fb64..0000000000000 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.h +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once -#include <memory> -#include <vector> - -#include "paddle/fluid/framework/fleet/box_wrapper.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/tensor.h" - -namespace paddle { -namespace operators { - -template <typename T> -static void PullBoxExtendedSparseFunctor( - const framework::ExecutionContext &ctx) { - auto inputs = ctx.MultiInput<phi::DenseTensor>("Ids"); - auto outputs = ctx.MultiOutput<phi::DenseTensor>("Out"); - auto outputs_extend = ctx.MultiOutput<phi::DenseTensor>("OutExtend"); - const auto slot_size = inputs.size(); - std::vector<const uint64_t *> all_keys(slot_size); - // BoxPS only supports float now - std::vector<float *> all_values(slot_size * 2); - std::vector<int64_t> slot_lengths(slot_size); - for (size_t i = 0; i < slot_size; i++) { - const auto *slot = inputs[i]; - const uint64_t *single_slot_keys = - reinterpret_cast<const uint64_t *>(slot->data<int64_t>()); - all_keys[i] = single_slot_keys; - slot_lengths[i] = slot->numel(); - auto *output = outputs[i]->mutable_data<T>(ctx.GetPlace()); - auto *output_extend = outputs_extend[i]->mutable_data<T>(ctx.GetPlace()); - all_values[i] = reinterpret_cast<float *>(output); - all_values[i + slot_size] = reinterpret_cast<float *>(output_extend); - } -#ifdef PADDLE_WITH_BOX_PS - auto emb_size = ctx.Attr<int>("emb_size"); - auto emb_extended_size = ctx.Attr<int>("emb_extended_size"); - auto box_ptr = paddle::framework::BoxWrapper::GetInstance(); - box_ptr->PullSparse(ctx.GetPlace(), - all_keys, - all_values, - slot_lengths, - emb_size, - emb_extended_size); -#endif -} - -template <typename T> -static void PushBoxExtendedSparseFunctor( - const framework::ExecutionContext &ctx) { - auto inputs = ctx.MultiInput<phi::DenseTensor>("Ids"); - auto d_output = - ctx.MultiInput<phi::DenseTensor>(framework::GradVarName("Out")); - auto d_output_extend = - ctx.MultiInput<phi::DenseTensor>(framework::GradVarName("OutExtend")); - const auto slot_size = inputs.size(); - std::vector<const uint64_t *> all_keys(slot_size); - std::vector<const float *> all_grad_values(slot_size * 2); - std::vector<int64_t> slot_lengths(slot_size); - int batch_size = -1; - for (size_t i = 0; i < slot_size; i++) { - const auto *slot = inputs[i]; - const uint64_t *single_slot_keys = - reinterpret_cast<const uint64_t *>(slot->data<int64_t>()); - all_keys[i] = single_slot_keys; - slot_lengths[i] = slot->numel(); - int cur_batch_size = - slot->lod().size() ? 
slot->lod()[0].size() - 1 : slot->dims()[0]; - if (batch_size == -1) { - batch_size = cur_batch_size; - } else { - PADDLE_ENFORCE_EQ(batch_size, - cur_batch_size, - platform::errors::PreconditionNotMet( - "The batch size of all input slots should be same," - "please check")); - } - const float *grad_value = d_output[i]->data<float>(); - const float *grad_value_extend = d_output_extend[i]->data<float>(); - all_grad_values[i] = reinterpret_cast<const float *>(grad_value); - all_grad_values[i + slot_size] = - reinterpret_cast<const float *>(grad_value_extend); - } -#ifdef PADDLE_WITH_BOX_PS - auto emb_size = ctx.Attr<int>("emb_size"); - auto emb_extended_size = ctx.Attr<int>("emb_extended_size"); - auto box_ptr = paddle::framework::BoxWrapper::GetInstance(); - box_ptr->PushSparseGrad(ctx.GetPlace(), - all_keys, - all_grad_values, - slot_lengths, - emb_size, - emb_extended_size, - batch_size); -#endif -} - -template <typename T, typename DeviceContext> -class PullBoxExtendedSparseCPUKernel : public framework::OpKernel<T> { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - PullBoxExtendedSparseFunctor<T>(ctx); - } -}; - -template <typename T, typename DeviceContext> -class PushBoxExtendedSparseCPUKernel : public framework::OpKernel<T> { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - PushBoxExtendedSparseFunctor<T>(ctx); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/pull_box_sparse_op.cc b/paddle/fluid/operators/pull_box_sparse_op.cc index d37cc35a59945..51786ffc0180d 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.cc +++ b/paddle/fluid/operators/pull_box_sparse_op.cc @@ -24,12 +24,12 @@ class PullBoxSparseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Ids) of PullBoxSparseOp should not be empty.")); PADDLE_ENFORCE_GE( ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of PullBoxSparseOp should not be empty.")); auto hidden_size = static_cast<int64_t>(ctx->Attrs().Get<int>("size")); auto all_ids_dim = ctx->GetInputsDim("Ids"); @@ -41,7 +41,7 @@ class PullBoxSparseOp : public framework::OperatorWithKernel { int ids_rank = ids_dims.size(); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension of the " "'Ids' tensor must be 1.", i)); diff --git a/paddle/fluid/operators/pull_box_sparse_op.h b/paddle/fluid/operators/pull_box_sparse_op.h index 1ebfa11a2b2e6..06ebe7b5a93d3 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.h +++ b/paddle/fluid/operators/pull_box_sparse_op.h @@ -82,7 +82,7 @@ static void PushBoxSparseFunctor(const framework::ExecutionContext &ctx) { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input slots should be same, " "please cheack")); } diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.cc b/paddle/fluid/operators/pull_gpups_sparse_op.cc index 6055632f5681a..946a1b8c7136b 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.cc +++ b/paddle/fluid/operators/pull_gpups_sparse_op.cc @@ -24,21 +24,21 @@ class PullGpuPSSparseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Ids) of PullGpuPSSparseOp should not be empty.")); PADDLE_ENFORCE_GE( ctx->Outputs("Out").size(), 1UL, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of PullGpuPSSparseOp should not be empty.")); auto embedding_size_vec = ctx->Attrs().Get<std::vector<int>>("size"); PADDLE_ENFORCE_EQ( ctx->Inputs("Ids").size(), embedding_size_vec.size(), - platform::errors::InvalidArgument("The ids size: %lu must be equal to " "the length of embedding size: %lu.", ctx->Inputs("Ids").size(), embedding_size_vec.size())); + phi::errors::InvalidArgument("The ids size: %lu must be equal to " + "the length of embedding size: %lu.", + ctx->Inputs("Ids").size(), + embedding_size_vec.size())); auto all_ids_dim = ctx->GetInputsDim("Ids"); const size_t n_ids = all_ids_dim.size(); std::vector<framework::DDim> outs_dims; @@ -49,7 +49,7 @@ class PullGpuPSSparseOp : public framework::OperatorWithKernel { int ids_rank = ids_dims.size(); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension of the " "'Ids' tensor must be 1.", i)); diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.h b/paddle/fluid/operators/pull_gpups_sparse_op.h index e5e08cfdde685..098e9b143a8e1 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.h +++ b/paddle/fluid/operators/pull_gpups_sparse_op.h @@ -78,7 +78,7 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) { } else { PADDLE_ENFORCE_EQ(batch_size, cur_batch_size, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The batch size of all input slots should be same, " "please check")); } diff --git a/paddle/fluid/operators/pull_sparse_op.cc b/paddle/fluid/operators/pull_sparse_op.cc index 55a6af8466b86..dcea341c8a9e9 100644 --- a/paddle/fluid/operators/pull_sparse_op.cc +++ b/paddle/fluid/operators/pull_sparse_op.cc @@ -25,11 +25,11 @@ class PullSparseOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_GE(ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of PullSparseOp can not be null")); PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of PullSparseOp can not be null")); auto hidden_size = @@ -43,7 +43,7 @@ class PullSparseOp : public framework::OperatorWithKernel { int ids_rank = ids_dims.size(); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape error in %lu id, the last dimension of " " the 'Ids' tensor must be 1.", i)); diff --git a/paddle/fluid/operators/pull_sparse_v2_op.cc b/paddle/fluid/operators/pull_sparse_v2_op.cc index d134607d3c4bb..07af5da7ef92a 100644 --- a/paddle/fluid/operators/pull_sparse_v2_op.cc +++ b/paddle/fluid/operators/pull_sparse_v2_op.cc @@ -25,11 +25,11 @@ class PullSparseV2Op : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_GE(ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of PullSparseV2Op can not be null")); PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of PullSparseV2Op can not be null")); auto hidden_size = diff --git a/paddle/fluid/operators/push_dense_op.cc b/paddle/fluid/operators/push_dense_op.cc index c0b9b04500648..080610c6e0df1 100644 --- a/paddle/fluid/operators/push_dense_op.cc +++ 
b/paddle/fluid/operators/push_dense_op.cc @@ -25,7 +25,7 @@ class PushDenseOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_GE(ctx->Inputs("Ids").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Ids) of PushDenseOp can not be null.")); } diff --git a/paddle/fluid/operators/push_dense_op.h b/paddle/fluid/operators/push_dense_op.h index ec7b6b6c3f0bf..6ec833df39583 100644 --- a/paddle/fluid/operators/push_dense_op.h +++ b/paddle/fluid/operators/push_dense_op.h @@ -33,7 +33,7 @@ void PushDenseFunctor(const framework::ExecutionContext& ctx) { auto table_id = static_cast<uint64_t>(ctx.Attr<int>("TableId")); PADDLE_ENFORCE_GT(table_id, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "table id should > 0, but value is ", table_id)); float scale_datanorm = ctx.Attr<float>("ScaleDataNorm"); const auto& ids = ctx.MultiInput<phi::DenseTensor>("Ids"); @@ -41,7 +41,7 @@ void PushDenseFunctor(const framework::ExecutionContext& ctx) { ids[0]->lod().size() ? ids[0]->lod()[0].size() - 1 : ids[0]->dims()[0]; PADDLE_ENFORCE_GT(batch_size, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "batch size should > 0, but value is ", batch_size)); auto fleet_ptr = framework::FleetWrapper::GetInstance(); @@ -51,10 +51,10 @@ void PushDenseFunctor(const framework::ExecutionContext& ctx) { // note: GetInstance() is not thread-safe // we assume PullDenseWorker has been already initialized in DistMultiTrainer auto pull_dense_worker = framework::PullDenseWorker::GetInstance(); - PADDLE_ENFORCE_NE(pull_dense_worker, - nullptr, - platform::errors::PreconditionNotMet( - "pull_dense_worker should not be null")); + PADDLE_ENFORCE_NE( + pull_dense_worker, + nullptr, + phi::errors::PreconditionNotMet("pull_dense_worker should not be null")); int thread_id = pull_dense_worker->GetThreadIdByScope(&ctx.scope()); pull_dense_worker->IncreaseThreadVersion(thread_id, table_id); #endif diff --git a/paddle/fluid/operators/py_func_op.cc b/paddle/fluid/operators/py_func_op.cc index 7d9c8ceca4943..5e3fa0b5507a0 100644 --- a/paddle/fluid/operators/py_func_op.cc +++ b/paddle/fluid/operators/py_func_op.cc @@ -47,7 +47,7 @@ static py::object *GetPythonCallableObject(size_t i) { PADDLE_ENFORCE_LT( i, g_py_callables.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid python callable id %d, which should be less than %d.", i, g_py_callables.size())); @@ -81,7 +81,7 @@ static void CallPythonFunc(py::object *callable, // Otherwise, ret_num must be equal to out_num PADDLE_ENFORCE_EQ(ret_num == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Python function has no return values or returns " "None. In this case, ret_num = 1 && ret[0] == None " "&& out_num should be 0. But ret_num is %d", @@ -90,7 +90,7 @@ static void CallPythonFunc(py::object *callable, PADDLE_ENFORCE_EQ( out_num == 0, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Python function has no return values or returns None. In " "this case, ret_num = 1 && ret[0] == None && out_num should " "be 0. But out_num is %d", @@ -99,7 +99,7 @@ static void CallPythonFunc(py::object *callable, PADDLE_ENFORCE_EQ( py::cast<phi::DenseTensor *>(ret_tuple[0]) == nullptr, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Python function has no return values or returns None. In " "this case, ret_num = 1 && ret[0] == None && out_num should " "be 0. 
But ret[0] is not None")); @@ -113,12 +113,12 @@ static void CallPythonFunc(py::object *callable, try { auto *py_out_tensor = py::cast(ret_tuple[i]); PADDLE_ENFORCE_NOT_NULL(py_out_tensor, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output tensor %d should not be nullptr", i)); out->set_lod(py_out_tensor->lod()); out->ShareDataWith(*py_out_tensor); } catch (py::cast_error &) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "py::cast to phi::DenseTensor error. The %d-th output exception is " "phi::DenseTensor", i)); @@ -139,15 +139,15 @@ class PyFuncOpVarTypeInference : public framework::StaticGraphVarTypeInference { PADDLE_ENFORCE_EQ( has_in || has_out, true, - platform::errors::InvalidArgument("Input(X) or Output(Out) must exist, " - "but has_in is %d, has_out is %d.", - has_in, - has_out)); + phi::errors::InvalidArgument("Input(X) or Output(Out) must exist, " + "but has_in is %d, has_out is %d.", + has_in, + has_out)); PADDLE_ENFORCE_GE( PADDLE_GET_CONST(int, ctx->GetAttr(kForwardPythonCallableId.data())), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Function id cannot be less than 0, but received value is %d.", PADDLE_GET_CONST(int, ctx->GetAttr(kForwardPythonCallableId.data())))); @@ -192,8 +192,8 @@ class PyFuncOpShapeInference : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( !ctx->IsRuntime(), true, - platform::errors::InvalidArgument("Shape inference cannot be called at " - "run time in 'py_func' operator.")); + phi::errors::InvalidArgument("Shape inference cannot be called at " + "run time in 'py_func' operator.")); } }; diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index f5a8fcaa9de0c..3f92da5d73676 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -88,24 +88,24 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound("Input(X) of PyramidHashOP is not found.")); + phi::errors::NotFound("Input(X) of PyramidHashOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::NotFound("Input(W) of PyramidHashOP is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of PyramidHashOP is not found.")); + phi::errors::NotFound("Input(W) of PyramidHashOP is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) of PyramidHashOP is not found.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("DropPos"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(DropPos) of PyramidHashOP is not found.")); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(X) of PyramidHashOP is invalid. " "It should be 2, but got %d", x_dims.size())); @@ -113,7 +113,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { auto w_dims = ctx->GetInputDim("W"); PADDLE_ENFORCE_EQ(w_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(W) of PyramidHashOP is invalid. 
" "It should be 2, but got %d", w_dims.size())); @@ -124,7 +124,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( w_dims[0], space_len + rand_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(W) of PyramidHashOP is invalid. " "It should be space_len + rand_len, but now %d != %d + %d", w_dims[0], @@ -133,7 +133,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( w_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(W) of PyramidHashOP is invalid." " It should be 1, but got %d", w_dims[1])); @@ -142,7 +142,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( num_emb % rand_len, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The PyramidHashOP's Attr(num_emb) should mod Attr(rand_len), " "but num_emb is %d, rand_len is %d", num_emb, @@ -153,19 +153,19 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("WhiteList"), true, - platform::errors::NotFound("Input(WhiteList) of PyramidHashOP is not " - "found but white_list_len > 0.")); + phi::errors::NotFound("Input(WhiteList) of PyramidHashOP is not " + "found but white_list_len > 0.")); auto wl_dims = ctx->GetInputDim("WhiteList"); PADDLE_ENFORCE_EQ( wl_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(WhiteList) of PyramidHashOP is invalid." " It should be 2, but got %d", wl_dims.size())); PADDLE_ENFORCE_EQ(wl_dims[0], white_list_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(WhiteList) of " "PyramidHashOP is invalid." " It should be equal to Attr(white_list_len) " @@ -174,7 +174,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { white_list_len)); PADDLE_ENFORCE_EQ(wl_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(WhiteList) of " "PyramidHashOP is invalid." " It should be 1, but got %d", @@ -186,19 +186,19 @@ class PyramidHashOP : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("BlackList"), true, - platform::errors::NotFound("Input(BlackList) of PyramidHashOP is not " - "found but black_list_len > 0.")); + phi::errors::NotFound("Input(BlackList) of PyramidHashOP is not " + "found but black_list_len > 0.")); auto bl_dims = ctx->GetInputDim("BlackList"); PADDLE_ENFORCE_EQ( bl_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of Input(BlackList) of PyramidHashOP is invalid." " It should be 2, but got %d", bl_dims.size())); PADDLE_ENFORCE_EQ(bl_dims[0], black_list_len, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(BlackList) of " "PyramidHashOP is invalid." " It should be equal to Attr(black_list_len)" @@ -207,7 +207,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { black_list_len)); PADDLE_ENFORCE_EQ(bl_dims[1], 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The second dimension of Input(BlackList) of " "PyramidHashOP is invalid." 
" It should be 1, but got %d", @@ -315,7 +315,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( math::bloomfilter_check(_filter), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The white filter is not loaded successfully, please make sure " "'white_list_len': %d is valid for Input(WhiteList).", white_list_len)); @@ -325,7 +325,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( math::bloomfilter_check(_black_filter), 1, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The black filter is not loaded successfully, please make sure " "'black_list_len': %d is valid for Input(BlackList).", black_list_len)); @@ -442,27 +442,27 @@ class PyramidHashOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of PyramidHashOpGrad is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("W"), - true, - platform::errors::NotFound( - "Input(W) of PyramidHashOpGrad is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) of PyramidHashOpGrad is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("W"), + true, + phi::errors::NotFound("Input(W) of PyramidHashOpGrad is not found.")); PADDLE_ENFORCE_EQ(ctx->HasInput("DropPos"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(DropPos) of PyramidHashOpGrad is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("X_Temp_Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X_Temp_Out) of PyramidHashOpGrad is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@Grad) of PyramidHashOpGrad is not found.")); } diff --git a/paddle/fluid/operators/quantize_linear_op.cc b/paddle/fluid/operators/quantize_linear_op.cc index c0ef288b5134b..44ff53e8a7d7b 100644 --- a/paddle/fluid/operators/quantize_linear_op.cc +++ b/paddle/fluid/operators/quantize_linear_op.cc @@ -164,17 +164,16 @@ class QuantizeLinearOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1 || quant_axis == -1, true, - platform::errors::InvalidArgument( - "'quant_axis' should be 0 or 1, but " - "the received is %d", - quant_axis)); + phi::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); }); AddAttr("bit_length", "(int, default 8)") .SetDefault(8) .AddCustomChecker([](const int &bit_length) { PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'bit_length' should be between 1 and 16, but " "the received is %d", bit_length)); @@ -190,7 +189,7 @@ class QuantizeLinearOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( round_type == 0 || round_type == 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'round_type' should be 0 or 1, 0 rounding to " "nearest ties to even and 1 is rounding to nearest " "ties away from zero.but the received is %d", diff --git a/paddle/fluid/operators/quantize_linear_op.cu b/paddle/fluid/operators/quantize_linear_op.cu index 8bcbc1107e9d1..d9aa1a860f405 100644 --- a/paddle/fluid/operators/quantize_linear_op.cu +++ b/paddle/fluid/operators/quantize_linear_op.cu 
@@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/operators/quantize_linear_op.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" -using float16 = paddle::platform::float16; +using float16 = phi::dtype::float16; namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/queue_generator_op.cc b/paddle/fluid/operators/queue_generator_op.cc index 8eee44d6827ea..ca4f943885b2f 100644 --- a/paddle/fluid/operators/queue_generator_op.cc +++ b/paddle/fluid/operators/queue_generator_op.cc @@ -46,13 +46,13 @@ class QueueGeneratorOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( names.size(), 0, - platform::errors::InvalidArgument("The attribute 'names' for " - "Op(queue_generator) must be set.")); + phi::errors::InvalidArgument("The attribute 'names' for " + "Op(queue_generator) must be set.")); int capacity = Attr<int>("capacity"); PADDLE_ENFORCE_GT(capacity, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The attribute 'capacity' for Op(queue_generator) " "must be set a positive value, " "but the one received is %d.", @@ -71,8 +71,8 @@ class QueueGeneratorOp : public framework::OperatorBase { auto var = scope->FindVar(name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound( - "Can't find var named '%s' in the global scope.", name)); + phi::errors::NotFound("Can't find var named '%s' in the global scope.", + name)); auto ptr = var->GetMutable<reader::LoDTensorBlockingQueueHolder>(); ptr->InitOnce(capacity); diff --git a/paddle/fluid/operators/random_routing_op.cc b/paddle/fluid/operators/random_routing_op.cc index dffcc9c361a66..e579b3f6146e2 100644 --- a/paddle/fluid/operators/random_routing_op.cc +++ b/paddle/fluid/operators/random_routing_op.cc @@ -37,17 +37,17 @@ class RandomRoutingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(prob_dims[0], topk_val_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ScatterNdAddOp should not be null.")); PADDLE_ENFORCE_EQ(topk_idx_dims[1], topk_val_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ScatterNdAddOp should not be null.")); PADDLE_ENFORCE_EQ(topk_idx_dims[0], topk_val_dims[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ScatterNdAddOp should not be null.")); ctx->SetOutputDim("Out", topk_idx_dims); @@ -62,7 +62,7 @@ class RandomRoutingOp : public framework::OperatorWithKernel { OperatorWithKernel::IndicateVarDataType(ctx, "TopK_Idx"); PADDLE_ENFORCE_EQ(topk_idx_dtype, framework::proto::VarType::INT64, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dtype of the topk_idx_dtype should be int64")); const auto& topk_value_type = diff --git a/paddle/fluid/operators/range_op.h b/paddle/fluid/operators/range_op.h index 195ef276b957e..8350794081bbe 100644 --- a/paddle/fluid/operators/range_op.h +++ b/paddle/fluid/operators/range_op.h @@ -23,23 +23,23 @@ namespace operators { template <typename T> void GetSize(T start, T end, T step, int64_t* size) { - PADDLE_ENFORCE_NE(step, - 0, - platform::errors::InvalidArgument( - "The step of range op should not be 0.")); + PADDLE_ENFORCE_NE( + step, + 0, + phi::errors::InvalidArgument("The step of range op should not be 0.")); if (start < end) { PADDLE_ENFORCE_GT( step, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The step should be greater than 0 while start < end.")); } if (start > end) { PADDLE_ENFORCE_LT(step, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 
step should be less than 0 while start > end.")); - } } diff --git a/paddle/fluid/operators/rank_attention_op.cc b/paddle/fluid/operators/rank_attention_op.cc index 95de6f4e08054..aaef2782f5e21 100644 --- a/paddle/fluid/operators/rank_attention_op.cc +++ b/paddle/fluid/operators/rank_attention_op.cc @@ -27,32 +27,32 @@ class RankAttentionOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("RankOffset"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(RankOffset) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("RankParam"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(RankParam) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("InsRank"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(InsRank) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("InputHelp"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(InputHelp) of RankAttentionOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of RankAttentionOp should not be null.")); auto max_rank = ctx->Attrs().Get<int>("MaxRank"); @@ -64,13 +64,13 @@ class RankAttentionOp : public framework::OperatorWithKernel { auto x_fea_dim = x_dims[1]; auto block_matrix_row = max_rank * x_fea_dim; - PADDLE_ENFORCE_EQ((rank_offset_dims[1] - 1) / 2, - max_rank, - platform::errors::InvalidArgument( - "Input(RankOffset) has wrong columns, " - "except columns to be %d, but got %d", - max_rank, - (rank_offset_dims[1] - 1) / 2)); + PADDLE_ENFORCE_EQ( + (rank_offset_dims[1] - 1) / 2, + max_rank, + phi::errors::InvalidArgument("Input(RankOffset) has wrong columns, " + "except columns to be %d, but got %d", + max_rank, + (rank_offset_dims[1] - 1) / 2)); ctx->SetOutputDim("Out", {ins_num, para_col}); ctx->SetOutputDim("InputHelp", {ins_num, block_matrix_row}); @@ -94,23 +94,23 @@ class RankAttentionGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasInput("RankParam"), - true, - platform::errors::InvalidArgument( - "Input(RankParam) should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasInput("RankOffset"), - true, - platform::errors::InvalidArgument( - "Input(RankOffset) should not be null")); - PADDLE_ENFORCE_EQ(ctx->HasInput("InputHelp"), - true, - platform::errors::InvalidArgument( - "Input(InputHelp) should not be null")); + phi::errors::InvalidArgument("Input(X) should not be null")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("RankParam"), + true, + phi::errors::InvalidArgument("Input(RankParam) should not be null")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("RankOffset"), + true, + phi::errors::InvalidArgument("Input(RankOffset) should not be null")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("InputHelp"), + true, + phi::errors::InvalidArgument("Input(InputHelp) should not be null")); PADDLE_ENFORCE_EQ( ctx->HasInput("InsRank"), true, - platform::errors::InvalidArgument("Input(InsRank) should not be null")); + phi::errors::InvalidArgument("Input(InsRank) should not be null")); 
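Both RankAttention InferShape checks above and the CUDA kernel in the next hunk recover max_rank from the RankOffset width as (cols - 1) / 2. A hedged standalone sketch of that relationship (the helper name and the column-layout interpretation are assumptions drawn only from the checks shown here):

#include <cassert>
#include <cstdint>

// The checks treat RankOffset as [ins_num, 1 + 2 * max_rank]: one leading
// column plus two columns per rank, hence max_rank = (cols - 1) / 2.
int64_t MaxRankFromOffsetCols(int64_t rank_offset_cols) {
  assert(rank_offset_cols >= 3 && (rank_offset_cols - 1) % 2 == 0);
  return (rank_offset_cols - 1) / 2;
}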
ctx->SetOutputDim(framework::GradVarName("RankParam"), ctx->GetInputDim("RankParam")); diff --git a/paddle/fluid/operators/rank_attention_op.cu b/paddle/fluid/operators/rank_attention_op.cu index 6d6c4c6a6d1dc..d73de790a527e 100644 --- a/paddle/fluid/operators/rank_attention_op.cu +++ b/paddle/fluid/operators/rank_attention_op.cu @@ -48,15 +48,15 @@ class RankAttentionCUDAKernel : public framework::OpKernel<T> { PADDLE_ENFORCE_EQ( rank_offset_dims[0], ins_num, - platform::errors::InvalidArgument("Input(RankOffset) has wrong rows.")); + phi::errors::InvalidArgument("Input(RankOffset) has wrong rows.")); - PADDLE_ENFORCE_EQ((rank_offset_dims[1] - 1) / 2, - max_rank, - platform::errors::InvalidArgument( - "Input(RankOffset) has wrong columns.")); + PADDLE_ENFORCE_EQ( + (rank_offset_dims[1] - 1) / 2, + max_rank, + phi::errors::InvalidArgument("Input(RankOffset) has wrong columns.")); PADDLE_ENFORCE_EQ( max_rank * max_rank * x_fea_dim, para_row, - platform::errors::InvalidArgument("Input(RankParam) has wrong rows.")); + phi::errors::InvalidArgument("Input(RankParam) has wrong rows.")); int block_matrix_row = max_rank * x_fea_dim; diff --git a/paddle/fluid/operators/rank_attention_op.h b/paddle/fluid/operators/rank_attention_op.h index 5124e91653810..f119c4a2f315c 100644 --- a/paddle/fluid/operators/rank_attention_op.h +++ b/paddle/fluid/operators/rank_attention_op.h @@ -23,10 +23,10 @@ template <typename T, typename DeviceContext> class RankAttentionKernel : public framework::OpKernel<T> { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::Unimplemented( - "Rank Attention only supports GPU now.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::Unimplemented("Rank Attention only supports GPU now.")); } }; } // namespace operators diff --git a/paddle/fluid/operators/rank_loss_op.cc b/paddle/fluid/operators/rank_loss_op.cc deleted file mode 100644 index ebdddfd41b33f..0000000000000 --- a/paddle/fluid/operators/rank_loss_op.cc +++ /dev/null @@ -1,254 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/rank_loss_op.h" - -#include <memory> - -namespace paddle { -namespace framework { -class InferShapeContext; -class OpDesc; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -namespace paddle { -namespace operators { - -class RankLossOp : public framework::OperatorWithKernel { - public: - RankLossOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "RankLoss"); - OP_INOUT_CHECK(ctx->HasInput("Left"), "Input", "Left", "RankLoss"); - OP_INOUT_CHECK(ctx->HasInput("Right"), "Input", "Right", "RankLoss"); - - auto label_dims = ctx->GetInputDim("Label"); - auto left_dims = ctx->GetInputDim("Left"); - auto right_dims = ctx->GetInputDim("Right"); - // check label_dims valid - PADDLE_ENFORCE_GE( - label_dims.size(), - 1, - platform::errors::InvalidArgument( - "The dimension size of Input(Label) must be greater than " - "or equal to 1, but received %d.", - label_dims.size())); - PADDLE_ENFORCE_LE( - label_dims.size(), - 2, - platform::errors::InvalidArgument("The dimension size of Input(Label) " - "must be less than or equal to 2, " - "but received %d.", - label_dims.size())); - if (label_dims.size() == 2U) { - PADDLE_ENFORCE_EQ( - label_dims[1], - 1, - platform::errors::InvalidArgument( - "The last dimension of Input(Label) must be 1, but received %d.", - label_dims[1])); - } - // check left_dims valid - PADDLE_ENFORCE_GE( - left_dims.size(), - 1, - platform::errors::InvalidArgument( - "The dimension size of Input(Left) must be greater than " - "or equal to 1, but received %d.", - left_dims.size())); - PADDLE_ENFORCE_LE( - left_dims.size(), - 2, - platform::errors::InvalidArgument("The dimension size of Input(Left) " - "must be less than or equal to 2, " - "but received %d.", - left_dims.size())); - if (left_dims.size() == 2U) { - PADDLE_ENFORCE_EQ( - left_dims[1], - 1, - platform::errors::InvalidArgument( - "The last dimension of Input(Left) must be 1, but received %d.", - left_dims[1])); - } - // check right_dims valid - PADDLE_ENFORCE_GE( - right_dims.size(), - 1, - platform::errors::InvalidArgument( - "The dimension size of Input(Right) must be greater than " - "or equal to 1, but received %d.", - right_dims.size())); - PADDLE_ENFORCE_LE( - right_dims.size(), - 2, - platform::errors::InvalidArgument("The dimension size of Input(Right) " - "must be less than or equal to 2, " - "but received %d.", - right_dims.size())); - if (right_dims.size() == 2U) { - PADDLE_ENFORCE_EQ( - right_dims[1], - 1, - platform::errors::InvalidArgument( - "The last dimension of Input(Right) must be 1, but received %d.", - right_dims[1])); - } - PADDLE_ENFORCE_EQ( - label_dims[0], - left_dims[0], - platform::errors::InvalidArgument( - "The first dimension of Input(Label) and Input(Left) " - "must have the same value. But received Label.dims[0]=%d, " - "Left.dims[0]=%d.", - label_dims[0], - left_dims[0])); - PADDLE_ENFORCE_EQ( - label_dims[0], - right_dims[0], - platform::errors::InvalidArgument( - "The first dimension of Input(Label) and Input(Right) " - "must have the same value. 
But received Label.dims[0]=%d, " - "Right.dims[0]=%d.", - label_dims[0], - right_dims[0])); - ctx->SetOutputDim("Out", label_dims); - } -}; - -class RankLossOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Label", - "(2-D Tensor with shape [batch_size x 1]) " - "The label indicating A ranked higher than B or not."); - AddInput("Left", - "(2-D Tensor with shape [batch_size x 1]) " - "The output of RankNet for doc A."); - AddInput("Right", - "(2-D Tensor with shape [batch_size x 1]) " - "The output of RankNet for doc B."); - AddOutput("Out", - "(2-D Tensor with shape [batch_size x 1]) " - "The output loss of RankLoss operator."); - AddComment(R"DOC( -RankLoss Operator. - -RankLoss operator for RankNet -(http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf). -RankNet is a pairwise ranking model with -one training sample consisting of a pair of doc A and B, and the label P -indicating that A is ranked higher than B or not: - -P = {0, 1} or {0, 0.5, 1}, where 0.5 means no information about the rank of -the input pair. - -The RankLoss operator takes three inputs: Left (o_i), Right (o_j) and Label -(P_{i,j}), which represent the output score of RankNet for the two docs and -the label respectively, and yields the rank loss C_{i,j} using the following -equation: - -$$ - C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + \log(1 + e^{o_{i,j}}) \\ - o_{i,j} = o_i - o_j \\ - \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \} -$$ - -The operator can take batch inputs with size batch_size (batch_size >= 1). - -)DOC"); - } -}; - -class RankLossGradOp : public framework::OperatorWithKernel { - public: - RankLossGradOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "RankLossGrad"); - OP_INOUT_CHECK(ctx->HasInput("Left"), "Input", "Left", "RankLossGrad"); - OP_INOUT_CHECK(ctx->HasInput("Right"), "Input", "Right", "RankLossGrad"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), - "Input", - framework::GradVarName("Out"), - "RankLossGrad"); - - auto left_dims = ctx->GetInputDim("Left"); - auto right_dims = ctx->GetInputDim("Right"); - auto left_grad_name = framework::GradVarName("Left"); - auto right_grad_name = framework::GradVarName("Right"); - - if (ctx->HasOutput(left_grad_name)) { - ctx->SetOutputDim(left_grad_name, left_dims); - } - - if (ctx->HasOutput(right_grad_name)) { - ctx->SetOutputDim(right_grad_name, right_dims); - } - } -}; - -template -class RankLossGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("rank_loss_grad"); - op->SetInput("Label", this->Input("Label")); - op->SetInput("Left", this->Input("Left")); - op->SetInput("Right", this->Input("Right")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("Left"), this->InputGrad("Left")); - op->SetOutput(framework::GradVarName("Right"), this->InputGrad("Right")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; - -REGISTER_OPERATOR(rank_loss, - ops::RankLossOp, - ops::RankLossOpMaker, - 
ops::RankLossGradMaker, - ops::RankLossGradMaker); -REGISTER_OPERATOR(rank_loss_grad, ops::RankLossGradOp); - -PD_REGISTER_STRUCT_KERNEL( - rank_loss, CPU, ALL_LAYOUT, ops::RankLossKernel, float) {} -PD_REGISTER_STRUCT_KERNEL( - rank_loss_grad, CPU, ALL_LAYOUT, ops::RankLossGradKernel, float) {} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_REGISTER_STRUCT_KERNEL( - rank_loss, GPU, ALL_LAYOUT, ops::RankLossKernel, float) {} -PD_REGISTER_STRUCT_KERNEL( - rank_loss_grad, GPU, ALL_LAYOUT, ops::RankLossGradKernel, float) {} -#endif diff --git a/paddle/fluid/operators/rank_loss_op.h b/paddle/fluid/operators/rank_loss_op.h deleted file mode 100644 index 03e0a094555e3..0000000000000 --- a/paddle/fluid/operators/rank_loss_op.h +++ /dev/null @@ -1,82 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" - -namespace paddle { -namespace operators { - -template -class RankLossKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const { - auto* out_t = ctx.Output("Out"); - auto* label_t = ctx.Input("Label"); - auto* left_t = ctx.Input("Left"); - auto* right_t = ctx.Input("Right"); - out_t->mutable_data(ctx.GetPlace()); - - auto out = framework::EigenVector::Flatten(*out_t); - auto label = framework::EigenVector::Flatten(*label_t); - auto left = framework::EigenVector::Flatten(*left_t); - auto right = framework::EigenVector::Flatten(*right_t); - - auto& dev = *ctx.template device_context().eigen_device(); - EigenRankLoss, T>::Eval( - dev, out, label, left, right); - } -}; - -template -class RankLossGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const { - auto* d_left_t = - ctx.Output(framework::GradVarName("Left")); - auto* d_right_t = - ctx.Output(framework::GradVarName("Right")); - - auto* d_out_t = ctx.Input(framework::GradVarName("Out")); - auto* label_t = ctx.Input("Label"); - auto* left_t = ctx.Input("Left"); - auto* right_t = ctx.Input("Right"); - - auto& dev = *ctx.template device_context().eigen_device(); - auto d_out = framework::EigenVector::Flatten(*d_out_t); - auto label = framework::EigenVector::Flatten(*label_t); - auto left = framework::EigenVector::Flatten(*left_t); - auto right = framework::EigenVector::Flatten(*right_t); - - // compute d_left - if (d_left_t) { - d_left_t->mutable_data(ctx.GetPlace()); - auto d_left = framework::EigenVector::Flatten(*d_left_t); - EigenRankLossGrad, T>::EvalLeft( - dev, d_left, d_out, label, left, right); - } - // compute d_right - if (d_right_t) { - d_right_t->mutable_data(ctx.GetPlace()); - auto d_right = framework::EigenVector::Flatten(*d_right_t); - EigenRankLossGrad, T>::EvalRight( - dev, d_right, d_out, label, left, right); - } - } -}; -} // namespace operators -} // namespace paddle diff --git 
a/paddle/fluid/operators/reader/blocking_queue.h b/paddle/fluid/operators/reader/blocking_queue.h index 4d3e79546fbef..35e76304b6c32 100644 --- a/paddle/fluid/operators/reader/blocking_queue.h +++ b/paddle/fluid/operators/reader/blocking_queue.h @@ -36,7 +36,7 @@ class BlockingQueue { : capacity_(capacity), speed_test_mode_(speed_test_mode) { PADDLE_ENFORCE_GT(capacity_, static_cast<size_t>(0), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The capacity of a reader::BlockingQueue must be " "greater than 0, but received capacity is %d.", capacity_)); @@ -59,7 +59,7 @@ class BlockingQueue { PADDLE_ENFORCE_LT( queue_.size(), capacity_, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "The queue size cannot exceed the set queue capacity. Expected " "queue size is less than %d. But received %d", capacity_, @@ -86,7 +86,7 @@ class BlockingQueue { PADDLE_ENFORCE_LT( queue_.size(), capacity_, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "The queue size cannot exceed the set queue capacity. Expected " "queue size is less than %d. But received %d", capacity_, @@ -104,7 +104,7 @@ class BlockingQueue { if (!queue_.empty()) { PADDLE_ENFORCE_NOT_NULL( elem, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The holder to receive queue data is null pointer.")); *elem = queue_.front(); if (LIKELY(!speed_test_mode_)) { @@ -115,7 +115,7 @@ class BlockingQueue { } else { PADDLE_ENFORCE_EQ(closed_, true, - platform::errors::PermissionDenied( + phi::errors::PermissionDenied( "Blocking queue status error, if queue is empty " "when pop data, it should be closed.")); VLOG(3) << "queue is closed! return nothing."; @@ -168,11 +168,10 @@ class BlockingQueue { private: inline void EnforceNotKilled() { - PADDLE_ENFORCE_NE( - killed_, - true, - platform::errors::Fatal("Blocking queue is killed because the " - "data reader raises an exception.")); + PADDLE_ENFORCE_NE(killed_, + true, + phi::errors::Fatal("Blocking queue is killed because the " + "data reader raises an exception.")); } private: diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index cc5034c86f90f..15bbc9ff10965 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -127,7 +127,7 @@ void BufferedReader::ReadAsync(size_t i) { PADDLE_ENFORCE_EQ( cuda.size(), cpu.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensor number on GPU and CPU devices are not matched.")); } if (pin_memory_) { @@ -250,7 +250,7 @@ void BufferedReader::ReadAsync(size_t i) { PADDLE_ENFORCE_EQ( xpu.size(), cpu.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensor number on XPU and CPU devices are not matched. " "The number on XPU is %d, on CPU is %d", xpu.size(), @@ -308,7 +308,7 @@ void BufferedReader::ReadAsync(size_t i) { } else { PADDLE_ENFORCE_EQ(custom_device.size(), cpu.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input tensor number on CustomDevice and CPU " "devices are not matched. 
" "The number on CustomDevice is %d, on CPU is %d", diff --git a/paddle/fluid/operators/reader/create_ctr_reader_op.cc b/paddle/fluid/operators/reader/create_ctr_reader_op.cc index 8c38aaf528da0..de0dff6be2533 100644 --- a/paddle/fluid/operators/reader/create_ctr_reader_op.cc +++ b/paddle/fluid/operators/reader/create_ctr_reader_op.cc @@ -35,7 +35,7 @@ class CreateCTRReaderOp : public framework::OperatorBase { auto* queue_holder_var = scope.FindVar(queue_name); PADDLE_ENFORCE_NOT_NULL( queue_holder_var, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "No LoDTensorBlockingQueueHolder variable with name %s found", queue_name)); auto* queue_holder = diff --git a/paddle/fluid/operators/reader/create_custom_reader_op.cc b/paddle/fluid/operators/reader/create_custom_reader_op.cc index 43fb5d9059c15..6a18e417a39bb 100644 --- a/paddle/fluid/operators/reader/create_custom_reader_op.cc +++ b/paddle/fluid/operators/reader/create_custom_reader_op.cc @@ -100,12 +100,12 @@ class CustomReaderInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_NE( ctx->IsRuntime(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "'CustomReaderInferShape' should only be invoked during " "compile time.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "The output decorated reader should not be null.")); const auto* sub_block = ctx->Attrs().Get("sub_block"); @@ -117,7 +117,7 @@ class CustomReaderInferShape : public framework::InferShapeBase { auto* sink_var = sub_block->FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( sink_var, - platform::errors::NotFound( + phi::errors::NotFound( "The sink variable is not found in CustomReader.")); res_dims.emplace_back(sink_var->GetShape()); res_lod_levels.push_back(sink_var->GetLoDLevel()); @@ -135,7 +135,7 @@ class CustomReaderInferVarType : public framework::VarTypeInference { auto& out_var_name = ctx->Output("Out")[0]; PADDLE_ENFORCE_EQ(ctx->HasVar(out_var_name), true, - platform::errors::NotFound( + phi::errors::NotFound( "The output reader variable should not be null.")); ctx->SetType(out_var_name, framework::proto::VarType::READER); @@ -148,7 +148,7 @@ class CustomReaderInferVarType : public framework::VarTypeInference { framework::VarDesc* var = sub_block->FindVar(var_name); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound( + phi::errors::NotFound( "The sink variable is not found in CustomReader.")); res_data_types.emplace_back(var->GetDataType()); } @@ -167,7 +167,7 @@ void CustomReader::ReadNextImpl(paddle::framework::LoDTensorArray* out) { PADDLE_ENFORCE_EQ( source_var_names_.size(), underlying_outs.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of source_var_names(%d) and the size of " "underlying_outs(%d) are not consistent. 
Each feeding element " "must have its own source variable.", @@ -192,8 +192,8 @@ void CustomReader::ReadNextImpl(paddle::framework::LoDTensorArray* out) { auto* var = exe_scope->FindVar(sink_var_names_[i]); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("The variable %s is not in current scope.", - sink_var_names_[i])); + phi::errors::NotFound("The variable %s is not in current scope.", + sink_var_names_[i])); const auto& tensor = var->Get(); framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]); } diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc index 5cea8f5963111..975a32e9ab496 100644 --- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc +++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc @@ -35,9 +35,9 @@ class CreateDoubleBufferReaderOp : public framework::OperatorBase { dynamic_cast(out->Get().get()); PADDLE_ENFORCE_NOT_NULL( decorated_reader, - platform::errors::NotFound("The inited reader should be a " - "DecoratedReader when running " - "create_double_buffer_reader op.")); + phi::errors::NotFound("The inited reader should be a " + "DecoratedReader when running " + "create_double_buffer_reader op.")); if (decorated_reader->UnderlyingReader() == underlying_reader.Get()) { return; } diff --git a/paddle/fluid/operators/reader/create_py_reader_op.cc b/paddle/fluid/operators/reader/create_py_reader_op.cc index e9edce4423e26..c55e77fc14787 100644 --- a/paddle/fluid/operators/reader/create_py_reader_op.cc +++ b/paddle/fluid/operators/reader/create_py_reader_op.cc @@ -35,7 +35,7 @@ class CreatePyReaderOp : public framework::OperatorBase { auto* queue_holder_var = scope.FindVar(queue_name); PADDLE_ENFORCE_NOT_NULL( queue_holder_var, - platform::errors::NotFound( + phi::errors::NotFound( "No LoDTensorBlockingQueueHolder variable with name %s found. 
This " "may be because the DataLoader is defined in another Scope, " "which is different from the Scope when calling Executor.run.", diff --git a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h index da265a6fce76d..208377937c130 100644 --- a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h +++ b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h @@ -92,15 +92,15 @@ class OrderedMultiDeviceLoDTensorBlockingQueue { std::lock_guard lock(init_mutex_); PADDLE_ENFORCE_GE(dev_cnt, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Device count to init " "OrderedMultiDeviceLoDTensorBlockingQueue" " must be larger than 1")); if (!queues_.empty()) { - PADDLE_ENFORCE_EQ(queues_.size(), - dev_cnt, - platform::errors::InvalidArgument( - "queues should be only inited once")); + PADDLE_ENFORCE_EQ( + queues_.size(), + dev_cnt, + phi::errors::InvalidArgument("queues should be only inited once")); return; } @@ -119,7 +119,7 @@ class OrderedMultiDeviceLoDTensorBlockingQueue { PADDLE_ENFORCE_LT( idx, queues_.size(), - platform::errors::OutOfRange("The queue index is out of range")); + phi::errors::OutOfRange("The queue index is out of range")); return queues_[idx]; } @@ -184,7 +184,7 @@ class OrderedMultiDeviceLoDTensorBlockingQueue { void EnforceIsInited() const { PADDLE_ENFORCE_EQ(queues_.empty(), false, - platform::errors::NotFound("queue has not been inited")); + phi::errors::NotFound("queue has not been inited")); } private: @@ -209,8 +209,8 @@ class LoDTensorBlockingQueueHolder { PADDLE_ENFORCE_EQ( queue_, nullptr, - platform::errors::AlreadyExists("LoDTensorBlockingQueueHolder::" - "InitOnce() can only be called once")); + phi::errors::AlreadyExists("LoDTensorBlockingQueueHolder::" + "InitOnce() can only be called once")); queue_ = std::make_unique(capacity, speed_test_mode); } @@ -228,7 +228,7 @@ class OrderedMultiDeviceLoDTensorBlockingQueueHolder { void InitOnce(size_t capacity, bool speed_test_mode = false) { PADDLE_ENFORCE_EQ(queue_, nullptr, - platform::errors::AlreadyExists( + phi::errors::AlreadyExists( "OrderedMultiDeviceLoDTensorBlockingQueueHolder::" "InitOnce() can only be called once")); queue_ = std::make_unique( diff --git a/paddle/fluid/operators/reader/py_reader.cc b/paddle/fluid/operators/reader/py_reader.cc index f0c0409a729a5..d71f4b9e9ca95 100644 --- a/paddle/fluid/operators/reader/py_reader.cc +++ b/paddle/fluid/operators/reader/py_reader.cc @@ -25,7 +25,7 @@ PyReader::PyReader( const std::vector& need_check_feed) : framework::FileReader(dims, var_types, need_check_feed) { PADDLE_ENFORCE_NOT_NULL(queue, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "LoDTensorBlockingQueue must not be null.")); queue_ = queue; } diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc index 1c65669adc3a9..d88dfb4962a9c 100644 --- a/paddle/fluid/operators/reader/read_op.cc +++ b/paddle/fluid/operators/reader/read_op.cc @@ -51,7 +51,7 @@ class ReadInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( reader_dims.size(), out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The reader's dim number doesn't match the output number.")); ctx->SetOutputsDim("Out", reader_dims); auto in_desc = @@ -61,7 +61,7 @@ class ReadInferShape : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( in_lod_levels.size(), out_var_ptrs.size(), - platform::errors::InvalidArgument( + 
phi::errors::InvalidArgument( "LoDLevels of Input(Reader) must be the same as the " "number of Outputs(Out).")); for (size_t i = 0; i < out_var_ptrs.size(); ++i) { @@ -82,7 +82,7 @@ class ReadInferVarType : public framework::StaticGraphVarTypeInference { auto dtypes = GetDataTypes(ctx, reader_name); PADDLE_ENFORCE_EQ(dtypes.size(), out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of input reader's dtypes do not match " "the output variable number.")); for (size_t i = 0; i < dtypes.size(); ++i) { @@ -120,8 +120,8 @@ class ReadOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( ins.size(), out_arg_names.size(), - platform::errors::InvalidArgument("input data number and output data " - "number of read_op do not match")); + phi::errors::InvalidArgument("input data number and output data " + "number of read_op do not match")); const std::vector& shapes = reader->Shapes(); const std::vector& var_types = @@ -130,7 +130,7 @@ class ReadOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( out_arg_names.size(), need_check_feed.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output size of read_op and the number of fed " "variables of reader do not match. Received size of output is %d, " "number of fed variables of reader is %d", @@ -145,7 +145,7 @@ class ReadOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( DimensionIsCompatibleWith(shapes[i], in_dims), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The fed Variable %s should have dimensions = %d, " "shape = [%s], but received fed shape [%s]", out_arg_names[i], @@ -155,7 +155,7 @@ class ReadOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( framework::TransToProtoVarType(ins[i].dtype()), var_types[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The data type of fed Variable %s must be %s, but received %s", out_arg_names[i], var_types[i], diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index e62d728b6f017..9a1693c5061c7 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -69,13 +69,13 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_NE( ctx->IsRuntime(), true, - platform::errors::PreconditionNotMet("'FileReaderInferShape' should only " - "be invoked during compile time.")); + phi::errors::PreconditionNotMet("'FileReaderInferShape' should only " + "be invoked during compile time.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::NotFound("The output file reader should not be null.")); + phi::errors::NotFound("The output file reader should not be null.")); bool use_data_config = ctx->Attrs().Get("use_data_config"); if (use_data_config) { const auto shape_concat = @@ -88,7 +88,7 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( lod_levels.size(), shapes.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of 'lod_levels'(%d) doesn't match the number " "of 'shapes'(%d).", lod_levels.size(), @@ -97,16 +97,16 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( dtypes.size(), shapes.size(), - platform::errors::InvalidArgument("The number of 'dtypes'(%d) doesn't " - "match the number of 'shapes'(%d).", - dtypes.size(), - shapes.size())); + 
phi::errors::InvalidArgument("The number of 'dtypes'(%d) doesn't " + "match the number of 'shapes'(%d).", + dtypes.size(), + shapes.size())); const auto need_check_feed = ctx->Attrs().Get>("need_check_feed"); PADDLE_ENFORCE_EQ( need_check_feed.size(), shapes.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of 'need_check_feed'(%d) doesn't match the " "number of 'shapes'(%d).", need_check_feed.size(), @@ -127,18 +127,18 @@ void DecoratedReaderInferShape::operator()( PADDLE_ENFORCE_NE( ctx->IsRuntime(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "'DecoratedReaderInferShape' should only be invoked during " "compile time.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("UnderlyingReader"), - true, - platform::errors::NotFound( - "Input(UnderlyingReader) should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "The output decorated reader should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("UnderlyingReader"), + true, + phi::errors::NotFound("Input(UnderlyingReader) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("Out"), + true, + phi::errors::NotFound("The output decorated reader should not be null.")); ctx->SetReaderDims("Out", ctx->GetReaderDims("UnderlyingReader")); framework::VarDesc* in_reader = PADDLE_GET( diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index a5d4ce5e29828..42856d5b3c12a 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -77,10 +77,10 @@ StepScopes::StepScopes(const platform::DeviceContext &dev_ctx, is_train_(is_train), is_backward_(is_backward) { size_t num_step_scopes = is_train ? seq_len : 2; - PADDLE_ENFORCE_EQ(is_train || !is_backward, - true, - platform::errors::PreconditionNotMet( - "Cannot backward when is not training")); + PADDLE_ENFORCE_EQ( + is_train || !is_backward, + true, + phi::errors::PreconditionNotMet("Cannot backward when is not training")); if (!is_backward_) { ClearStepScopes(dev_ctx, const_cast(&parent), scopes); scopes->reserve(static_cast(num_step_scopes)); @@ -101,7 +101,7 @@ void StepScopes::BackwardNext(const platform::DeviceContext &dev_ctx, framework::Scope *parent_scope) { PADDLE_ENFORCE_EQ(is_backward_, true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot get backward next scope when is forward")); if (counter_ + 2 == scopes_->size()) { parent_scope->DeleteScope((*scopes_)[counter_ + 1]); @@ -114,7 +114,7 @@ void StepScopes::BackwardNext(const platform::DeviceContext &dev_ctx, void StepScopes::ForwardNext() { PADDLE_ENFORCE_EQ(is_backward_, false, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "Cannot get forward next scope when is backward")); ++counter_; } @@ -126,7 +126,7 @@ framework::Scope &StepScopes::GetScope(size_t scope_id) const { PADDLE_ENFORCE_LT( scope_id, scopes_->size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input scope_id is greater than scopes size in RecurrentOp")); return *(*scopes_)[scope_id]; } @@ -149,16 +149,16 @@ int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const { PADDLE_ENFORCE_EQ( all_inputs.empty(), false, - platform::errors::InvalidArgument("RecurrentOp gets empty input")); + phi::errors::InvalidArgument("RecurrentOp gets empty input")); for (auto &iname : all_inputs) { auto *var = scope.FindVar(iname); PADDLE_ENFORCE_NOT_NULL(var, - 
platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "RecurrentOp finds var %s is NULL", iname)); PADDLE_ENFORCE_EQ( var->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "RecurrentOp only accepts phi::DenseTensor as input but " "input var %s is not phi::DenseTensor", iname)); @@ -168,7 +168,7 @@ int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const { } else { PADDLE_ENFORCE_EQ(seq_len, dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sequence length of input %s in RecurrentOp is NOT " "equal to sequence length of previous input", iname)); @@ -176,7 +176,7 @@ int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const { } PADDLE_ENFORCE_GE(seq_len, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "RecurrentOp gets invalid sequence length. Expected " "seq_len >= 0. Received seq_len = %d", seq_len)); @@ -331,9 +331,9 @@ StepScopes RecurrentOp::CreateStepScopes(const platform::DeviceContext &dev_ctx, // fault in multithreading in eval process. The performance drop of // adding mutex need to be fixed. auto *var = scope.FindVar(Output(kStepScopes)); - PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( - "RecurrentOp gets empty StepScopes var")); + PADDLE_ENFORCE_NOT_NULL( + var, + phi::errors::InvalidArgument("RecurrentOp gets empty StepScopes var")); return StepScopes(dev_ctx, scope, var->GetMutable(), @@ -413,7 +413,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, PADDLE_ENFORCE_EQ(ex_state_grads.size(), cur_state_grads.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "lengths of ex_states and cur_states are not " "equal in RecurrentGradOp")); for (size_t i = 0; i < ex_state_grads.size(); ++i) { @@ -475,7 +475,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, auto &p_names = Inputs(kParameters); PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sizes of Parameters and ParamGrads are not equal " "in RecurrentGradOp")); @@ -566,7 +566,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, // Delete the scope of StepScopes auto *var = scope.FindVar(Input(kStepScopes)); PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "StepScopes var is empty in RecurrentGradOp")); auto *step_scopes = var->GetMutable(); ClearStepScopes(dev_ctx, const_cast(&scope), step_scopes); @@ -578,7 +578,7 @@ StepScopes RecurrentGradOp::CreateStepScopes( size_t seq_len) const { auto *var = scope.FindVar(Input(kStepScopes)); PADDLE_ENFORCE_NOT_NULL(var, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "StepScopes var is empty in RecurrentGradOp")); return StepScopes(dev_ctx, scope, @@ -735,27 +735,27 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { .Get>(RecurrentBase::kExStates) .size(), 0, - platform::errors::InvalidArgument("The Attr(%s) should be empty.", - RecurrentBase::kExStates)); + phi::errors::InvalidArgument("The Attr(%s) should be empty.", + RecurrentBase::kExStates)); PADDLE_ENFORCE_EQ( ctx->Attrs() .Get>(RecurrentBase::kStates) .size(), 0, - platform::errors::InvalidArgument("The Attr(%s) should be empty.", - RecurrentBase::kStates)); + phi::errors::InvalidArgument("The Attr(%s) should be empty.", + RecurrentBase::kStates)); } PADDLE_ENFORCE_EQ( ctx->HasInputs(RecurrentBase::kInputs), true, - 
platform::errors::InvalidArgument("The input(%s) should not be empty.", - RecurrentBase::kInputs)); + phi::errors::InvalidArgument("The input(%s) should not be empty.", + RecurrentBase::kInputs)); PADDLE_ENFORCE_EQ( ctx->HasInputs(RecurrentBase::kOutputs), true, - platform::errors::InvalidArgument("The input(%s) should not be empty.", - RecurrentBase::kOutputs)); + phi::errors::InvalidArgument("The input(%s) should not be empty.", + RecurrentBase::kOutputs)); // In some case the kInitialStates is empty. if (ctx->HasInputs(RecurrentBase::kInitialStates) && @@ -769,7 +769,7 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { ctx->HasOutputs(framework::GradVarName(RecurrentBase::kInputs), /*allow_null=*/true), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output of(%s) should not be empty.", framework::GradVarName(RecurrentBase::kInputs))); ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kInputs), @@ -780,7 +780,7 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { PADDLE_ENFORCE_EQ( ctx->HasOutputs(framework::GradVarName(RecurrentBase::kParameters)), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output of(%s) should not be empty.", framework::GradVarName(RecurrentBase::kParameters))); ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kParameters), diff --git a/paddle/fluid/operators/recurrent_op.h b/paddle/fluid/operators/recurrent_op.h index d027205429513..b1be9a5c0389e 100644 --- a/paddle/fluid/operators/recurrent_op.h +++ b/paddle/fluid/operators/recurrent_op.h @@ -122,7 +122,7 @@ class RecurrentBase : public framework::OperatorBase { bool is_backward = false) { PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sizes of source vars and destination vars are not " "equal in LinkTensor.")); for (size_t i = 0; i < dst_vars.size(); ++i) { @@ -148,7 +148,7 @@ class RecurrentBase : public framework::OperatorBase { bool is_backward = false) { PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sizes of source vars and destination vars are not " "equal in LinkTensor.")); for (size_t i = 0; i < dst_vars.size(); ++i) { @@ -180,8 +180,8 @@ class RecurrentBase : public framework::OperatorBase { } PADDLE_ENFORCE_NOT_NULL( src_var, - platform::errors::NotFound("Source variable %s is not found.", - src_var_name)); + phi::errors::NotFound("Source variable %s is not found.", + src_var_name)); auto &src_tensor = src_var->Get(); auto *dst_var = dst_scope->Var(dst_var_name); @@ -203,13 +203,13 @@ class RecurrentBase : public framework::OperatorBase { auto *src_var = src_scope.FindVar(src_var_name); PADDLE_ENFORCE_NOT_NULL( src_var, - platform::errors::NotFound("Source variable %s is not found.", - src_var_name)); + phi::errors::NotFound("Source variable %s is not found.", + src_var_name)); auto &src_tensor = src_var->Get(); PADDLE_ENFORCE_NOT_NULL( dst_var, - platform::errors::NotFound("Destination variable %s is not found.", - src_var_name)); + phi::errors::NotFound("Destination variable %s is not found.", + src_var_name)); auto *dst_tensor = dst_var->GetMutable(); callback(src_tensor, dst_tensor); } diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc index b23fee1a012df..a7776609b79b8 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc +++ 
b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc @@ -57,12 +57,12 @@ class XPULogsumexpKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); int r = xpu::logsumexp( dev_ctx.x_context(), input_data, output_data, xdims, axis_shape); - PADDLE_ENFORCE_EQ(r, - xpu::Error_t::SUCCESS, - platform::errors::External( - "XPU logsumexp kernel error! error value[%d %]", - r, - XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, + xpu::Error_t::SUCCESS, + phi::errors::External("XPU logsumexp kernel error! error value[%d %]", + r, + XPUAPIErrorMsg[r])); } }; diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.h b/paddle/fluid/operators/reduce_ops/reduce_mean_op.h index 017fab6308821..eb82be83ba517 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.h @@ -62,7 +62,7 @@ struct FP16MeanGradFunctor { int size) { dx->device(place) = (dy->template cast().broadcast(dim) / dx->template cast().constant(size)) - .template cast(); + .template cast(); } }; diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 428c8d2c9a02c..2e14acddc1485 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -87,7 +87,7 @@ static inline std::vector GetReduceDim(const std::vector& dims, for (auto e : dims) { PADDLE_ENFORCE_LT(e, dim_size, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ReduceBaseOp: invalid axis, when x_dims is %d, " "axis[i] should less than x_dims, but got %d.", dim_size, @@ -511,7 +511,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { auto dims = ctx->Attrs().Get>("dim"); PADDLE_ENFORCE_GT(dims.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input dim dimensions of ReduceBaseOp " "should be greater than 0. But received the dim " "dimensions of Reduce = %d.", @@ -521,7 +521,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( dims[i], x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The reduce dim index %d should be in the " "range [-dimension(X), dimension(X)] " "which dimension = %d. But received dim index = %d.", @@ -531,7 +531,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( dims[i], -x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The reduce dim index %d should be in the " "range [-dimension(X), dimension(X)] " "which dimension = %d. But received dim index = %d.", @@ -628,7 +628,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { platform::is_xpu_place(ctx.GetPlace()) || platform::is_custom_place(ctx.GetPlace()), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "float16 can only be used on GPU or XPU place")); } return phi::KernelKey(input_data_type, ctx.GetPlace()); @@ -670,7 +670,7 @@ class ReduceGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( dims[i], x_rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The reduce dim index %d should be in the " "range [-dimension(X), dimension(X)], " "which dimension = %d. 
But received dim index = %d.", diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h index 35cc8fea6d0ba..31279af17e176 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h @@ -34,10 +34,9 @@ void XPUReduce(const framework::ExecutionContext& context, T*, const std::vector&, const std::vector&)> func) { - PADDLE_ENFORCE_EQ( - platform::is_xpu_place(context.GetPlace()), - true, - platform::errors::Unavailable("This kernel only runs on XPU.")); + PADDLE_ENFORCE_EQ(platform::is_xpu_place(context.GetPlace()), + true, + phi::errors::Unavailable("This kernel only runs on XPU.")); bool reduce_all = context.Attr("reduce_all"); auto dims = context.Attr>("dim"); auto* x = context.Input("X"); @@ -48,7 +47,7 @@ void XPUReduce(const framework::ExecutionContext& context, int out_dtype = context.Attr("out_dtype"); PADDLE_ENFORCE_EQ(out_dtype == -1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "XPU only support out_dtype == -1 in reduce op.")); const auto* x_data = x->data(); @@ -88,16 +87,16 @@ void XPUReduce(const framework::ExecutionContext& context, dev_ctx.x_context(), x_data, y_data, x->numel() * sizeof(T)); PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::External("XPU copy in reduce op return " - "wrong value[%d %s].", - r, - XPUAPIErrorMsg[r])); + phi::errors::External("XPU copy in reduce op return " + "wrong value[%d %s].", + r, + XPUAPIErrorMsg[r])); } else { int r = func(dev_ctx.x_context(), x_data, y_data, xdims, reduce_dims); PADDLE_ENFORCE_EQ( r == xpu::Error_t::SUCCESS, true, - platform::errors::External( + phi::errors::External( "XPU reduce op return wrong value[%d %s].", r, XPUAPIErrorMsg[r])); } } diff --git a/paddle/fluid/operators/reduce_ops/unity_build_rule.cmake b/paddle/fluid/operators/reduce_ops/unity_build_rule.cmake index 839bb1ac7306c..da67c2c8d8b01 100644 --- a/paddle/fluid/operators/reduce_ops/unity_build_rule.cmake +++ b/paddle/fluid/operators/reduce_ops/unity_build_rule.cmake @@ -4,8 +4,7 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc reduce_all_op.cc reduce_any_op.cc) -register_unity_group(cu reduce_all_op.cu reduce_any_op.cu) + # The following groups are to make better use of `/MP` which MSVC's parallel # compilation instruction when compiling in Unity Build. 
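The ReduceBaseOp checks above pin every entry of the dim attribute to -rank <= dim < rank before the kernel runs. Below is a standalone sketch of that axis normalization in plain C++, with no Paddle types; the function name is hypothetical.

#include <stdexcept>
#include <vector>

std::vector<int> NormalizeReduceDims(const std::vector<int>& dims, int rank) {
  std::vector<int> out;
  out.reserve(dims.size());
  for (int d : dims) {
    if (d < -rank || d >= rank) {  // mirrors the PADDLE_ENFORCE_GE/LT pair above
      throw std::invalid_argument("reduce dim index out of range");
    }
    out.push_back(d < 0 ? d + rank : d);  // wrap a negative axis once
  }
  return out;
}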
register_unity_group(cu frobenius_norm_op.cu) diff --git a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc index 319fad9b39231..5ce59fc54d6a6 100644 --- a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc @@ -210,10 +210,10 @@ class ReorderLoDTensorByRankTableOp : public ReorderLoDTensorByRankTableBase { size_t out_offset = 0; out->mutable_lod()->clear(); for (auto &item : rank_table.items()) { - PADDLE_ENFORCE_LT(item.index, - absolute_table.size(), - platform::errors::OutOfRange( - "The value of rank_table is out of range.")); + PADDLE_ENFORCE_LT( + item.index, + absolute_table.size(), + phi::errors::OutOfRange("The value of rank_table is out of range.")); out_offset = CopyTensorAndLod( place, absolute_table[item.index], x, out, out_offset); } diff --git a/paddle/fluid/operators/repeat_interleave_op.cc b/paddle/fluid/operators/repeat_interleave_op.cc index d0af82510bdc4..e276ef2082fb6 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cc +++ b/paddle/fluid/operators/repeat_interleave_op.cc @@ -29,12 +29,12 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of RepeatInterleaveOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of RepeatInterleaveOp should not be null.")); auto input_dim = ctx->GetInputDim("X"); @@ -43,7 +43,7 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( dim < input_dim.size() && dim >= (0 - input_dim.size()), true, - platform::errors::OutOfRange( + phi::errors::OutOfRange( "Attr(dim) is out of range, It's expected " "to be in range of [-%d, %d]. But received Attr(dim) = %d.", input_dim.size(), @@ -58,7 +58,7 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { repeats_dim.size() == 1 || (repeats_dim.size() == 2 && repeats_dim[1] == 1), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' of Input(RepeatsTensor) must be 1-D tensor. 
" "But received: the 'shape' of Input(Index) is [%s], " "the dimension of Input(Index) is [%d].", @@ -67,7 +67,7 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(repeats_dim[0] != 0, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of Input(RepeatsTensor) can't be 0.")); if (dim < 0) { @@ -98,14 +98,14 @@ class RepeatInterleaveGradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@GRAD) should be not null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), - true, - platform::errors::InvalidArgument( - "Output(X@GRAD) should be not null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Input(Out@GRAD) should be not null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput(framework::GradVarName("X")), + true, + phi::errors::InvalidArgument("Output(X@GRAD) should be not null.")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 34d80604ae8b0..d984edc4c4172 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -52,11 +52,11 @@ class ReshapeOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of ReshapeOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of ReshapeOp should not be null.")); if (ctx->IsRuntime()) { @@ -76,7 +76,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( ShapeTensor.size(), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When `shape` in ReshapeOp is a list or tuple " "which contains Tensor, the shape's size can't be zero. " "But received shape's size is %d.", @@ -89,7 +89,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( static_cast(i), in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The index of 0 in `shape` must be less than " "the input tensor X's dimensions. But received shape[%d] " "= 0, X's dimensions = %d, X's shape = [%s].", @@ -155,7 +155,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( unk_dim_idx, -1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only one dimension value of 'shape' in ReshapeOp can " "be -1. But received shape = [%s], shape[%d] is also -1.", common::make_ddim(shape), @@ -165,7 +165,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( static_cast(i), in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The index of 0 in `shape` must be less than " "the input tensor X's dimensions. 
" "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " @@ -178,7 +178,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( shape[i], 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each dimension value of 'shape' in ReshapeOp must not " "be negative except one unknown dimension. " "But received shape = [%s], shape[%d] = %d.", @@ -204,7 +204,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( output_shape[unk_dim_idx] * capacity, -in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' attribute in ReshapeOp is invalid. " "The input tensor X'size must be divisible by known " "capacity of 'shape'. " @@ -222,7 +222,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( capacity, in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' in ReshapeOp is invalid. " "The input tensor X'size must be equal to the capacity of " "'shape'. " @@ -242,7 +242,7 @@ class ReshapeOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE( capacity, in_size, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The 'shape' in ReshapeOp is invalid. " "The input tensor X's shape = [%s], X's capacity = %d." "But the target shape of Out is [%s], the " @@ -359,11 +359,11 @@ class ReshapeGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) shouldn't be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@GRAD) shouldn't be null.")); + phi::errors::InvalidArgument("Input(X) shouldn't be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Input(Out@GRAD) shouldn't be null.")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } @@ -613,11 +613,11 @@ class Reshape2GradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("XShape"), true, - platform::errors::InvalidArgument("Input(XShape) shouldn't be null.")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@GRAD) shouldn't be null.")); + phi::errors::InvalidArgument("Input(XShape) shouldn't be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Input(Out@GRAD) shouldn't be null.")); // Construct MetaTensor for InferMeta Func using CompatMetaTensor = framework::CompatMetaTensor; @@ -774,7 +774,7 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, ops::ReshapeKernel, int64_t, ops::ReshapeKernel, - plat::float16, + phi::dtype::float16, ops::ReshapeKernel, plat::bfloat16, ops::ReshapeKernel); @@ -791,7 +791,7 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, ops::ReshapeGradKernel, uint8_t, ops::ReshapeGradKernel, - plat::float16, + phi::dtype::float16, ops::ReshapeGradKernel, plat::bfloat16, ops::ReshapeGradKernel); diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index 04633c9e8e5dd..38d77de90ace4 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -39,7 +39,7 @@ class RowConvOp : public framework::OperatorWithKernel { auto filter_dims = ctx->GetInputDim("Filter"); PADDLE_ENFORCE_EQ(filter_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( 
"Input(Filter)'s dimensions should be 2. Received: " "Input(Filter)'s shape: [%s].", filter_dims)); diff --git a/paddle/fluid/operators/rrelu_op.cc b/paddle/fluid/operators/rrelu_op.cc index 53f6969695e8e..3111ad4e5015d 100644 --- a/paddle/fluid/operators/rrelu_op.cc +++ b/paddle/fluid/operators/rrelu_op.cc @@ -52,7 +52,7 @@ class RReluOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float& lower) { PADDLE_ENFORCE_EQ(lower >= 0.0f && lower < 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'RRelu_lower' must be between 0.0 and 1.0.")); }); float defalut_upper = 1. / 3.; @@ -61,7 +61,7 @@ class RReluOpMaker : public framework::OpProtoAndCheckerMaker { .AddCustomChecker([](const float& upper) { PADDLE_ENFORCE_EQ(upper > 0.0f && upper <= 1.0f, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "'RRelu_upper' must be between 0.0 and 1.0.")); }); AddComment(R"DOC( diff --git a/paddle/fluid/operators/run_program_op.cc b/paddle/fluid/operators/run_program_op.cc index ffb024d165d36..0dc2d8ea0e20d 100644 --- a/paddle/fluid/operators/run_program_op.cc +++ b/paddle/fluid/operators/run_program_op.cc @@ -24,13 +24,13 @@ class RunProgramOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInputs("X"), - true, - platform::errors::NotFound( - "Input(X) of RunProgramOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInputs("X"), + true, + phi::errors::NotFound("Input(X) of RunProgramOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutputs("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Out) of RunProgramOp should not be null.")); } @@ -173,12 +173,12 @@ class RunProgramGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInputs("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of RunProgramGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInputs(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@GRAD) of RunProgramGradOp should not be null.")); // NOTE: The X@GRAD and Params@GRAD may not exist, // because they can be set stop_gradient = True diff --git a/paddle/fluid/operators/run_program_op.cu b/paddle/fluid/operators/run_program_op.cu index 9a2b6851a4c73..1d9011429577b 100644 --- a/paddle/fluid/operators/run_program_op.cu +++ b/paddle/fluid/operators/run_program_op.cu @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/run_program_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/run_program_op.h b/paddle/fluid/operators/run_program_op.h index 6006d7556423c..895a99608c902 100644 --- a/paddle/fluid/operators/run_program_op.h +++ b/paddle/fluid/operators/run_program_op.h @@ -32,7 +32,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/framework/variable.h" #ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/onednn_helper.h" #endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/operators/cuda_graph_with_in_out.h" @@ -58,7 +58,7 @@ static void CheckInputVarStatus(const Variable &var, const std::string &var_name) { PADDLE_ENFORCE_EQ(var.IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input variable %s of " "RunProgram(Grad)Op holds " "wrong type. Expect type is phi::DenseTensor, but " @@ -68,10 +68,10 @@ static void CheckInputVarStatus(const Variable &var, PADDLE_ENFORCE_EQ( var.Get().IsInitialized(), true, - platform::errors::InvalidArgument("The tensor in input variable %s of " - "RunProgram(Grad)Op " - "is not initialized.", - var_name)); + phi::errors::InvalidArgument("The tensor in input variable %s of " + "RunProgram(Grad)Op " + "is not initialized.", + var_name)); } static void CheckOutputVarStatus(const Variable &src_var, @@ -81,7 +81,7 @@ static void CheckOutputVarStatus(const Variable &src_var, PADDLE_ENFORCE_EQ( src_var.IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output variable %s get from " "RunProgram(Grad)Op's internal scope holds " "wrong type. Expect type is phi::DenseTensor, but receive type is " @@ -90,7 +90,7 @@ static void CheckOutputVarStatus(const Variable &src_var, platform::demangle(framework::ToTypeName(src_var.Type())))); PADDLE_ENFORCE_EQ(src_var.Get().IsInitialized(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tensor in output variable %s get from " "RunProgram(Grad)Op's internal " "scope is not initialized.", @@ -99,7 +99,7 @@ static void CheckOutputVarStatus(const Variable &src_var, PADDLE_ENFORCE_EQ( src_var.IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The output variable %s get from " "RunProgram(Grad)Op's internal scope holds " "wrong type. 
Expect type is SelectedRows, but receive type is %s.", @@ -107,14 +107,14 @@ static void CheckOutputVarStatus(const Variable &src_var, platform::demangle(framework::ToTypeName(src_var.Type())))); PADDLE_ENFORCE_EQ(src_var.Get().value().IsInitialized(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tensor in output variable %s get from " "RunProgram(Grad)Op's " "internal scope is not initialized.", var_name)); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The RunProgram(Grad)Op only support output " "variable of type phi::DenseTensor or SelectedRows, " "but received variable %s's type is %s", @@ -173,10 +173,10 @@ static void ShareVarsFromScope(const std::vector &vars, auto *var = scope->FindVar(var_names[i]); PADDLE_ENFORCE_NOT_NULL( var, - platform::errors::NotFound("The output variable %s is not in " - "RunProgram(Grad)Op'" - "s internal scope.", - var_names[i])); + phi::errors::NotFound("The output variable %s is not in " + "RunProgram(Grad)Op'" + "s internal scope.", + var_names[i])); CheckOutputVarStatus(*var, *vars[i], var_names[i]); VariableShare(*var, vars[i]); } @@ -312,14 +312,14 @@ class RunProgramOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( use_cuda_graph, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If not provide OutScope then must run under cuda graph mode.")); inner_scope = std::make_unique(); } else { PADDLE_ENFORCE_EQ( out_scope_vec->size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The OutScope of RunProgramGradOp should only hold one scope.")); } @@ -511,7 +511,7 @@ class RunProgramGradOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( out_scope_vec->size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The OutScope of RunProgramGradOp should only hold one scope.")); framework::Scope *global_inner_scope = out_scope_vec->front(); @@ -519,7 +519,7 @@ class RunProgramGradOpKernel : public framework::OpKernel { VLOG(2) << "The number of sub scopes before backward: " << sub_scope_num; PADDLE_ENFORCE_GT(sub_scope_num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The OutScope of RunProgramGradOp should hold at " "least one sub scope.")); diff --git a/paddle/fluid/operators/sampling_id_op.cc b/paddle/fluid/operators/sampling_id_op.cc deleted file mode 100644 index 5df5270976ca4..0000000000000 --- a/paddle/fluid/operators/sampling_id_op.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
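The CheckInputVarStatus/CheckOutputVarStatus hunks above follow one pattern: assert that the Variable holds the expected type, then assert that the held tensor is initialized. A condensed sketch using only the interfaces visible in the hunks (IsType<T>(), Get<T>(), IsInitialized()); the helper name is hypothetical and the enforce header path is an assumption.

#include <string>

#include "paddle/fluid/framework/variable.h"
#include "paddle/phi/core/enforce.h"  // assumed location of PADDLE_ENFORCE_* and phi::errors

static void EnsureDenseTensorVar(const paddle::framework::Variable& var,
                                 const std::string& var_name) {
  // Type check first, so the Get<T>() below is safe.
  PADDLE_ENFORCE_EQ(var.IsType<phi::DenseTensor>(),
                    true,
                    phi::errors::InvalidArgument(
                        "Variable %s must hold a phi::DenseTensor.", var_name));
  // Then make sure the tensor actually carries data.
  PADDLE_ENFORCE_EQ(var.Get<phi::DenseTensor>().IsInitialized(),
                    true,
                    phi::errors::InvalidArgument(
                        "The tensor in variable %s is not initialized.",
                        var_name));
}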
*/ - -#include "paddle/fluid/operators/sampling_id_op.h" - -namespace paddle { -namespace operators { - -class SamplingIdOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SampleIn"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "X", "SampleOut"); - PADDLE_ENFORCE_LT( - ctx->Attrs().Get("min"), - ctx->Attrs().Get("max"), - platform::errors::InvalidArgument( - "min must less then max, but here min is %f, max is %f", - ctx->Attrs().Get("min"), - ctx->Attrs().Get("max"))); - - auto input_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - input_dims.size(), - 2, - platform::errors::InvalidArgument( - "Input(X, Filter) should be 2-D tensor. But X dim is %d", - input_dims.size())); - - auto dim0 = input_dims[0]; - framework::DDim dims = common::make_ddim({dim0}); - ctx->SetOutputDim("Out", dims); - ctx->ShareLoD("X", "Out"); - } -}; - -class SamplingIdOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "The input tensor of softmax. " - "2-D with shape [batch_size, input_feature_dimensions]."); - AddOutput("Out", "SamplingId data tensor."); - AddComment(R"DOC( -SamplingId Operator. -A layer for sampling id from multinomial distribution from the - input. Sampling one id for one sample.)DOC"); - AddAttr("min", "Minimum value of random. (float, default 0.0).") - .SetDefault(0.0f); - AddAttr("max", "Maximun value of random. (float, default 1.0).") - .SetDefault(1.0f); - AddAttr( - "seed", - "Random seed used for the random number engine. " - "0 means use a seed generated by the system." - "Note that if seed is not 0, this operator will " - "generate the same random numbers every time. (int, default 0).") - .SetDefault(0); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - sampling_id, - ops::SamplingIdOp, - ops::SamplingIdOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); - -PD_REGISTER_STRUCT_KERNEL(sampling_id, - CPU, - ALL_LAYOUT, - paddle::operators::SamplingIdKernel, - float, - double) {} diff --git a/paddle/fluid/operators/sampling_id_op.cu b/paddle/fluid/operators/sampling_id_op.cu deleted file mode 100644 index 2ec00d125bcab..0000000000000 --- a/paddle/fluid/operators/sampling_id_op.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include "paddle/fluid/operators/sampling_id_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL(sampling_id, - GPU, - ALL_LAYOUT, - paddle::operators::SamplingIdKernel, - float, - double) {} diff --git a/paddle/fluid/operators/sampling_id_op.h b/paddle/fluid/operators/sampling_id_op.h deleted file mode 100644 index 730d84c2a651e..0000000000000 --- a/paddle/fluid/operators/sampling_id_op.h +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/generator.h" - -namespace paddle { -namespace operators { - -template -class SamplingIdKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const phi::DenseTensor* input = context.Input("X"); - const int batch_size = static_cast(input->dims()[0]); - const int width = static_cast(input->dims()[1]); - - PADDLE_ENFORCE_GE( - batch_size, - 0, - platform::errors::InvalidArgument( - "batch_size(dims[0]) must be nonnegative. but it is %d.", - batch_size)); - PADDLE_ENFORCE_GE( - width, - 0, - platform::errors::InvalidArgument( - "width(dims[1]) must be nonnegative. but it is %d.", width)); - - std::vector ins_vector; - framework::TensorToVector(*input, context.device_context(), &ins_vector); - - unsigned int seed = static_cast(context.Attr("seed")); - - std::uniform_real_distribution dist( - static_cast(context.Attr("min")), - static_cast(context.Attr("max"))); - - auto engine = phi::GetCPURandomEngine(seed); - std::vector ids(batch_size); - for (int i = 0; i < batch_size; ++i) { - T r = dist(*engine); - int idx = width - 1; - for (int j = 0; j < width; ++j) { - if ((r -= ins_vector[i * width + j]) < 0) { - idx = j; - break; - } - } - ids[i] = int64_t(idx); - } - - std::vector out_dim; - out_dim.push_back(static_cast(batch_size)); - - phi::DenseTensor* output = context.Output("Out"); - output->Resize(common::make_ddim(out_dim)); - output->mutable_data(context.GetPlace()); - framework::TensorFromVector(ids, context.device_context(), output); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sampling_id_op_xpu.cc b/paddle/fluid/operators/sampling_id_op_xpu.cc deleted file mode 100644 index 9fd0193733e6e..0000000000000 --- a/paddle/fluid/operators/sampling_id_op_xpu.cc +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/fluid/operators/sampling_id_op.h" -#include "paddle/fluid/platform/device_context.h" - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL( - sampling_id, XPU, ALL_LAYOUT, ops::SamplingIdKernel, float, double) {} diff --git a/paddle/fluid/operators/save_combine_op.h b/paddle/fluid/operators/save_combine_op.h index f5c3fb9969f1e..60c844678924b 100644 --- a/paddle/fluid/operators/save_combine_op.h +++ b/paddle/fluid/operators/save_combine_op.h @@ -185,7 +185,7 @@ class SaveCombineOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GT(inp_var_names.size(), 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of variables to be saved is %d, expect " "it to be greater than 0.", inp_var_names.size())); @@ -199,12 +199,12 @@ class SaveCombineOpKernel : public framework::OpKernel { for (size_t i = 0; i < inp_vars.size(); i++) { PADDLE_ENFORCE_NOT_NULL( inp_vars[i], - platform::errors::InvalidArgument( - "Cannot find variable %s to save.", inp_var_names[i])); + phi::errors::InvalidArgument("Cannot find variable %s to save.", + inp_var_names[i])); PADDLE_ENFORCE_EQ( inp_vars[i]->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SaveCombine operator only supports saving " "phi::DenseTensor or Vocab variable, %s has wrong type.", inp_var_names[i])); @@ -222,12 +222,12 @@ class SaveCombineOpKernel : public framework::OpKernel { for (size_t i = 0; i < inp_vars.size(); i++) { PADDLE_ENFORCE_NOT_NULL( inp_vars[i], - platform::errors::InvalidArgument( - "Cannot find variable %s to save.", inp_var_names[i])); + phi::errors::InvalidArgument("Cannot find variable %s to save.", + inp_var_names[i])); PADDLE_ENFORCE_EQ( inp_vars[i]->IsType(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SaveCombine operator only supports saving " "phi::DenseTensor or Vocab variable, %s has wrong type.", inp_var_names[i])); diff --git a/paddle/fluid/operators/search_compute.h b/paddle/fluid/operators/search_compute.h index abf8365182483..579f31fbcf388 100644 --- a/paddle/fluid/operators/search_compute.h +++ b/paddle/fluid/operators/search_compute.h @@ -145,7 +145,7 @@ inline void axpy(const T* x, T* y, size_t len, const T alpha) { } #elif defined(PADDLE_WITH_ARM) || defined(PADDLE_WITH_SW) || \ defined(PADDLE_WITH_MIPS) || defined(PADDLE_WITH_LOONGARCH) - PADDLE_THROW(platform::errors::Unimplemented("axpy is not supported")); + PADDLE_THROW(phi::errors::Unimplemented("axpy is not supported")); #else lll = len & ~SSE_CUT_LEN_MASK; __m128x mm_alpha = _mm_load1_px(&alpha); @@ -175,7 +175,7 @@ inline void axpy_noadd(const T* x, T* y, size_t len, const T alpha) { } #elif defined(PADDLE_WITH_ARM) || defined(PADDLE_WITH_SW) || \ defined(PADDLE_WITH_MIPS) || defined(PADDLE_WITH_LOONGARCH) - PADDLE_THROW(platform::errors::Unimplemented("axpy_noadd is not supported")); + PADDLE_THROW(phi::errors::Unimplemented("axpy_noadd is not supported")); #else lll = len & ~SSE_CUT_LEN_MASK; __m128x mm_alpha = _mm_load1_px(&alpha); @@ -194,7 +194,7 @@ inline void axpy_noadd(const int8_t* x, int8_t* y, size_t len, 
const float alpha) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "int8_t input of axpy_noadd is not supported")); } diff --git a/paddle/fluid/operators/select_input_op.cc b/paddle/fluid/operators/select_input_op.cc index 3b00aab8c8e89..8383a8bec3bd3 100644 --- a/paddle/fluid/operators/select_input_op.cc +++ b/paddle/fluid/operators/select_input_op.cc @@ -43,7 +43,7 @@ class SelectInputOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( output_branch, x_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input 'Mask' in SelectInputOp is invalid. " "'Mask' must be less than the size of input vector 'X'. " "But received Mask = %d, X's size = %d.", diff --git a/paddle/fluid/operators/select_op_helper.h b/paddle/fluid/operators/select_op_helper.h index 2b7f884f6170c..33c5879de71f2 100644 --- a/paddle/fluid/operators/select_op_helper.h +++ b/paddle/fluid/operators/select_op_helper.h @@ -28,7 +28,7 @@ namespace operators { inline int GetBranchNumber(const phi::DenseTensor &mask) { PADDLE_ENFORCE_EQ(mask.numel(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The numel of Input(Mask) in SelectInputOp or " "SelectOutputOp must be 1. " "But received %d, and it's shape is [%s].", @@ -43,7 +43,7 @@ inline int GetBranchNumber(const phi::DenseTensor &mask) { defined(PADDLE_WITH_CUSTOM_DEVICE) || defined(PADDLE_WITH_XPU) framework::TensorCopySync(mask, platform::CPUPlace(), cpu_mask.get()); #else - PADDLE_THROW(platform::errors::PreconditionNotMet( + PADDLE_THROW(phi::errors::PreconditionNotMet( "This version of PaddlePaddle does NOT support GPU, " "but got GPU tensor 'Mask' in SelectInputOp or SelectOutputOp. " "Please compile PaddlePaddle WITH_GPU first.")); diff --git a/paddle/fluid/operators/select_output_op.cc b/paddle/fluid/operators/select_output_op.cc index 623d1bb5c6ce9..8f61ef7bb712a 100644 --- a/paddle/fluid/operators/select_output_op.cc +++ b/paddle/fluid/operators/select_output_op.cc @@ -55,7 +55,7 @@ class SelectOutputOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( output_branch, out_names.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input 'Mask' in SelectOutputOp is invalid. " "'Mask' must be less than the size of output vector 'Out'. " "But received Mask = %d, Out's size = %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc deleted file mode 100644 index dd65162b3aad4..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
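The SamplingIdKernel removed above draws one id per batch row by inverse-CDF sampling: draw r in [min, max), then walk the row subtracting probabilities until the remainder turns negative. A minimal standalone sketch of the same technique (plain C++; names and defaults are illustrative, not the Paddle API):

// Inverse-CDF sampling as in the deleted SamplingIdKernel: subtract
// probabilities from r until it goes negative; fall back to the last
// column, as the original kernel did with idx = width - 1.
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

std::vector<int64_t> sampling_id(const std::vector<float>& probs,
                                 int batch_size, int width,
                                 float min = 0.f, float max = 1.f,
                                 unsigned seed = 0) {
  // seed == 0 means "use a system-generated seed", mirroring the op attr.
  std::mt19937 engine(seed ? seed : std::random_device{}());
  std::uniform_real_distribution<float> dist(min, max);
  std::vector<int64_t> ids(batch_size);
  for (int i = 0; i < batch_size; ++i) {
    float r = dist(engine);
    int idx = width - 1;
    for (int j = 0; j < width; ++j) {
      if ((r -= probs[i * width + j]) < 0) { idx = j; break; }
    }
    ids[i] = idx;
  }
  return ids;
}

int main() {
  // One row of softmax probabilities; most mass on column 2.
  std::vector<float> probs = {0.1f, 0.2f, 0.7f};
  for (int64_t id : sampling_id(probs, /*batch_size=*/1, /*width=*/3))
    std::cout << id << "\n";
}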
- -#include "paddle/fluid/operators/sequence_ops/sequence_concat_op.h" - -#include -#include - -namespace paddle { -namespace operators { - -class SeqConcatOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The inputs of sequence concat op").AsDuplicable(); - AddOutput("Out", "The output of sequence concat op"); - AddComment( - "Sequence Concat Op\n" - "It will concat LoD tensors by its sequence information.\n" - "For example:\n" - " LoD of X1 = [0, 3, 7]\n" - " LoD of X2 = [0, 7, 9]\n" - " Result LoD is [0, (3+7), (7+9)]\n" - " i.e.[0, 10, 16]\n"); - } -}; - -class SequenceConcatOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *context) const override { - PADDLE_ENFORCE_EQ( - context->HasInputs("X"), - true, - platform::errors::NotFound("SequenceConcatOp Input(X) of Sequence " - "Concat Op should not be null.")); - PADDLE_ENFORCE_EQ( - context->HasOutput("Out"), - true, - platform::errors::NotFound("SequenceConcatOp Output(Out) of Sequence " - "Concat Op should not be null.")); - - PADDLE_ENFORCE_GT(context->Inputs("X").size(), - 1, - platform::errors::InvalidArgument( - "The number of SequenceConcatOp inputs should be " - "greater than 1. But " - "the number of inputs we received is %d", - context->Inputs("X").size())); - auto x_dims = context->GetInputsDim("X"); - int64_t batch_size = 0; - int64_t feature_size = 0; - std::vector out_dims; - for (auto &x_dim : x_dims) { - if (out_dims.empty()) { - out_dims = common::vectorize(x_dim); - } - batch_size += x_dim[0]; - PADDLE_ENFORCE_NE( - x_dim[0], - 0, - platform::errors::InvalidArgument( - "The first dim of SequenceConcatOp inputs must not be 0.")); - if (feature_size == 0) { - feature_size = common::product(x_dim) / x_dim[0]; - } else { - PADDLE_ENFORCE_EQ( - feature_size, - common::product(x_dim) / x_dim[0], - platform::errors::InvalidArgument( - "Each input of SequenceConcatOp inputs must have same feature " - "size, But " - "the feature size we received is %d, the feature size of 1st " - "input is %d", - feature_size, - common::product(x_dim) / x_dim[0])); - } - } - if (batch_size < 0) { - batch_size = -1; // Normalize batch size for compile time. - } - out_dims[0] = batch_size; - context->SetOutputDim("Out", common::make_ddim(out_dims)); - if (!context->IsRuntime()) { // Runtime LoD infershape will be computed - // in Kernel. 
- context->ShareLoD("X", "Out"); - } - } -}; - -template -class SeqConcatGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("sequence_concat_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X", false)); - op->SetAttrMap(this->Attrs()); - } -}; - -class SeqConcatGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *context) const override { - context->SetOutputsDim(framework::GradVarName("X"), - context->GetInputsDim("X")); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(SeqConcatGradNoNeedBufferVarsInferer, "X"); - -} // namespace operators -} // namespace paddle - -namespace op = paddle::operators; - -REGISTER_OPERATOR(sequence_concat, - op::SequenceConcatOp, - op::SeqConcatOpMaker, - op::SeqConcatGradOpMaker, - op::SeqConcatGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(sequence_concat, - CPU, - ALL_LAYOUT, - op::SeqConcatKernel, - float, - double, - int, - int64_t) {} - -REGISTER_OPERATOR(sequence_concat_grad, - op::SeqConcatGradOp, - op::SeqConcatGradNoNeedBufferVarsInferer); -PD_REGISTER_STRUCT_KERNEL(sequence_concat_grad, - CPU, - ALL_LAYOUT, - op::SeqConcatGradKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc deleted file mode 100644 index b668a9d2558ef..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/sequence_ops/sequence_concat_op.h" - -#include "paddle/fluid/framework/op_registry.h" - -PD_REGISTER_STRUCT_KERNEL(sequence_concat, - GPU, - ALL_LAYOUT, - paddle::operators::SeqConcatKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_concat_grad, - GPU, - ALL_LAYOUT, - paddle::operators::SeqConcatGradKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.h b/paddle/fluid/operators/sequence_ops/sequence_concat_op.h deleted file mode 100644 index 463cadc3ce733..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.h +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/concat_and_split.h" - -namespace paddle { -namespace operators { - -namespace detail { -template -inline framework::LoD ConcatLoD(const Container &xs, - std::vector *xs_in_order) { - std::vector result; - result.resize(xs[0].get().lod()[0].size()); - - for (size_t i = 1; i < result.size(); ++i) { - size_t sum = 0; - for (size_t j = 0; j < xs.size(); ++j) { - auto &x_lod = xs[j].get().lod()[0]; - const phi::DenseTensor &tensor = xs[j].get(); - if (x_lod[i - 1] < x_lod[i]) { - xs_in_order->emplace_back(tensor.Slice(x_lod[i - 1], x_lod[i])); - } - sum += x_lod[i]; - } - result[i] = sum; - } - framework::LoD lod; - lod.emplace_back(result); - return lod; -} - -template -inline std::vector> GetDataVectorSafely( - const std::vector &vec, ARGS &&...args) { - std::vector> result; - result.reserve(vec.size()); - for (auto *ptr : vec) { - PADDLE_ENFORCE_NOT_NULL(ptr, - platform::errors::InvalidArgument( - "The input variable X contains nullptr.")); - result.emplace_back(*ptr); - } - return result; -} -} // namespace detail - -template -class SeqConcatKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto xs = - detail::GetDataVectorSafely(context.MultiInput("X")); - auto &out = *context.Output("Out"); - - size_t lod_size = 0; - for (auto &x : xs) { - if (lod_size == 0) { - PADDLE_ENFORCE_EQ(x.get().lod().empty(), - false, - platform::errors::NotFound( - "Input(X) Tensor of SequenceConcatOp does not " - "contain LoD information.")); - lod_size = x.get().lod()[0].size(); - } else { - PADDLE_ENFORCE_EQ(lod_size, - x.get().lod()[0].size(), - platform::errors::InvalidArgument( - "The lod size of each input must be the same, " - "But the lod size of input we received is %d, " - "the first input is %d", - x.get().lod()[0].size(), - lod_size)); - } - } - PADDLE_ENFORCE_NE( - lod_size, - 0, - platform::errors::InvalidArgument( - "Each input must have sequence lod information. 
But we " - "received input lod size is %d", - lod_size)); - - std::vector x_in_order; - out.set_lod(detail::ConcatLoD(xs, &x_in_order)); - out.mutable_data(context.GetPlace()); - math::ConcatFunctor functor; - functor( - context.template device_context(), x_in_order, 0, &out); - } -}; - -template -class SeqConcatGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto xs = context.MultiInput("X"); - auto dxs = - context.MultiOutput(framework::GradVarName("X")); - PADDLE_ENFORCE_EQ(xs.size(), - dxs.size(), - platform::errors::InvalidArgument( - "The rank of Input X and Output Grad X must be " - "same, But the rank of Input X we received is %d, " - "the rank of Output Grad X is %d", - xs.size(), - dxs.size())); - for (size_t i = 0; i < dxs.size(); ++i) { - if (dxs[i] != nullptr) { - dxs[i]->set_lod(xs[i]->lod()); - dxs[i]->mutable_data(context.GetPlace()); - } - } - - std::vector sliced_x; - std::vector> sliced_dx; - - for (size_t i = 1; i < xs[0]->lod()[0].size(); ++i) { - for (size_t j = 0; j < xs.size(); ++j) { - const phi::DenseTensor *x = xs[j]; - framework::DDim x_dims = x->dims(); - - phi::DenseTensor *dx = dxs[j]; - auto &x_lod = x->lod()[0]; - if (x_lod[i - 1] == x_lod[i]) continue; - - auto prev_lod = x_lod[i - 1]; - auto next_lod = x_lod[i]; - - x_dims[0] = next_lod - prev_lod; - - sliced_x.emplace_back(); - sliced_x.back().Resize(x_dims); - - if (dx) { - sliced_dx.emplace_back(dx->Slice(prev_lod, next_lod)); - } else { - sliced_dx.emplace_back(paddle::none); - } - } - } - - std::vector sliced_x_ptr; - sliced_x_ptr.reserve(sliced_x.size()); - for (auto &x : sliced_x) { - sliced_x_ptr.emplace_back(&x); - } - - std::vector sliced_dx_ptr; - sliced_dx_ptr.reserve(sliced_dx.size()); - for (auto &dx : sliced_dx) { - if (dx) { - sliced_dx_ptr.emplace_back(&dx.get()); - } - } - - math::SplitFunctor functor; - functor(context.template device_context(), - GET_DATA_SAFELY( - context.Input(framework::GradVarName("Out")), - "Input", - framework::GradVarName("Out"), - "SeqConcatGrad"), - sliced_x_ptr, - 0, - &sliced_dx_ptr); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc index c94f57807cd52..24109e8ed4531 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc @@ -40,14 +40,14 @@ class SequenceConvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->Attrs().Get("contextStride"), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Currently, SequenceConvOp only supports contextStride=1. But " "received contextStride = %u.", ctx->Attrs().Get("contextStride"))); PADDLE_ENFORCE_EQ( in_dims.size() == 2 && filter_dims.size() == 2, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X, Filter) should be 2-D tensor. But received Input(X): " "input rank %u, input shape [%s]; received Input(Filter): " "input rank %u, input shape [%s].", @@ -58,7 +58,7 @@ class SequenceConvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( filter_dims[0], context_length * in_dims[1], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Filter's height should be context_length * " "input_hidden_size. 
But received: filter's height = %d, " "context_length * input_hidden_size = %d.", @@ -82,13 +82,13 @@ class SequenceConvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( start_length, false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "If context_start is 0 and context_length is 1, paddingTrainable " "should be false.")); PADDLE_ENFORCE_EQ( padding_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(PaddingData) should be 2-D tensor. But received: " "input rank %u, input shape [%s].", padding_dim.size(), @@ -96,14 +96,14 @@ class SequenceConvOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( padding_dim[0] == total_pad && padding_dim[1] == input_width, true, - platform::errors::InvalidArgument("Input(PaddingData)'s shape is not " - "consistent with 'context_start' " - "and 'context_length'. Received " - "Input(PaddingData): input rank " - "%u, " - "input shape [%s].", - padding_dim.size(), - padding_dim)); + phi::errors::InvalidArgument("Input(PaddingData)'s shape is not " + "consistent with 'context_start' " + "and 'context_length'. Received " + "Input(PaddingData): input rank " + "%u, " + "input shape [%s].", + padding_dim.size(), + padding_dim)); } in_dims[1] = filter_dims[1]; diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h index 347db6e37db10..b7820f5dda0a7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h @@ -39,13 +39,13 @@ class SequenceConvKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in->lod().empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceConvOp " "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support input sequence with lod level equal to 1 at " "present. But received: lod level %u.", in->lod().size())); @@ -107,7 +107,7 @@ class SequenceConvGradKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support input sequence with lod level equal to 1 at " "present. But received: lod level %u.", in->lod().size())); diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc index 53fb13180c36a..94f65ecd1c6e4 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc @@ -37,13 +37,13 @@ class SequenceConvXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in->lod().empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceConvOp " "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support input sequence with lod level equal to 1 at " "present. 
But received: lod level %u.", in->lod().size())); @@ -51,19 +51,19 @@ class SequenceConvXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( padding_trainable, false, - platform::errors::InvalidArgument("Only support padding_trainable " - "equal false.")); + phi::errors::InvalidArgument("Only support padding_trainable " + "equal false.")); int up_pad = std::max(0, -context_start); int down_pad = std::max(0, context_start + context_length - 1); PADDLE_ENFORCE_EQ( up_pad, 2, - platform::errors::InvalidArgument("Only support up_pad equal 2.")); + phi::errors::InvalidArgument("Only support up_pad equal 2.")); PADDLE_ENFORCE_EQ( down_pad, 2, - platform::errors::InvalidArgument("Only support down_pad equal 2.")); + phi::errors::InvalidArgument("Only support down_pad equal 2.")); auto xpu_context = context.template device_context().x_context(); @@ -73,8 +73,8 @@ class SequenceConvXPUKernel : public framework::OpKernel { xpu::ctx_guard RAII_GUARD(xpu_context); int col_numel = col_shape[0] * col_shape[1]; T* col_data = RAII_GUARD.alloc_l3_or_gm(col_numel); - PADDLE_ENFORCE_NOT_NULL( - col_data, paddle::platform::errors::Fatal("XPU memory is not enough")); + PADDLE_ENFORCE_NOT_NULL(col_data, + phi::errors::Fatal("XPU memory is not enough")); auto lod_level_0 = in->lod()[0]; int lod_size = lod_level_0.size(); @@ -84,7 +84,7 @@ class SequenceConvXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( lod_size, 257, - platform::errors::InvalidArgument("Only support batch size <= 256.")); + phi::errors::InvalidArgument("Only support batch size <= 256.")); std::vector cpu_lodx(lod_size); for (int i = 0; i < lod_size; i++) { @@ -113,7 +113,7 @@ class SequenceConvXPUKernel : public framework::OpKernel { int n = filter.dims()[1]; PADDLE_ENFORCE_EQ(k, k1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of FC in SequenceConvOp is invalid." "The k of matrix A is %d, k1 of matrix B is %d." "But expect k == k1", @@ -173,13 +173,13 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in->lod().empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceConvOp " "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only support input sequence with lod level equal to 1 at " "present. 
But received: lod level %u.", in->lod().size())); @@ -187,26 +187,26 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( padding_trainable, false, - platform::errors::InvalidArgument("Only support padding_trainable " - "equal false.")); + phi::errors::InvalidArgument("Only support padding_trainable " + "equal false.")); int up_pad = std::max(0, -context_start); int down_pad = std::max(0, context_start + context_length - 1); PADDLE_ENFORCE_EQ( up_pad, 2, - platform::errors::InvalidArgument("Only support up_pad equal 2.")); + phi::errors::InvalidArgument("Only support up_pad equal 2.")); PADDLE_ENFORCE_EQ( down_pad, 2, - platform::errors::InvalidArgument("Only support down_pad equal 2.")); + phi::errors::InvalidArgument("Only support down_pad equal 2.")); auto lod_level_0 = in->lod()[0]; int lod_size = lod_level_0.size(); PADDLE_ENFORCE_LE( lod_size, 257, - platform::errors::InvalidArgument("Only support batch size <= 256.")); + phi::errors::InvalidArgument("Only support batch size <= 256.")); std::vector cpu_lodx(lod_size); for (int i = 0; i < lod_size; i++) { @@ -223,8 +223,8 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { xpu::ctx_guard RAII_GUARD(xpu_context); int col_numel = col_shape[0] * col_shape[1]; T* col_data = RAII_GUARD.alloc_l3_or_gm(col_numel); - PADDLE_ENFORCE_NOT_NULL( - col_data, paddle::platform::errors::Fatal("XPU memory is not enough")); + PADDLE_ENFORCE_NOT_NULL(col_data, + phi::errors::Fatal("XPU memory is not enough")); if (in_g || filter_g) { bool trans_a = false; @@ -235,7 +235,7 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { int k1 = filter->dims()[1]; PADDLE_ENFORCE_EQ(k, k1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of FC in SequenceConvGradOp is invalid." "The k of matrix A is %d, k1 of matrix B is %d." "But expect k == k1", @@ -273,10 +273,10 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { } if (in_g) { - PADDLE_ENFORCE_LT(sequence_width, - 512, - platform::errors::InvalidArgument( - "Only support sequence_width < 512.")); + PADDLE_ENFORCE_LT( + sequence_width, + 512, + phi::errors::InvalidArgument("Only support sequence_width < 512.")); in_g->mutable_data(context.GetPlace()); in_g->set_lod(in->lod()); @@ -317,7 +317,7 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { int n = out_g->dims()[1]; PADDLE_ENFORCE_EQ(k, k1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of FC in SequenceConvGradOp is invalid." "The k of matrix A is %d, k1 of matrix B is %d." "But expect k == k1", diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc deleted file mode 100644 index 9ff5f1f96f389..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
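The ConcatLoD helper in the deleted sequence_concat_op.h merges level-0 LoDs entry-wise, so sequence lengths add up pairwise: [0, 3, 7] and [0, 7, 9] give [0, 10, 16], as in the deleted op comment. A standalone sketch of that merge (illustrative names, not the Paddle API):

// Entry-wise LoD merge as in the deleted ConcatLoD helper: position i
// of the result is the sum of position i across all input LoDs.
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<size_t> concat_lod(const std::vector<std::vector<size_t>>& lods) {
  std::vector<size_t> result(lods[0].size(), 0);
  for (size_t i = 1; i < result.size(); ++i) {
    size_t sum = 0;
    for (const auto& lod : lods) sum += lod[i];
    result[i] = sum;
  }
  return result;
}

int main() {
  // Example from the deleted SeqConcatOpMaker comment:
  // [0, 3, 7] + [0, 7, 9] -> [0, 10, 16]
  for (size_t v : concat_lod({{0, 3, 7}, {0, 7, 9}})) std::cout << v << " ";
  std::cout << "\n";
}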
- -#include "paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h" - -namespace paddle { -namespace operators { - -class SequenceEnumerateOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceEnumerate"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SequenceEnumerate"); - - const auto x_dims = ctx->GetInputDim("X"); - const auto win_size = ctx->Attrs().Get("win_size"); - ctx->SetOutputDim("Out", {x_dims[0], win_size}); - ctx->ShareLoD("X", "Out"); - } -}; - -class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(2-D phi::DenseTensor with the 2nd dimension equal to 1) " - "Input phi::DenseTensor of SequenceEnumerate operator."); - AddOutput("Out", - "(2-D phi::DenseTensor with the 2nd dimension equal to win_size) " - "Output phi::DenseTensor of SequenceEnumerate operator."); - AddAttr("win_size", "(int) The enumerate sequence window size.") - .AddCustomChecker([](const int& win_size) { - PADDLE_ENFORCE_GE(win_size, - 2, - platform::errors::InvalidArgument( - "The window size should be not less than 2." - "Received window size is %d", - win_size)); - }); - AddAttr("pad_value", "(int) The enumerate sequence padding value.") - .SetDefault(0); - AddAttr(framework::kAllKernelsMustComputeRuntimeShape, - "Skip calling InferShape() function in the runtime.") - .SetDefault(true); - AddComment(R"DOC( -Sequence Enumerate Operator. - -Generate a new sequence for the input index sequence, which enumerates all the -sub-sequences with length `win_size` of the input. -The enumerated sequence has the same 1st dimension with variable `input`, and -the 2nd dimension is `win_size`, padded by `pad_value` if necessary in generation. - -Examples: -Case 1: - Input: - X.lod = [[0, 3, 5]] - X.data = [[1], [2], [3], [4], [5]] - X.dims = [5, 1] - Attrs: - win_size = 2 - pad_value = 0 - Output: - Out.lod = [[0, 3, 5]] - Out.data = [[1, 2], [2, 3], [3, 0], [4, 5], [5, 0]] - Out.dims = [5, 2] - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(sequence_enumerate, - ops::SequenceEnumerateOp, - ops::SequenceEnumerateOpMaker); -PD_REGISTER_STRUCT_KERNEL(sequence_enumerate, - CPU, - ALL_LAYOUT, - ops::SequenceEnumerateKernel, - int32_t, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu deleted file mode 100644 index 7884232e5b10f..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include - -#include "paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h" -#include "paddle/phi/backends/gpu/gpu_primitives.h" - -namespace paddle { -namespace operators { -using phi::PADDLE_CUDA_NUM_THREADS; - -template -__global__ void CalcOutPut(const T* in_data, - const size_t* in_lod, - const size_t lod_len, - const int64_t win_size, - const int64_t pad_value, - T* out_data) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < in_lod[lod_len - 1]) { - int end_idx = 0; - // Get LoD interval of index - for (int i = 1; i < lod_len; ++i) { - if (index < in_lod[i]) { - end_idx = in_lod[i]; - break; - } - } - for (size_t i = 0; i < win_size; ++i) { - int word_pos = index + i; - out_data[index * win_size + i] = - word_pos < end_idx ? in_data[word_pos] : pad_value; - } - } -} - -template -class SequenceEnumerateOpCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); - int win_size = context.Attr("win_size"); - int pad_value = context.Attr("pad_value"); - - auto in_dims = in->dims(); - auto in_lod = in->lod(); - - PADDLE_ENFORCE_EQ( - static_cast(in_dims[0]), - in_lod[0].back(), - platform::errors::InvalidArgument( - "The actual input data's size mismatched with LoD information." - "Received input data size is %d (actual) vs %d (loD information).", - static_cast(in_dims[0]), - in_lod[0].back())); - - /* Generate enumerate sequence set */ - auto stream = context.cuda_device_context().stream(); - auto lod0 = in_lod[0]; - auto in_len = in->numel(); - auto in_data = in->data(); - out->Resize({in_dims[0], win_size}); - auto out_data = out->mutable_data(context.GetPlace()); - // Copy LoD to GPU - phi::MixVector mixv_lod0(&lod0); - const size_t* dev_in_lod_ptr = mixv_lod0.CUDAData(context.GetPlace()); - // Calc output tensor - CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1, - PADDLE_CUDA_NUM_THREADS, - 0, - stream>>>( - in_data, dev_in_lod_ptr, lod0.size(), win_size, pad_value, out_data); - out->set_lod(in->lod()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL(sequence_enumerate, - GPU, - ALL_LAYOUT, - ops::SequenceEnumerateOpCUDAKernel, - int32_t, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h deleted file mode 100644 index c66f4065a58f1..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
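The deleted SequenceEnumerate kernels emit, for every position of each level-0 sequence, the next win_size ids, padding with pad_value past the sequence end. A standalone sketch reproducing Case 1 from the deleted op comment (illustrative names, not the Paddle API):

// Window enumeration as in the deleted SequenceEnumerateKernel: one
// win_size-wide row per input position, padded at sequence boundaries.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<int64_t> enumerate_seq(const std::vector<int64_t>& data,
                                   const std::vector<size_t>& lod0,
                                   int win_size, int64_t pad_value) {
  std::vector<int64_t> out;
  out.reserve(data.size() * win_size);
  for (size_t i = 0; i + 1 < lod0.size(); ++i) {
    for (size_t idx = lod0[i]; idx < lod0[i + 1]; ++idx) {
      for (int k = 0; k < win_size; ++k) {
        size_t pos = idx + k;
        // Pad once the window runs past the end of this sequence.
        out.push_back(pos < lod0[i + 1] ? data[pos] : pad_value);
      }
    }
  }
  return out;
}

int main() {
  // Case 1 from the deleted op comment: lod [0, 3, 5], win_size 2,
  // pad_value 0 -> [1 2] [2 3] [3 0] [4 5] [5 0]
  auto out = enumerate_seq({1, 2, 3, 4, 5}, {0, 3, 5}, 2, 0);
  for (size_t i = 0; i < out.size(); i += 2)
    std::cout << "[" << out[i] << " " << out[i + 1] << "] ";
  std::cout << "\n";
}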
- -#pragma once - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class SequenceEnumerateKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); - int win_size = context.Attr("win_size"); - auto pad_value = static_cast(context.Attr("pad_value")); - - PADDLE_ENFORCE_EQ( - in->lod().empty(), - false, - platform::errors::InvalidArgument( - "Input(X) phi::DenseTensor of SequenceEnumerateOp does not contain " - "LoD information.")); - - auto in_dims = common::vectorize(in->dims()); - auto lod0 = in->lod()[0]; - PADDLE_ENFORCE_EQ( - static_cast(in_dims[0]), - lod0.back(), - platform::errors::InvalidArgument( - "The actual input data's size mismatched with LoD information." - "Received input data size is %d (actual) vs %d (loD information).", - static_cast(in_dims[0]), - lod0.back())); - PADDLE_ENFORCE_EQ( - in_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "Input(X) of SequenceEnumerate operator's rank should be 2." - "Received %d instead.", - in_dims.size())); - PADDLE_ENFORCE_EQ(in_dims[1], - 1, - platform::errors::InvalidArgument( - "Input(X) of SequenceEnumerate operator's 2nd " - "dimension should be 1. Received %d instead.", - in_dims[1])); - - // Generate enumerate sequence set - auto in_data = in->data(); - out->Resize({in_dims[0], win_size}); - out->set_lod(in->lod()); - auto out_data = out->mutable_data(context.GetPlace()); - for (size_t i = 0; i < lod0.size() - 1; ++i) { - if (lod0[i] == lod0[i + 1]) continue; - int start = lod0[i]; - int end = lod0[i + 1]; - - int copy_size = win_size < end - start + 1 ? win_size : end - start + 1; - int mid = end + 1 - copy_size; - int pad_num = win_size - copy_size; - copy_size *= sizeof(T); - for (int idx = start; idx < mid; ++idx) { - std::memcpy(out_data, in_data + idx, copy_size); - out_data += win_size; - } - for (int idx = mid; idx < end; ++idx) { - copy_size -= sizeof(T); - pad_num++; - std::memcpy(out_data, in_data + idx, copy_size); - T* pdata = out_data + copy_size / sizeof(T); - for (int i = 0; i < pad_num; ++i) { - pdata[i] = pad_value; - } - out_data += win_size; - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc index 7f6eeff11b5be..03edbdc1a5d04 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc @@ -29,7 +29,7 @@ class SequenceEraseOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE( x_dims.size() == 2 && x_dims[1] == 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of SequenceEraseOp should be a 2-D phi::DenseTensor " "with the 2nd dimension equal to 1," "but received size %d with the 2nd dimension %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu index bbc80587a9cf7..8b4b76a762d94 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu @@ -73,7 +73,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( lod[lod.size() - 1].back(), (size_t)in->numel(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The actual size 
mismatches with the LoD information.")); auto tokens = ctx.Attr>("tokens"); auto in_len = in->numel(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.h b/paddle/fluid/operators/sequence_ops/sequence_erase_op.h index 9a4aef1d93ab4..505c4245155ad 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.h @@ -32,11 +32,11 @@ class SequenceEraseKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( lod.empty(), false, - platform::errors::InvalidArgument("Input(X) Tensor of SequenceEraseOp " - "does not contain LoD information.")); + phi::errors::InvalidArgument("Input(X) Tensor of SequenceEraseOp " + "does not contain LoD information.")); PADDLE_ENFORCE_EQ(lod[lod.size() - 1].back(), static_cast(in->numel()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The actual input size %d mismatches with the LoD " "information size %d.", lod[lod.size() - 1].back(), diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc deleted file mode 100644 index 1f9fd565ca77c..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc +++ /dev/null @@ -1,227 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h" - -#include -#include - -namespace paddle { -namespace operators { - -class SequenceExpandAsOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInputs("X"), "Input", "X", "SequenceExpandAs"); - OP_INOUT_CHECK(ctx->HasInputs("Y"), "Input", "Y", "SequenceExpandAs"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SequenceExpandAs"); - - auto x_dims = ctx->GetInputDim("X"); - auto out_dims = x_dims; - - PADDLE_ENFORCE_GE(x_dims.size(), - 2, - platform::errors::InvalidArgument( - "Dimension number of Input(X) should be at least 2. " - "But received X's dimensions = %d, X's shape = [%s].", - x_dims.size(), - x_dims)); - - if (ctx->IsRuntime()) { - framework::Variable* x_var = - PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("X")[0]); - framework::Variable* y_var = - PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("Y")[0]); - - auto& x_dim = x_var->Get().dims(); - auto& y_lod = y_var->Get().lod(); - - PADDLE_ENFORCE_EQ(y_lod.size(), - 1, - platform::errors::InvalidArgument( - "Level number of Input(Y)'s lod should be 1. But " - "received Y's lod level = %d.", - y_lod.size())); - - PADDLE_ENFORCE_EQ(static_cast(x_dim[0]), - y_lod[0].size() - 1, - platform::errors::InvalidArgument( - "The first dimension of Input(X) should be one " - "less than the size of Input(Y)'s 0 level lod. 
But " - "received X's shape[0] = %d, Y's lod[0].size = %d.", - x_dim[0], - y_lod[0].size())); - - int64_t out_first_dim = 0; - if (y_lod[0].size() <= 1) { - out_first_dim = x_dims[0]; - } else { - for (size_t i = 1; i < y_lod[0].size(); ++i) { - out_first_dim += static_cast(y_lod[0][i] - y_lod[0][i - 1]); - } - } - out_dims[0] = out_first_dim; - } else { - out_dims[0] = -1; - } - - ctx->SetOutputDim("Out", out_dims); - ctx->ShareLoD("Y", /*->*/ "Out"); - } - - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class SequenceExpandAsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(phi::DenseTensor, default phi::DenseTensor) A 2-D " - "phi::DenseTensor whose lod " - "level is at most 1."); - AddInput("Y", - "(phi::DenseTensor, default phi::DenseTensor) Referred " - "phi::DenseTensor whose " - "lod (specified level) is referred by Input(X)."); - AddOutput("Out", - "(phi::DenseTensor, default phi::DenseTensor) Output " - "phi::DenseTensor which is " - "generated from Input(X) by referring lod of Input(Y)."); - AddComment(R"DOC( -Sequence Expand As Operator. - -This operator expands `X` according to the zeroth level lod of `Y`. Current -implementation requires the level number of Input(Y)'s lod should be 1, and -the first dimension of Input(X) should be equal to the size of Input(Y)'s zeroth -level lod, and lod of Input(X) is not considered. - -Following are cases to better explain how this works: - -Case 1: - -Given a 1-level phi::DenseTensor input(X) - X.data = [[a], [b], [c], [d]] - X.dims = [4, 1] -and input(Y) - Y.lod = [[0, 3, 6, 7, 8]] -ref_level: 0 -then we get 1-level phi::DenseTensor - Out.lod = [[0, 3, 6, 7, 8]] - Out.data = [[a], [a], [a], [b], [b], [b], [c], [d]] - Out.dims = [8, 1] - -Case 2: - -Given a common phi::DenseTensor input(X) - X.data = [[a, b], [c, d], [e, f]] - X.dims = [3, 2] -and input(Y) - Y.lod = [[0, 2, 3, 6]] -ref_level: 0 -then we get a common phi::DenseTensor - Out.lod = [[0, 2, 3, 6]] - Out.data = [[a, b], [a, b] [c, d], [e, f], [e, f], [e, f]] - Out.dims = [6, 2] - -)DOC"); - } -}; - -class SequenceExpandAsOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInputs("X"), "Input", "X", "SequenceExpandAsGrad"); - OP_INOUT_CHECK(ctx->HasInputs(framework::GradVarName("Out")), - "Input", - "Out@GRAD", - "SequenceExpandAsGrad"); - - auto x_dims = ctx->GetInputDim("X"); - auto x_grad_name = framework::GradVarName("X"); - - if (ctx->HasOutput(x_grad_name)) { - ctx->SetOutputDim(x_grad_name, x_dims); - ctx->ShareLoD("X", x_grad_name); - } - } - - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); - } -}; - -template -class SequenceExpandAsOpGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("sequence_expand_as_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Y", this->Input("Y")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - 
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(SequenceExpandAsOpNoNeedBufferVarsInferer, - "Y"); -DECLARE_NO_NEED_BUFFER_VARS_INFERER( - SequenceExpandAsGradOpNoNeedBufferVarsInferer, "X", "Y"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - sequence_expand_as, - ops::SequenceExpandAsOp, - ops::SequenceExpandAsOpMaker, - ops::SequenceExpandAsOpGradOpMaker, - ops::SequenceExpandAsOpGradOpMaker, - ops::SequenceExpandAsOpNoNeedBufferVarsInferer); -REGISTER_OPERATOR(sequence_expand_as_grad, - ops::SequenceExpandAsOpGrad, - ops::SequenceExpandAsGradOpNoNeedBufferVarsInferer); -PD_REGISTER_STRUCT_KERNEL(sequence_expand_as, - CPU, - ALL_LAYOUT, - ops::SequenceExpandAsKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_expand_as_grad, - CPU, - ALL_LAYOUT, - ops::SequenceExpandAsGradKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu deleted file mode 100644 index 053c439814e95..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu +++ /dev/null @@ -1,148 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
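The deleted SequenceExpandAsFunctor repeats row h of X exactly span = ref_lod[h+1] - ref_lod[h] times, which is how Case 1 above turns X = [a, b, c, d] with Y.lod = [0, 3, 6, 7, 8] into [a, a, a, b, b, b, c, d]. A standalone CPU sketch of that expansion (illustrative names, not the Paddle API):

// Row expansion as in the deleted CPU SequenceExpandAsFunctor: each
// input row is copied into the span of output rows its LoD entry covers.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

template <typename T>
std::vector<T> expand_as(const std::vector<T>& x, size_t width,
                         const std::vector<size_t>& ref_lod) {
  std::vector<T> out(ref_lod.back() * width);
  size_t height = x.size() / width;
  for (size_t h = 0; h < height; ++h) {
    size_t span = ref_lod[h + 1] - ref_lod[h];  // rows to produce from row h
    for (size_t k = 0; k < span; ++k)
      for (size_t w = 0; w < width; ++w)
        out[(ref_lod[h] + k) * width + w] = x[h * width + w];
  }
  return out;
}

int main() {
  // Case 1 from the deleted op comment:
  // X = [a b c d], Y.lod = [0 3 6 7 8] -> [a a a b b b c d]
  for (const auto& s :
       expand_as<std::string>({"a", "b", "c", "d"}, 1, {0, 3, 6, 7, 8}))
    std::cout << s << " ";
  std::cout << "\n";
}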
*/ - -#include - -#include "paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h" -#include "paddle/phi/backends/gpu/gpu_primitives.h" - -namespace paddle { -namespace operators { - -template -static __global__ void sequence_expand_as_kernel(const T *in_data, - const size_t *expand_offset, - const size_t src_hight, - const size_t src_widht, - T *out_data) { - for (int h_id = blockIdx.x; h_id < src_hight; h_id += gridDim.x) { - int span = expand_offset[h_id + 1] - expand_offset[h_id]; - if (span == 0) continue; - const T *src = in_data + h_id * src_widht; - for (int w_id = threadIdx.x; w_id < src_widht; w_id += blockDim.x) { - T ele = src[w_id]; - int offset = expand_offset[h_id] * src_widht; - for (int k = 0; k < span; ++k) { - out_data[offset + k * src_widht + w_id] = ele; - } - } - } -} - -template -static __global__ void sequence_expand_as_grad_kernel( - const T *dout_data, - const size_t *expand_offset, - const size_t dst_hight, - const size_t dst_width, - T *dx_data) { - for (int h_id = blockIdx.x; h_id < dst_hight; h_id += gridDim.x) { - T *dst = dx_data + h_id * dst_width; - int span = expand_offset[h_id + 1] - expand_offset[h_id]; - - for (int w_id = threadIdx.x; w_id < dst_width; w_id += blockDim.x) { - T result = 0; - for (int k = 0; k < span; ++k) { - int offset = (expand_offset[h_id] + k) * dst_width; - const T *src = dout_data + offset; - result += src[w_id]; - } - dst[w_id] = result; - } - } -} - -template -struct SequenceExpandAsFunctor { - void operator()(const phi::GPUContext &context, - const phi::DenseTensor &x, - const phi::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *out) { - int height = x.dims()[0]; - int width = common::product(x.dims()) / height; - - const int kThreadsPerBlock = 1024; - int thread_x = kThreadsPerBlock; - if (width < kThreadsPerBlock) { // block_cols is aligned by 32. - thread_x = ((width + 31) >> 5) << 5; - } - - int max_threads = context.GetMaxPhysicalThreadCount(); - int block_x = std::max(max_threads / thread_x, 1); - - dim3 block_size(thread_x); - dim3 grid_size(block_x); - phi::MixVector mixv_ref_lod(&ref_lod); - sequence_expand_as_kernel<<>>( - x.data(), - mixv_ref_lod.CUDAData(context.GetPlace()), - height, - width, - out->mutable_data(context.GetPlace())); - } -}; - -template -struct SequenceExpandAsGradFunctor { - void operator()(const phi::GPUContext &context, - const phi::DenseTensor &dout, - const phi::Vector &ref_lod, /*expand based lod*/ - phi::DenseTensor *dx) { - int height = dx->dims()[0]; - int width = common::product(dx->dims()) / height; - - const int kThreadsPerBlock = 1024; - int thread_x = kThreadsPerBlock; - if (width < kThreadsPerBlock) { // block_cols is aligned by 32. 
- thread_x = ((width + 31) >> 5) << 5; - } - - int max_threads = context.GetMaxPhysicalThreadCount(); - int block_x = std::max(max_threads / thread_x, 1); - - dim3 block_size(thread_x); - dim3 grid_size(block_x); - phi::MixVector mixv_ref_lod(&ref_lod); - sequence_expand_as_grad_kernel<<>>( - dout.data(), - mixv_ref_lod.CUDAData(context.GetPlace()), - height, - width, - dx->mutable_data(context.GetPlace())); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL(sequence_expand_as, - GPU, - ALL_LAYOUT, - ops::SequenceExpandAsKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_expand_as_grad, - GPU, - ALL_LAYOUT, - ops::SequenceExpandAsGradKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h deleted file mode 100644 index d9a1d419f5a9e..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h +++ /dev/null @@ -1,167 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include // std::iota -#include -#include - -#include "glog/logging.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -struct SequenceExpandAsFunctor { - void operator()(const DeviceContext &ctx, - const phi::DenseTensor &x, - const phi::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *out); -}; - -template -struct SequenceExpandAsGradFunctor { - void operator()(const DeviceContext &ctx, - const phi::DenseTensor &dout, - const phi::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *dx); -}; - -template -struct SequenceExpandAsFunctor { - void operator()(const phi::CPUContext &context, - const phi::DenseTensor &x, - const phi::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *out) { - int64_t height = x.dims()[0]; - int64_t width = common::product(x.dims()) / height; - - const T *in_data = x.data(); - T *out_data = out->mutable_data(context.GetPlace()); - - for (int h_id = 0; h_id < height; ++h_id) { - size_t span = ref_lod[h_id + 1] - ref_lod[h_id]; - if (span == 0) continue; - const T *src = in_data + h_id * width; - for (int64_t w_id = 0; w_id < width; ++w_id) { - T ele = src[w_id]; - size_t offset = ref_lod[h_id] * width; - for (size_t k = 0; k < span; ++k) { - out_data[offset + k * width + w_id] = ele; - } - } - } - } -}; - -template -class SequenceExpandAsKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto *x = context.Input("X"); - auto *y = context.Input("Y"); - auto *out = context.Output("Out"); - - PADDLE_ENFORCE_EQ( - y->lod().empty(), - false, - platform::errors::InvalidArgument( - "Input(Y) of SequenceExpandAsOp has wrong LoD information. 
" - "Expected Y's lod is not empty, but received empty lod.")); - - auto &y_lod = y->lod(); - PADDLE_ENFORCE_EQ(y_lod.size(), - 1, - platform::errors::InvalidArgument( - "Input(Y) of SequenceExpandAsOp has wrong LoD " - "information. Expected Y's lod level = 1, but " - "received lod level = %d.", - y_lod.size())); - PADDLE_ENFORCE_GT(y_lod[0].size(), - 1, - platform::errors::InvalidArgument( - "Input(Y) of SequenceExpandAsOp has wrong LoD " - "information. Expected the size of Y's lod[0] > 1, " - "but received lod[0].size = %d.", - y_lod[0].size())); - - out->mutable_data(context.GetPlace()); - - auto &dev_ctx = context.template device_context(); - SequenceExpandAsFunctor seq_expand_functor; - seq_expand_functor(dev_ctx, *x, y_lod[0], out); - } -}; - -/* - *Given Grad(Out) - * - * Grad(Out).lod = [[0, 3, 6]] - * Grad(Out).data = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6] - * Then - * Grad(X).data = [(0.1 + 0.2 + 0.3), (0.4 + 0.5 + 0.6)] - * = [0.6, 1.5] - * Grad(X).lod = Input(X).lod - * - * */ -template -struct SequenceExpandAsGradFunctor { - void operator()(const phi::CPUContext &context, - const phi::DenseTensor &dout, - const phi::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *dx) { - int64_t height = dx->dims()[0]; - int64_t width = common::product(dx->dims()) / height; - - const T *dout_data = dout.data(); - T *dx_data = dx->mutable_data(context.GetPlace()); - - for (int64_t h_id = 0; h_id < height; ++h_id) { - T *dst = dx_data + h_id * width; - size_t span = ref_lod[h_id + 1] - ref_lod[h_id]; - for (int64_t w_id = 0; w_id < width; ++w_id) { - T result = 0; - for (size_t k = 0; k < span; ++k) { - size_t offset = (ref_lod[h_id] + k) * width; - result += dout_data[offset + w_id]; - } - dst[w_id] = result; - } - } - } -}; - -template -class SequenceExpandAsGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto *g_out = - context.Input(framework::GradVarName("Out")); - auto *y = context.Input("Y"); - auto *g_x = context.Output(framework::GradVarName("X")); - - g_x->mutable_data(context.GetPlace()); - - SequenceExpandAsGradFunctor functor; - functor(context.template device_context(), - *g_out, - y->lod()[0], - g_x); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc index e6a64be83473d..4e7deab77952a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc @@ -38,7 +38,7 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE( x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dimension number of Input(X) should be at least 2. But " "received: input rank %u, input shape [%s].", x_dims.size(), @@ -55,14 +55,14 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE(x_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Level of Input(X)'s lod should not be " "greater than 1. But received: lod level %u.", x_lod.size())); PADDLE_ENFORCE_GT( y_lod.size(), 0UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Level of Input(Y)'s lod should be greater than 0. 
But " "received: lod level %u.", y_lod.size())); @@ -70,7 +70,7 @@ class SequenceExpandOp : public framework::OperatorWithKernel { ref_level == -1 || (ref_level >= 0 && ref_level < static_cast(y_lod.size())), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Invalid `ref_level`, which should be either equal to -1 " "or in [0, %d), but received `ref_level` = %u.", y_lod.size(), @@ -82,7 +82,7 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_lod[0].size(), y_lod[ref_level].size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Level number of Input(X)'s lod could be 0. Otherwise " "size of Input(X)'s first level lod should be equal to " "size of Input(Y)'s referred level lod. But received: " @@ -95,7 +95,7 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_dims[0], static_cast(y_lod[ref_level].size()) - 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "When Input(X)'s lod is null, the dims[0] of " "Input(X) should match the " "size of Input(Y)'s referred level lod. But received " diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h index 0f53249cfbc24..1204775c44226 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h @@ -96,7 +96,7 @@ class SequenceExpandKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( y_lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Y) phi::DenseTensor of SequenceExpandOp does not contain " "LoD information.")); diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc index a6cd59e44dff0..a123ae14f39b1 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc @@ -61,7 +61,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker { PADDLE_ENFORCE_EQ( v < 0 || v >= 1, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attr(maxlen) must be less than 0 or larger than 1")); }); AddAttr("out_dtype", "Output data type"); diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc deleted file mode 100644 index d033ac210c7c8..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc +++ /dev/null @@ -1,305 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/sequence_ops/sequence_pad_op.h" - -#include -#include - -namespace paddle { -namespace operators { - -class SequencePadOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of SequencePadOp should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("PadValue"), - true, - platform::errors::NotFound( - "Input(PadValue) of SequencePadOp should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of SequencePadOp should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Length"), - true, - platform::errors::NotFound( - "Output(Length) of SequencePadOp should not be null.")); - - auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_GE(x_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of SequencePadOp Input(X) can't be less " - "than 2. But the rank we received is %d", - x_dims.size())); - auto time_step_dims = common::slice_ddim(x_dims, 1, x_dims.size()); - auto pad_value_dims = ctx->GetInputDim("PadValue"); - PADDLE_ENFORCE_EQ( - pad_value_dims == common::make_ddim({1}) || - pad_value_dims == common::make_ddim({}) || - pad_value_dims == time_step_dims, - true, - platform::errors::InvalidArgument( - "The SequencePadOp Input(PadValue) must be a scalar or a tensor " - "whose shape equals to time steps in sequences")); - - int out_dim_0 = -1; - - int padded_length = ctx->Attrs().Get("padded_length"); - if (ctx->IsRuntime()) { - // run time - framework::Variable* x_var = - PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("X")[0]); - const auto& x_lod = x_var->Get().lod(); - PADDLE_ENFORCE_EQ(x_lod.empty(), - false, - platform::errors::NotFound( - "The SequencePadOp Input(X) must hold lod info.")); - const auto& x_lod_0 = x_lod[0]; - PADDLE_ENFORCE_GE( - x_lod_0.size(), - 2, - platform::errors::InvalidArgument( - "The size of SequencePadOp Input(X)'s lod info can't be less " - "than 2. But the size we received is %d", - x_lod_0.size())); - PADDLE_ENFORCE_EQ(x_dims[0], - static_cast(x_lod_0.back()), - platform::errors::InvalidArgument( - "The SequencePadOp Input(X)'s lod info mismatches " - "the actual tensor shape. The 1st dimension of " - "Input(X)'s lod info is %d, the 1st dimension of " - "actual tensor shape is %d", - x_dims[0], - static_cast(x_lod_0.back()))); - - int seq_num = static_cast(x_lod_0.size() - 1); - int max_seq_len = - static_cast(phi::funcs::MaximumSequenceLength(x_lod_0)); - if (padded_length == -1) { - padded_length = max_seq_len; - } - PADDLE_ENFORCE_GE( - padded_length, - max_seq_len, - platform::errors::InvalidArgument( - "The SequencePadOp Attr(padded_length) should be greater than or " - "equal to the " - "length of the longest original sequence. But the padded_length " - "we received is %d, the length of the longest original sequence " - "is %d", - padded_length, - max_seq_len)); - out_dim_0 = seq_num; - } else { - // compile time - if (padded_length == -1) { - padded_length = 1; - } - PADDLE_ENFORCE_GT( - ctx->GetLoDLevel("X"), - 0, - platform::errors::InvalidArgument( - "The LoD level of SequencePadOp Input(X) should be " - "larger than 0. 
But the LoD level we received is %d", - ctx->GetLoDLevel("X"))); - } - - std::vector out_dims_vec{out_dim_0, padded_length}; - std::vector len_dims_vec{out_dim_0}; - auto time_step_dims_vec = common::vectorize(time_step_dims); - out_dims_vec.insert(out_dims_vec.end(), - time_step_dims_vec.begin(), - time_step_dims_vec.end()); - ctx->SetOutputDim("Out", common::make_ddim(out_dims_vec)); - ctx->SetOutputDim("Length", common::make_ddim(len_dims_vec)); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; - -class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(phi::DenseTensor, default phi::DenseTensor) Input " - "variable which " - "should contain lod information."); - AddInput("PadValue", - "(phi::DenseTensor), this phi::DenseTensor holds values that will " - "be fill into " - "padded steps. It can be a scalar or a tensor whose shape equals " - "to time steps in sequences. If it's a scalar, it will be " - "automatically broadcasted to the shape of time step."); - AddOutput("Out", - "(phi::DenseTensor) The output variable, which contains padded " - "sequences."); - AddOutput("Length", - "(phi::DenseTensor) The output variable, which contains the " - "actual length of " - "sequences before padding."); - AddAttr( - "padded_length", - "The length of padded sequences. It can be set to -1 or " - "any positive int. When it is -1, all sequences will be padded up to " - "the length of the longest one among them; when it a certain positive " - "value, it must be greater than the length of the longest original " - "sequence.") - .SetDefault(-1); - AddComment(R"DOC( - Sequence Pad Operator - - This operator pads sequences in a same batch to a consistent length. - The length is specified by attribute 'padded_length'. New elements, - whose values are specified by input 'PadValue', will be appended to - the end of each sequence, to make their final lengths consistent. 
- - Following are cases to better explain how this works: - - Case 1: - - Given a 1-level phi::DenseTensor input(X): - X.lod = [[0, 2, 5]] - X.data = [a, b, c, d, e] - and Input(PadValue): - PadValue.data = [0] - and attribute 'padded_length' = 4, - then we get phi::DenseTensor: - Out.data = [[a, b, 0, 0], - [c, d, e, 0]] - Length.data = [2, 3] - - Case 2: - - Given a 1-level phi::DenseTensor input(X): - X.lod = [[0, 2, 5]] - X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]] - and Input(PadValue): - PadValue.data = [0] - and attribute 'padded_length' = -1, which mean using the length - of longest input sequence(3 in this case), - then we get phi::DenseTensor: - Out.data = [[[a1, a2], [b1, b2], [0, 0]], - [[c1, c2], [d1, d2], [e1, e2]]] - Length.data = [2, 3] - - Case 3: - - Given a 1-level phi::DenseTensor input(X): - X.lod = [[0, 2, 5]] - X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]] - and Input(PadValue): - PadValue.data = [p1, p2] - and attribute 'padded_length' = -1, which mean using the length - of longest input sequence(3 in this case), - then we get phi::DenseTensor: - Out.data = [[[a1, a2], [b1, b2], [p1, p2]], - [[c1, c2], [d1, d2], [e1, e2]]] - Length.data = [2, 3] - - )DOC"); - } -}; - -class SequencePadGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of SequencePadGradOp should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::NotFound( - "Input(Out@GRAD) of SequencePadGradOp should not be null.")); - - if (ctx->HasOutput(framework::GradVarName("X"))) { - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - ctx->ShareLoD("X", /*->*/ framework::GradVarName("X")); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; - -template -class SequencePadGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("sequence_pad_grad"); - op->SetAttrMap(this->Attrs()); - op->SetInput("X", this->Input("X")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(SequencePadGradOpNoNeedBufferVarsInferer, - "X"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(sequence_pad, - ops::SequencePadOp, - ops::SequencePadOpMaker, - ops::SequencePadGradOpMaker, - ops::SequencePadGradOpMaker); -REGISTER_OPERATOR(sequence_pad_grad, - ops::SequencePadGradOp, - ops::SequencePadGradOpNoNeedBufferVarsInferer); - -PD_REGISTER_STRUCT_KERNEL(sequence_pad, - CPU, - ALL_LAYOUT, - ops::SequencePadOpKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_pad_grad, - CPU, - ALL_LAYOUT, - ops::SequencePadGradOpKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cu b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cu deleted file mode 100644 index 
910a4eae21f1e..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cu +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/sequence_ops/sequence_pad_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL(sequence_pad, - GPU, - ALL_LAYOUT, - ops::SequencePadOpKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_pad_grad, - GPU, - ALL_LAYOUT, - ops::SequencePadGradOpKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h deleted file mode 100644 index d31611b94a658..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
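The padding semantics spelled out in the SequencePad DOC cases above can be condensed into a few lines. A hedged sketch in plain C++ (not the Paddle kernel, which works on DenseTensors via PaddingLoDTensorFunctor; `SequencePad` here is a hypothetical helper over flat vectors):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Pads every LoD segment of a flat sequence to padded_length with pad_value
// and records each segment's original length, matching the Out/Length pair
// in the cases above. Illustrative only.
void SequencePad(const std::vector<float>& x,
                 const std::vector<size_t>& lod,  // offsets, e.g. {0, 2, 5}
                 float pad_value,
                 int padded_length,  // -1 means "use the longest segment"
                 std::vector<std::vector<float>>* out,
                 std::vector<int>* length) {
  int max_len = 0;
  for (size_t i = 0; i + 1 < lod.size(); ++i)
    max_len = std::max(max_len, static_cast<int>(lod[i + 1] - lod[i]));
  if (padded_length == -1) padded_length = max_len;
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    std::vector<float> row(padded_length, pad_value);
    for (size_t j = lod[i]; j < lod[i + 1]; ++j) row[j - lod[i]] = x[j];
    out->push_back(row);
    length->push_back(static_cast<int>(lod[i + 1] - lod[i]));
  }
}

int main() {
  // Mirrors Case 1 above: lod [[0, 2, 5]], PadValue 0, padded_length 4.
  std::vector<float> x = {1, 2, 3, 4, 5};
  std::vector<std::vector<float>> out;
  std::vector<int> length;
  SequencePad(x, {0, 2, 5}, 0.0f, 4, &out, &length);
  for (const auto& row : out) {
    for (float v : row) std::printf("%g ", v);
    std::printf("\n");  // "1 2 0 0" then "3 4 5 0"; Length = [2, 3]
  }
  return 0;
}
```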
*/ - -#pragma once - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/funcs/sequence_padding.h" - -namespace paddle { -namespace operators { - -using LoD = framework::LoD; -template -class SequencePadOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - auto* len_t = ctx.Output("Length"); - out->mutable_data(ctx.GetPlace()); - - PADDLE_ENFORCE_EQ(x->lod().empty(), - false, - platform::errors::NotFound( - "Input(X) phi::DenseTensor of SequencePadOp does not " - "contain LoD information.")); - - const auto* pad_value = ctx.Input("PadValue"); - - int padded_length = ctx.Attr("padded_length"); - - phi::funcs::PaddingLoDTensorFunctor()( - ctx.template device_context(), - *x, - out, - *pad_value, - padded_length, - 0, - false, - phi::funcs::kBatchLengthWidth); - - phi::DenseTensor seq_len; - seq_len.Resize(len_t->dims()); - int64_t* len_data = seq_len.mutable_data(platform::CPUPlace()); - for (size_t i = 1; i < x->lod()[0].size(); ++i) { - len_data[i - 1] = x->lod()[0][i] - x->lod()[0][i - 1]; - } - framework::TensorCopy(seq_len, - ctx.GetPlace(), - ctx.template device_context(), - len_t); - } -}; - -template -class SequencePadGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* d_x = ctx.Output(framework::GradVarName("X")); - if (d_x) { - const auto* d_out = - ctx.Input(framework::GradVarName("Out")); - d_x->mutable_data(ctx.GetPlace()); - - int padded_length = ctx.Attr("padded_length"); - - phi::funcs::UnpaddingLoDTensorFunctor()( - ctx.template device_context(), - *d_out, - d_x, - padded_length, - 0, - false, - phi::funcs::kBatchLengthWidth); - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc index d616bca2c4e3b..2d58d2b32276f 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc @@ -31,10 +31,10 @@ class SequencePoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( in_lod_level, 0, - platform::errors::InvalidArgument("The LoD level of Input(X) should " - "be larger than 0, but received: " - "lod level %u.", - in_lod_level)); + phi::errors::InvalidArgument("The LoD level of Input(X) should " + "be larger than 0, but received: " + "lod level %u.", + in_lod_level)); ctx->SetLoDLevel("Out", in_lod_level - 1); } @@ -126,7 +126,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(og_dims.size(), x_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of output grad must equal to Input(X). But " "received: input rank %u, input shape [%s].", og_dims.size(), @@ -135,7 +135,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( og_dims[i], x_dims[i], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension mismatch between Input(OUT@GRAD) and " "Input(X). 
Received Input(OUT@GRAD): input rank %u, " "input shape [%s]; received Input(X): input rank %u, " diff --git a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc index 6e34f76fbd37d..23ce04ca74262 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc @@ -27,18 +27,18 @@ class SequenceReshapeOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of SequenceReshapeOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Output(Out) of SequenceReshapeOp should not be null.")); auto x_dims = ctx->GetInputDim("X"); auto x_numel = product(x_dims); PADDLE_ENFORCE_EQ(x_dims.size(), 2U, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of SequenceReshapeOp Input(X) should be 2. " "But the rank we received is %d", x_dims.size())); @@ -105,12 +105,12 @@ class SequenceReshapeGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@GRAD) of SequenceReshapeGradOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(X) of SequenceReshapeGradOp should not be null.")); ctx->ShareDim("X", /*->*/ framework::GradVarName("X")); diff --git a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h index de530bed0d663..e506b310ea2bb 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h @@ -32,14 +32,14 @@ class SequenceReshapeKernel : public framework::OpKernel { int64_t in_width = in_dims[1]; auto& in_lod = in->lod(); - PADDLE_ENFORCE_EQ(in_lod.empty(), - false, - platform::errors::NotFound( - "Input(X) Tensor of SequenceReshapeOp does not " - "contain LoD information.")); + PADDLE_ENFORCE_EQ( + in_lod.empty(), + false, + phi::errors::NotFound("Input(X) Tensor of SequenceReshapeOp does not " + "contain LoD information.")); PADDLE_ENFORCE_EQ(in_lod.size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) Tensor of SequenceReshapeOp Only support " "one level sequence now. But lod size " "of Input(X) is %d", @@ -47,7 +47,7 @@ class SequenceReshapeKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( (uint64_t)in_dims[0], in_lod[0].back(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of SequenceReshapeOp X.shape[0] and X.lod()[0].back() " "should " "be same. But X.shape[0] = %d, X.lod()[0].back() = %d", @@ -71,7 +71,7 @@ class SequenceReshapeKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( offset * out_width, seq_len * in_width, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Please make sure (sequence_length * dimension) " "can be divided by context Attr(new_dim) with no remainder for " "each sequence. 
But the %dth sequence is invalid.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h index 2236988025cbc..5b2d22218adf8 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h @@ -31,17 +31,17 @@ class SequenceReverseOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound("Input(X) of SequenceReverse must exist")); + phi::errors::NotFound("Input(X) of SequenceReverse must exist")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Y"), true, - platform::errors::NotFound("Output(Y) of SequenceReverse must exist")); + phi::errors::NotFound("Output(Y) of SequenceReverse must exist")); auto x_dim = ctx->GetInputDim("X"); PADDLE_ENFORCE_GE( x_dim.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of SequenceReverseOp Input(X) must be greater " "than or equal to 2. But the Input(X) tensor's rank we received is " "%d", @@ -120,15 +120,15 @@ class SequenceReverseOpKernel : public framework::OpKernel { auto &x = *ctx.Input("X"); auto *y = ctx.Output("Y"); - PADDLE_ENFORCE_EQ(x.lod().empty(), - false, - platform::errors::NotFound( - "Input(X) Tensor of SequenceReverseOp does not " - "contain LoD information.")); + PADDLE_ENFORCE_EQ( + x.lod().empty(), + false, + phi::errors::NotFound("Input(X) Tensor of SequenceReverseOp does not " + "contain LoD information.")); PADDLE_ENFORCE_EQ(x.lod().size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SequenceReverseOp only support one " "level lod. But the Input(X) lod size is %d", x.lod().size())); @@ -156,7 +156,7 @@ class SequenceReverseOpKernel : public framework::OpKernel { PADDLE_ENFORCE_NE( x_data, y_data, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SequenceReverse Op does not support in-place operation")); if (platform::is_cpu_place(ctx.GetPlace())) { diff --git a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc deleted file mode 100644 index cf7e549134cd0..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc +++ /dev/null @@ -1,204 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
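The divisibility check in the SequenceReshape kernel above ("sequence_length * dimension must be divisible by Attr(new_dim)") is really a statement about how the LoD is recomputed. A small sketch, assuming a level-1 offset vector (hypothetical `ReshapeLoD` helper, not Paddle API):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// When a [N, in_width] sequence tensor is reshaped to width new_dim, each
// sequence must satisfy seq_len * in_width % new_dim == 0, and its new
// offset advances by seq_len * in_width / new_dim. Illustrative only.
std::vector<size_t> ReshapeLoD(const std::vector<size_t>& lod,
                               int64_t in_width,
                               int64_t new_dim) {
  std::vector<size_t> out_lod(1, 0);
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
    assert(seq_len * in_width % new_dim == 0);  // "no remainder" check
    out_lod.push_back(out_lod.back() + seq_len * in_width / new_dim);
  }
  return out_lod;
}

int main() {
  // Sequences of lengths 2 and 4 with width 6, reshaped to width 4:
  // element counts 12 and 24 become lengths 3 and 6.
  auto out_lod = ReshapeLoD({0, 2, 6}, 6, 4);
  for (size_t v : out_lod) std::printf("%zu ", v);  // 0 3 9
  std::printf("\n");
  return 0;
}
```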
*/ - -#include "paddle/fluid/operators/sequence_ops/sequence_scatter_op.h" - -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; - -class SequenceScatterOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor) The source input of sequence scatter op"); - AddInput("Ids", - "(LoDTensor) The index input of sequence scatter op where X" - " will be updated, must be a LoDTensor"); - AddInput("Updates", - "(LoDTensor) The values to scatter to the input tensor " - "X, must be a LoDTensor with the same LoD information as Ids"); - AddOutput("Out", - "(Tensor) The output tensor of sequence scatter op, which " - "has the same dims as X"); - AddComment(R"DOC( -Sequence Scatter Operator. - -This operator scatters the Updates tensor to the input X. It uses the LoD -information of Ids to select the rows to update, and use the values in Ids as -the columns to update in each row of X. - -Following are cases to better explain how this works: - -Example 1: -Given an all-ones Tensor input(X) - X.data = [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]] - X.dims = [3, 6] -a LoDTensor input(Ids) - Ids.data = [[0], [1], [2], [5], [4], [3], [2], [1], [3], [2], [5], [4]] - Ids.lod = [[0, 3, 8, 12]] -and a Tensor input(Updates) - Updates.data = [[0.3], [0.3], [0.4], [0.1], [0.2], [0.3], [0.4], [0.0], [0.2], [0.3], [0.1], [0.4]] - Updates.lod = [[ 0, 3, 8, 12]] -then we get an output Tensor - Out.data = [[1.3, 1.3, 1.4, 1.0, 1.0, 1.0], - [1.0, 1.0, 1.4, 1.3, 1.2, 1.1], - [1.0, 1.0, 1.3, 1.2, 1.4, 1.1]] - Out.dims = X.dims = [3, 6] -)DOC"); - } -}; - -class SequenceScatterOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - // Enforce has inputs and outputs - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceScatter"); - OP_INOUT_CHECK(ctx->HasInput("Ids"), "Input", "Ids", "SequenceScatter"); - OP_INOUT_CHECK( - ctx->HasInput("Updates"), "Input", "Updates", "SequenceScatter"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SequenceScatter"); - - // Set output dim the same as input - auto ref_dims = ctx->GetInputDim("X"); - ctx->SetOutputDim("Out", ref_dims); - - // Enforce the Updates and Ids are the same shape - auto updates_dim = ctx->GetInputDim("Updates"); - auto ids_dim = ctx->GetInputDim("Ids"); - PADDLE_ENFORCE_EQ( - updates_dim[0], - ids_dim[0], - platform::errors::InvalidArgument( - "The shape of SequenceScatter operator's input Updates and Ids do " - "not match, receive Updates's shape is [%s], Ids's shape is [%s].", - updates_dim, - ids_dim)); - - // Enforce LoD of ids and updates be the same - if (ctx->IsRuntime()) { - framework::Variable* ids_var = - PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("Ids")[0]); - framework::Variable* updates_var = - PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("Updates")[0]); - - auto& ids_lod = ids_var->Get().lod(); - auto& updates_lod = updates_var->Get().lod(); - PADDLE_ENFORCE_EQ( - ids_lod.size(), - 1, - platform::errors::InvalidArgument( - "The SequenceScatter operator’s Input Ids holds wrong LoD " - "information. 
Currently SequenceScatter operator can only deal " - "with one level LoD for input Ids, but received LoD level is %d.", - ids_lod.size())); - PADDLE_ENFORCE_EQ( - updates_lod.size(), - 1, - platform::errors::InvalidArgument( - "The SequenceScatter operator’s Input Updates holds wrong LoD " - "information. Currently SequenceScatter operator can only deal " - "with one level LoD for input Updates, but received LoD level is " - "%d.", - ids_lod.size())); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - platform::CPUPlace()); - } -}; - -class SequenceScatterGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - ctx->SetOutputDim(framework::GradVarName("Updates"), - ctx->GetInputDim("Updates")); - ctx->SetOutputDim(framework::GradVarName("X"), - ctx->GetInputDim(framework::GradVarName("Out"))); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - platform::CPUPlace()); - } -}; - -template -class SequenceScatterGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("sequence_scatter_grad"); - op->SetInput("Ids", this->Input("Ids")); - op->SetInput("Updates", this->Input("Updates")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetOutput(framework::GradVarName("Updates"), - this->InputGrad("Updates")); - op->SetAttrMap(this->Attrs()); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(SequenceScatterGradNoNeedBufferVarsInferer, - "Updates"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(sequence_scatter, - ops::SequenceScatterOp, - ops::SequenceScatterOpMaker, - ops::SequenceScatterGradMaker, - ops::SequenceScatterGradMaker); -REGISTER_OPERATOR(sequence_scatter_grad, - ops::SequenceScatterGradOp, - ops::SequenceScatterGradNoNeedBufferVarsInferer); -PD_REGISTER_STRUCT_KERNEL(sequence_scatter, - CPU, - ALL_LAYOUT, - ops::SequenceScatterOpKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_scatter_grad, - CPU, - ALL_LAYOUT, - ops::SequenceScatterGradientOpKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h deleted file mode 100644 index 389b630015e6f..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
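The SequenceScatter DOC example above fully determines the update rule: the LoD of Ids splits Ids/Updates into one segment per row of X, and within segment seg, X[seg][Ids[i]] += Updates[i]. A standalone sketch that reproduces that example (plain C++, hypothetical `SequenceScatter` name, not the Paddle kernel):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative re-implementation of the scatter rule described above.
void SequenceScatter(std::vector<std::vector<float>>* x,
                     const std::vector<int64_t>& ids,
                     const std::vector<float>& updates,
                     const std::vector<size_t>& ids_lod) {  // e.g. {0,3,8,12}
  for (size_t seg = 0; seg + 1 < ids_lod.size(); ++seg)
    for (size_t i = ids_lod[seg]; i < ids_lod[seg + 1]; ++i)
      (*x)[seg][ids[i]] += updates[i];
}

int main() {
  // Mirrors the DOC example above: an all-ones [3, 6] X.
  std::vector<std::vector<float>> x(3, std::vector<float>(6, 1.0f));
  std::vector<int64_t> ids = {0, 1, 2, 5, 4, 3, 2, 1, 3, 2, 5, 4};
  std::vector<float> updates = {0.3f, 0.3f, 0.4f, 0.1f, 0.2f, 0.3f,
                                0.4f, 0.0f, 0.2f, 0.3f, 0.1f, 0.4f};
  SequenceScatter(&x, ids, updates, {0, 3, 8, 12});
  for (const auto& row : x) {
    for (float v : row) std::printf("%.1f ", v);
    std::printf("\n");  // matches Out.data in the DOC example
  }
  return 0;
}
```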
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/scatter.h" - -namespace paddle { -namespace operators { - -using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; - -template -class SequenceScatterOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* ids = ctx.Input("Ids"); - auto* updates = ctx.Input("Updates"); - auto* out = ctx.Output("Out"); - - auto& ids_lod = ids->lod(); - PADDLE_ENFORCE_EQ(ids_lod.empty(), - false, - platform::errors::InvalidArgument( - "Input(Ids) Tensor of SequenceScatter operator does " - "not contain LoD information.")); - - // Initialize out as same as x - out->mutable_data(ctx.GetPlace()); - framework::TensorCopySync(*x, ctx.GetPlace(), out); - - auto x_dims = x->dims(); - auto out_dims = out->dims(); - - for (int i = 0; i < x_dims.size(); ++i) - PADDLE_ENFORCE_EQ(x_dims[i], - out_dims[i], - platform::errors::InvalidArgument( - "Input(X) and output(Out) shape of SequenceScatter " - "operator do not match. Received input(X)'s shape " - "is [%s], output(Out)'s shape is [%s].", - x_dims, - out_dims)); - - size_t slice_size = 1; - for (int i = 1; i < x_dims.size(); ++i) slice_size *= x_dims[i]; - - auto lod_vec = ids_lod[0]; - unsigned int seg = 0; - for (int i = 0; i < ids->dims()[0]; ++i) { - PADDLE_ENFORCE_LT( - seg, - lod_vec.size() - 1, - platform::errors::OutOfRange("The segment index is out of bound in " - "SequenceScatter operator, it must be " - "less than batch size. The segment " - "index is %d, the batch size is %d.", - seg, - lod_vec.size())); - int lower_bound = lod_vec[seg]; - int upper_bound = lod_vec[seg + 1]; - if (i >= lower_bound && i < upper_bound) { - T* p_out = out->data(); - const T* p_updates = updates->data(); - const int64_t* p_index = ids->data(); - p_out[seg * slice_size + p_index[i]] += p_updates[i]; - } else { - ++seg; - --i; - } - } - } -}; - -template -class SequenceScatterGradientOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_cpu_place(ctx.GetPlace()), - true, - platform::errors::Unimplemented("Device dose not match. The " - "SequenceScatterGradientOpKernel can " - "only run on CPU device.")); - auto* dX = ctx.Output(framework::GradVarName("X")); - auto* dUpdates = ctx.Output(framework::GradVarName("Updates")); - auto* ids = ctx.Input("Ids"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - - auto& ids_lod = ids->lod(); - - dX->mutable_data(ctx.GetPlace()); - framework::TensorCopySync(*dOut, ctx.GetPlace(), dX); - dUpdates->mutable_data(ctx.GetPlace()); - - auto dx_dims = dX->dims(); - auto dout_dims = dOut->dims(); - - for (int i = 0; i < dx_dims.size(); ++i) - PADDLE_ENFORCE_EQ(dx_dims[i], - dout_dims[i], - platform::errors::InvalidArgument( - "Input(Out@GRAD) and output(X@GRAD) shape of " - "SequenceScatterGradient operator do not match. 
" - "Received input(Out@GRAD)'s shape is [%s], " - "output(X@GRAD)'s shape is [%s].", - dout_dims, - dx_dims)); - - size_t slice_size = 1; - for (int i = 1; i < dx_dims.size(); ++i) slice_size *= dx_dims[i]; - - auto lod_vec = ids_lod[0]; - unsigned int seg = 0; - - for (int i = 0; i < ids->dims()[0]; ++i) { - PADDLE_ENFORCE_LT( - seg, - lod_vec.size() - 1, - platform::errors::OutOfRange( - "The segment index is out of bound in SequenceScatterGradient " - "operator, it must be less than batch size. The segment index is " - "%d, the batch size is %d.", - seg, - lod_vec.size())); - int lower_bound = lod_vec[seg]; - int upper_bound = lod_vec[seg + 1]; - if (i >= lower_bound && i < upper_bound) { - const T* p_dOut = dOut->data(); - const int64_t* p_index = ids->data(); - T* p_dUpdates = dUpdates->data(); - p_dUpdates[i] = p_dOut[seg * slice_size + p_index[i]]; - } else { - ++seg; - --i; - } - } - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc b/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc deleted file mode 100644 index ed6e53b9ca7e8..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc +++ /dev/null @@ -1,176 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/sequence_ops/sequence_slice_op.h" - -#include - -namespace paddle { -namespace operators { - -class SequenceSliceOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceSlice"); - OP_INOUT_CHECK(ctx->HasInput("Offset"), "Input", "Offset", "SequenceSlice"); - OP_INOUT_CHECK(ctx->HasInput("Length"), "Input", "Length", "SequenceSlice"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SequenceSlice"); - auto input_dims = ctx->GetInputDim("X"); - - auto offset_dim = ctx->GetInputDim("Offset"); - auto length_dim = ctx->GetInputDim("Length"); - - PADDLE_ENFORCE_EQ( - offset_dim.size(), - 2UL, - platform::errors::InvalidArgument( - "Input Offset dimension error. SequenceSlice operator only support " - "one level sequence now, the dimension of input Offset must be 2, " - "but received dimension is %d.", - offset_dim.size())); - PADDLE_ENFORCE_EQ( - length_dim.size(), - 2UL, - platform::errors::InvalidArgument( - "Input Length dimension error. 
SequenceSlice operator only support " - "one level sequence now, the dimension of input Length must be 2, " - "but received dimension is %d.", - offset_dim.size())); - - // Initialize the output's dims to maximum, - // and re-set to real dims by the value of Offset and Length at kernel - ctx->SetOutputDim("Out", input_dims); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class SequenceSliceGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), - "Input", - framework::GradVarName("Out"), - "SequenceSliceGrad"); - OP_INOUT_CHECK(ctx->HasOutputs(framework::GradVarName("X")), - "Output", - framework::GradVarName("X"), - "SequenceSliceGrad"); - ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); - } -}; - -class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(LoDTensor), " - "the input of SequenceSliceOp."); - AddInput("Offset", - "(Tensor), " - "a vector to describe the offset of every input sequence for " - "sub sequence item."); - AddInput("Length", - "(Tensor), " - "a vector to describe the length of every input sequence for " - "sub sequence item."); - AddOutput("Out", "(LoDTensor), the output of SequenceSliceOp."); - AddComment(R"DOC( -Sequence slice operator - -The operator crops a subsequence from given sequence with given start offset and subsequence length. -It only supports sequence (LoD Tensor with level number is 1). -- Case: - X = [[a1, a2; - b1, b2; - c1, c2] - [d1, d2; - e1, e2]] - LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2) - Offset = [[0], [1]]; Length = [[2], [1]] - - Out = [[a1, a2; - b1, b2] - [e1, e2]] - LoD(Out) = {{0, 2, 3}}; Dims(Out) = (3, 2) -NOTE: The first dimension size of input, the size of offset and Length, should be equal. The offset start from 0. 
- )DOC"); - } -}; - -template -class SequenceSliceGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("sequence_slice_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Offset", this->Input("Offset")); - op->SetInput("Length", this->Input("Length")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(SequenceSliceGradNoNeedBufferVarsInferer, - "X"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(sequence_slice, - ops::SequenceSliceOp, - ops::SequenceSliceOpMaker, - ops::SequenceSliceGradOpMaker, - ops::SequenceSliceGradOpMaker); -REGISTER_OPERATOR(sequence_slice_grad, - ops::SequenceSliceGradOp, - ops::SequenceSliceGradNoNeedBufferVarsInferer); -PD_REGISTER_STRUCT_KERNEL(sequence_slice, - CPU, - ALL_LAYOUT, - ops::SequenceSliceOpKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_slice_grad, - CPU, - ALL_LAYOUT, - ops::SequenceSliceGradOpKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.cu b/paddle/fluid/operators/sequence_ops/sequence_slice_op.cu deleted file mode 100644 index 407eb2e3ad7db..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.cu +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/sequence_ops/sequence_slice_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL(sequence_slice, - GPU, - ALL_LAYOUT, - ops::SequenceSliceOpKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_slice_grad, - GPU, - ALL_LAYOUT, - ops::SequenceSliceGradOpKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h deleted file mode 100644 index 50a3e97633475..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h +++ /dev/null @@ -1,216 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/funcs/strided_memcpy.h" - -namespace paddle { -namespace operators { - -using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; -using LoD = framework::LoD; - -template -inline LoD SequenceSliceLoD(const T& in, - const int64_t* offset_data, - const int64_t* length_data) { - auto out_lod = in.lod(); - size_t lod_offset = 0; - - auto n = in.lod()[0].size() - 1; - out_lod[0][0] = 0; - for (size_t i = 0; i < n; ++i) { - lod_offset += length_data[i]; - out_lod[0][i + 1] = lod_offset; - } - return out_lod; -} - -template -class SequenceSliceOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* offset = ctx.Input("Offset"); - auto* length = ctx.Input("Length"); - auto* out = ctx.Output("Out"); - - auto lod = in->lod(); - PADDLE_ENFORCE_EQ(lod.empty(), - false, - platform::errors::InvalidArgument( - "Input(X) Tensor of SequenceSlice operator does not " - "contain LoD information.")); - - PADDLE_ENFORCE_EQ( - lod.size(), - 1UL, - platform::errors::InvalidArgument( - "LoD information error. SequenceSlice operator only support one " - "level sequence now, but received LoD level is %d.", - lod.size())); - auto n = lod[0].size() - 1; - PADDLE_ENFORCE_EQ( - n, - static_cast(length->dims()[0]), - platform::errors::InvalidArgument( - "Input length shape error. The length of input LoD sequence and " - "input length-array‘s first dimension should be equal, but the LoD " - "sequence length is %d, the length-array‘s first dimension is %d.", - n, - static_cast(length->dims()[0]))); - PADDLE_ENFORCE_EQ( - n, - static_cast(offset->dims()[0]), - platform::errors::InvalidArgument( - "Input offset shape error. The length of input LoD sequence and " - "input offset-array‘s first dimension should be equal, but the LoD " - "sequence length is %d, the offset-array‘s first dimension is %d.", - n, - static_cast(offset->dims()[0]))); - - const int64_t* offset_data = offset->data(); - const int64_t* length_data = length->data(); - phi::DenseTensor offset_cpu; - phi::DenseTensor length_cpu; - - if (platform::is_gpu_place(ctx.GetPlace())) { - offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); - framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu); - offset_data = offset_cpu.data(); - - length_cpu.mutable_data(length->dims(), platform::CPUPlace()); - framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu); - length_data = length_cpu.data(); - } - - for (size_t i = 0; i < n; ++i) { - PADDLE_ENFORCE_LE(0, - offset_data[i], - platform::errors::InvalidArgument( - "The input offset[%d]'s value is negative, its " - "value is %d, expect it to be non-negative.", - i, - offset_data[i])); - PADDLE_ENFORCE_LE(0, - length_data[i], - platform::errors::InvalidArgument( - "The input length[%d]'s value is negative, its " - "value is %d, expect it to be non-negative.", - i, - offset_data[i])); - PADDLE_ENFORCE_LE( - lod[0][i] + offset_data[i] + length_data[i], - lod[0][i + 1], - platform::errors::OutOfRange( - "The slice end index of target tensor is out of range. 
expect it " - "less than or equal to %d, but the actual slice end index is %d.", - lod[0][i + 1], - lod[0][i] + offset_data[i] + length_data[i])); - } - - out->mutable_data(ctx.GetPlace()); - auto out_lod = SequenceSliceLoD(*in, offset_data, length_data); - auto out_dims = in->dims(); - out_dims[0] = out_lod[0][out_lod[0].size() - 1]; - out->Resize(out_dims); - out->set_lod(out_lod); - - auto in_stride = common::stride(in->dims()); - auto out_stride = common::stride(out->dims()); - - size_t out_offset = 0; - for (size_t i = 0; i < n; ++i) { - if (length_data[i] == 0) continue; - Tensor in_t = in->Slice( - static_cast(lod[0][i] + offset_data[i]), - static_cast(lod[0][i] + offset_data[i] + length_data[i])); - - phi::funcs::StridedMemcpy(ctx.device_context(), - in_t.data(), - in_stride, - in_t.dims(), - out_stride, - out->data() + out_offset); - out_offset += length_data[i] * in_stride[0]; - } - } -}; - -template -class SequenceSliceGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* offset = ctx.Input("Offset"); - auto* length = ctx.Input("Length"); - auto* out_grad = ctx.Input(framework::GradVarName("Out")); - auto* x_grad = ctx.Output(framework::GradVarName("X")); - - const int64_t* offset_data = offset->data(); - const int64_t* length_data = length->data(); - phi::DenseTensor offset_cpu; - phi::DenseTensor length_cpu; - - if (platform::is_gpu_place(ctx.GetPlace())) { - offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); - framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu); - offset_data = offset_cpu.data(); - - length_cpu.mutable_data(length->dims(), platform::CPUPlace()); - framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu); - length_data = length_cpu.data(); - } - - auto lod = in->lod(); - // to avoid out_grad missing lod, compute lod again - auto out_lod = SequenceSliceLoD(*in, offset_data, length_data); - - if (x_grad) { - x_grad->mutable_data(ctx.GetPlace()); - x_grad->set_lod(in->lod()); - phi::funcs::SetConstant set_zero; - set_zero(ctx.template device_context(), - x_grad, - static_cast(0)); - - for (size_t i = 0; i < out_lod[0].size() - 1; ++i) { - if (length_data[i] == 0) continue; - Tensor out_grad_t = - out_grad->Slice(static_cast(out_lod[0][i]), - static_cast(out_lod[0][i + 1])); - auto out_grad_stride = common::stride(out_grad_t.dims()); - - auto x_grad_stride = common::stride(x_grad->dims()); - - Tensor x_grad_t = x_grad->Slice( - static_cast(lod[0][i] + offset_data[i]), - static_cast(lod[0][i] + offset_data[i] + length_data[i])); - - phi::funcs::StridedMemcpy(ctx.device_context(), - out_grad_t.data(), - out_grad_stride, - out_grad_t.dims(), - x_grad_stride, - x_grad_t.data()); - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc index 01f7bb3e92890..0a4d5a69a8e2b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc @@ -36,7 +36,7 @@ class SequenceSoftmaxCUDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dims[0], static_cast(lod[level].back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(X) should be equal to the sum of all " "sequences' lengths. 
But received first dimension of Input(X) is " "%d, the sum of all sequences' lengths is %d.", @@ -44,7 +44,7 @@ class SequenceSoftmaxCUDNNKernel : public framework::OpKernel { static_cast(lod[level].back()))); PADDLE_ENFORCE_EQ(dims[0], x->numel(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of each timestep in Input(X) of " "SequenceSoftmaxOp should be 1.")); diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc index 12d4f72a91169..5fbbd49a88521 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc @@ -111,7 +111,7 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( out_dim, out_grad_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The shape of Input(Out) and Input(Out@GRAD) of " "SequenceSoftmaxGrad operator do not match. The Input(Out)'s shape " "is [%s], the Input(Out@GRAD)'s shape is [%s].", diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h index 6c6f1b69c8196..ee372e6a9d382 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h @@ -97,7 +97,7 @@ class SequenceSoftmaxKernel : public framework::OpKernel { auto dims = x->dims(); PADDLE_ENFORCE_EQ(lod.empty(), false, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) phi::DenseTensor of SequenceSoftmax " "operator does not contain " "LoD information.")); @@ -106,7 +106,7 @@ class SequenceSoftmaxKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dims[0], static_cast(lod[level].back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The first dimension of Input(X) should be equal to the sum of all " "sequences' lengths. But the first dimension of Input(X) is %d, " "the sum of all sequences' lengths is %d.", @@ -115,7 +115,7 @@ class SequenceSoftmaxKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( dims[0], x->numel(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The width of each timestep in Input(X) of SequenceSoftmax " "operator should be 1. But the first dimension of Input(X) is %d, " "the number of elements is %d.", diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc deleted file mode 100644 index 4b19faea335bf..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc +++ /dev/null @@ -1,213 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
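The two checks in the SequenceSoftmax kernels above (dims[0] equals the sum of sequence lengths, and each timestep has width 1) imply the op is a softmax taken independently within each LoD segment of a flat [N, 1] tensor. A minimal sketch under that reading, plain C++ and not the cuDNN path:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// In-place softmax within each LoD segment of a flat vector. Illustrative
// only; `SequenceSoftmax` is a hypothetical name.
void SequenceSoftmax(std::vector<float>* x, const std::vector<size_t>& lod) {
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    float max_v = (*x)[lod[i]];
    for (size_t j = lod[i]; j < lod[i + 1]; ++j)
      max_v = std::max(max_v, (*x)[j]);
    float sum = 0.0f;
    for (size_t j = lod[i]; j < lod[i + 1]; ++j) {
      (*x)[j] = std::exp((*x)[j] - max_v);  // shift for numerical stability
      sum += (*x)[j];
    }
    for (size_t j = lod[i]; j < lod[i + 1]; ++j) (*x)[j] /= sum;
  }
}

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f, 1.0f, 1.0f};
  SequenceSoftmax(&x, {0, 3, 5});  // softmax over rows [0,3) and [3,5)
  for (float v : x) std::printf("%.3f ", v);
  std::printf("\n");  // first three sum to 1; last two are 0.5 each
  return 0;
}
```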
*/ - -#include "paddle/fluid/operators/sequence_ops/sequence_unpad_op.h" - -#include -#include - -namespace paddle { -namespace operators { - -class SequenceUnpadOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of SequenceUnpadOp should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Length"), - true, - platform::errors::NotFound( - "Input(Length) of SequenceUnpadOp should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "Output(Out) of SequenceUnpadOp should not be null.")); - - auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_GE(x_dims.size(), - 2, - platform::errors::InvalidArgument( - "The rank of Input(X) can't be less than 2. But the " - "rank we received is %d", - x_dims.size())); - - auto len_dims = ctx->GetInputDim("Length"); - PADDLE_ENFORCE_EQ(len_dims.size(), - 1, - platform::errors::InvalidArgument( - "The rank of SequenceUnpadOp Input(Length) should " - "be 1. But the rank we received is %d", - len_dims.size())); - PADDLE_ENFORCE_EQ( - len_dims[0], - x_dims[0], - platform::errors::InvalidArgument( - "The 1st dimension of SequenceUnpadOp Input(X) and Input(Length)" - "should be same. But the 1st dimension of " - "Input(X) is %d, Input(Length) is %d", - x_dims[0], - len_dims[0])); - - int64_t out_dim_0 = -1; - if (ctx->IsRuntime()) { - out_dim_0 = x_dims[0] * x_dims[1]; - } - - std::vector out_dims_vec{out_dim_0}; - if (x_dims.size() == 2) { - out_dims_vec.push_back(1); - } else { - for (int i = 2; i < x_dims.size(); ++i) { - out_dims_vec.push_back(x_dims[i]); - } - } - ctx->SetOutputDim("Out", common::make_ddim(out_dims_vec)); - if (!ctx->IsRuntime()) { - ctx->SetLoDLevel("Out", 1); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; - -class SequenceUnpadOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(LoDTensor, default LoDTensor) Input tensor which " - "contains the padded sequences with equal length."); - AddInput( - "Length", - "(LoDTensor) The input tensor which specifies the actual length of " - "sequences after unpadding."); - AddOutput( - "Out", - "(LoDTensor) The output tensor which contains unpadded sequences."); - AddComment(R"DOC( - Sequence Unpad Operator - - This operator removes the padding data in the input sequences and convert - them into sequences with actual length as output, identified by lod - information. 
- - Example: - - Given input tensor Input(X): - X.data = [[ 1.0, 2.0, 3.0, 4.0, 5.0], - [ 6.0, 7.0, 8.0, 9.0, 10.0], - [11.0, 12.0, 13.0, 14.0, 15.0]], -` - in which there are 3 sequences padded to length 5, and the actual length - specified by Input(Length): - - Length.data = [2, 3, 4], - - after unpadding, Output(Out) will be: - - Out.data = [[1.0, 2.0, 6.0, 7.0, 8.0, 11.0, 12.0, 13.0, 14.0]] - Out.lod = [[0, 2, 5, 9]] - - )DOC"); - } -}; - -class SequenceUnpadGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of SequenceUnpadGradOp should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::NotFound( - "Input(Out@GRAD) of SequenceUnpadGradOp should not be null.")); - - if (ctx->HasOutput(framework::GradVarName("X"))) { - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - ctx->ShareLoD("X", /*->*/ framework::GradVarName("X")); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; - -template -class SequenceUnpadGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("sequence_unpad_grad"); - op->SetAttrMap(this->Attrs()); - op->SetInput("X", this->Input("X")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(SequenceUnpadGradOpNoNeedBufferVarsInferer, - "X"); - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(sequence_unpad, - ops::SequenceUnpadOp, - ops::SequenceUnpadOpMaker, - ops::SequenceUnpadGradOpMaker, - ops::SequenceUnpadGradOpMaker); -REGISTER_OPERATOR(sequence_unpad_grad, - ops::SequenceUnpadGradOp, - ops::SequenceUnpadGradOpNoNeedBufferVarsInferer); -PD_REGISTER_STRUCT_KERNEL(sequence_unpad, - CPU, - ALL_LAYOUT, - ops::SequenceUnpadOpKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_unpad_grad, - CPU, - ALL_LAYOUT, - ops::SequenceUnpadGradOpKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cu b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cu deleted file mode 100644 index 8ba8b380c0976..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cu +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
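The SequenceUnpad example above (X padded to width 5, Length [2, 3, 4]) determines both the output data and its LoD. A plain-C++ sketch of that mapping — the deleted kernel does the copy via UnpaddingLoDTensorFunctor, so this is only an illustration with a hypothetical `SequenceUnpad` name:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Keep the first length[i] entries of each padded row and rebuild the LoD
// as a running sum of the lengths. Illustrative only.
void SequenceUnpad(const std::vector<std::vector<float>>& x,
                   const std::vector<int64_t>& length,
                   std::vector<float>* out,
                   std::vector<size_t>* out_lod) {
  out_lod->assign(1, 0);
  for (size_t i = 0; i < x.size(); ++i) {
    for (int64_t j = 0; j < length[i]; ++j) out->push_back(x[i][j]);
    out_lod->push_back(out_lod->back() + length[i]);
  }
}

int main() {
  // Mirrors the DOC example above.
  std::vector<std::vector<float>> x = {{1, 2, 3, 4, 5},
                                       {6, 7, 8, 9, 10},
                                       {11, 12, 13, 14, 15}};
  std::vector<float> out;
  std::vector<size_t> out_lod;
  SequenceUnpad(x, {2, 3, 4}, &out, &out_lod);
  for (float v : out) std::printf("%g ", v);  // 1 2 6 7 8 11 12 13 14
  std::printf("\n");
  for (size_t v : out_lod) std::printf("%zu ", v);  // 0 2 5 9
  std::printf("\n");
  return 0;
}
```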
*/ - -#include "paddle/fluid/operators/sequence_ops/sequence_unpad_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL(sequence_unpad, - GPU, - ALL_LAYOUT, - ops::SequenceUnpadOpKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_STRUCT_KERNEL(sequence_unpad_grad, - GPU, - ALL_LAYOUT, - ops::SequenceUnpadGradOpKernel, - float, - double, - int, - int64_t) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h deleted file mode 100644 index cc38fd510ef1e..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h +++ /dev/null @@ -1,116 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/funcs/sequence_padding.h" - -namespace paddle { -namespace operators { - -using LoDTensor = phi::DenseTensor; -using LoD = framework::LoD; - -template -class SequenceUnpadOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x_t = ctx.Input("X"); - auto* len_t = ctx.Input("Length"); - auto* out_t = ctx.Output("Out"); - - auto& dev_ctx = ctx.template device_context(); - phi::DenseTensor seq_len_cpu = - ctx.AllocateTmpTensor(len_t->dims(), dev_ctx); - if (platform::is_gpu_place(ctx.GetPlace()) || - platform::is_xpu_place(ctx.GetPlace())) { - seq_len_cpu.mutable_data(platform::CPUPlace()); - framework::TensorCopySync(*len_t, platform::CPUPlace(), &seq_len_cpu); - } else { - seq_len_cpu = *len_t; - } - - const int64_t* seq_len_ptr = seq_len_cpu.data(); - int64_t batch_size = len_t->dims()[0]; - std::vector out_lod0(batch_size + 1, 0); - for (int64_t i = 0; i < batch_size; ++i) { - out_lod0[i + 1] = out_lod0[i] + static_cast(seq_len_ptr[i]); - } - - framework::LoD out_lod; - out_lod.push_back(out_lod0); - out_t->set_lod(out_lod); - std::vector out_dims_vec{static_cast(out_lod0.back())}; - if (x_t->dims().size() == 2) { - out_dims_vec.push_back(1); - } else { - for (int i = 2; i < x_t->dims().size(); ++i) { - out_dims_vec.push_back(x_t->dims()[i]); - } - } - out_t->Resize(common::make_ddim(out_dims_vec)); - - // after set the lod of output, allocate the memory - out_t->mutable_data(ctx.GetPlace()); - - int64_t padded_length = x_t->dims()[1]; - phi::funcs::UnpaddingLoDTensorFunctor()( - dev_ctx, - *x_t, - out_t, - padded_length, - 0, - false, - phi::funcs::kBatchLengthWidth); - } -}; - -template -class SequenceUnpadGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* d_x = ctx.Output(framework::GradVarName("X")); - if (d_x) { - const auto* d_out = ctx.Input(framework::GradVarName("Out")); - d_x->mutable_data(ctx.GetPlace()); - - int padded_length = d_x->dims()[1]; - - LoDTensor zero_pads; - 
zero_pads.Resize({1, 1}); - zero_pads.mutable_data(ctx.GetPlace()); - phi::funcs::SetConstant set_zero; - auto& dev_ctx = ctx.template device_context(); - set_zero(dev_ctx, &zero_pads, static_cast(0)); - - phi::funcs::PaddingLoDTensorFunctor()( - ctx.template device_context(), - *d_out, - d_x, - zero_pads, - padded_length, - 0, - false, - phi::funcs::kBatchLengthWidth); - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op_xpu.cc b/paddle/fluid/operators/sequence_ops/sequence_unpad_op_xpu.cc deleted file mode 100644 index c875cdc37e80b..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op_xpu.cc +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_XPU - -#include "paddle/fluid/operators/sequence_ops/sequence_unpad_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL( - sequence_unpad, XPU, ALL_LAYOUT, ops::SequenceUnpadOpKernel, float) {} - -#endif diff --git a/paddle/fluid/operators/sequence_ops/unity_build_rule.cmake b/paddle/fluid/operators/sequence_ops/unity_build_rule.cmake index a22e6865cf103..460c845029a96 100644 --- a/paddle/fluid/operators/sequence_ops/unity_build_rule.cmake +++ b/paddle/fluid/operators/sequence_ops/unity_build_rule.cmake @@ -6,7 +6,6 @@ # in combination rule, you can remove the source file from the following rules. 
register_unity_group( cc - sequence_concat_op.cc sequence_conv_op.cc sequence_enumerate_op.cc sequence_erase_op.cc diff --git a/paddle/fluid/operators/set_value_op.cc b/paddle/fluid/operators/set_value_op.cc index 5eeb356817a2a..268f7457f2136 100644 --- a/paddle/fluid/operators/set_value_op.cc +++ b/paddle/fluid/operators/set_value_op.cc @@ -189,7 +189,7 @@ class SetValueGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT( in_dims.size(), 7, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of set_value_grad operator's input should be less " "than 7, but received dimension is %d.", in_dims.size())); diff --git a/paddle/fluid/operators/set_value_op.h b/paddle/fluid/operators/set_value_op.h index 7d5fd042bb0fb..85022ead0e905 100644 --- a/paddle/fluid/operators/set_value_op.h +++ b/paddle/fluid/operators/set_value_op.h @@ -22,10 +22,10 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/assign_value_op.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" -#include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/phi/core/tensor_utils.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/slice_utils.h" namespace paddle { @@ -68,7 +68,7 @@ inline void CheckIsDimsMatch(const framework::DDim first, return; } } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The shape of tensor assigned value must match the shape " "of target shape: %d, but now shape is %d.", second.to_str(), diff --git a/paddle/fluid/operators/share_data_op.cc b/paddle/fluid/operators/share_data_op.cc index b780ccba920c0..4accee24e17fa 100644 --- a/paddle/fluid/operators/share_data_op.cc +++ b/paddle/fluid/operators/share_data_op.cc @@ -33,12 +33,12 @@ class ShareDataOp : public framework::OperatorWithKernel { in_type == framework::proto::VarType::LOD_TENSOR || in_type == framework::proto::VarType::SELECTED_ROWS, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Type of Variable[X] must be phi::DenseTensor or SelectedRows!")); PADDLE_ENFORCE_EQ( in_type, out_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The type of input (X) and output (Out) are inconsistent.")); ctx->ShareDim("X", "Out"); @@ -80,4 +80,4 @@ PD_REGISTER_STRUCT_KERNEL(share_data, int64_t, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/share_data_op.cu b/paddle/fluid/operators/share_data_op.cu index 7e67b491834ea..2b1c32d655b80 100644 --- a/paddle/fluid/operators/share_data_op.cu +++ b/paddle/fluid/operators/share_data_op.cu @@ -27,4 +27,4 @@ PD_REGISTER_STRUCT_KERNEL(share_data, int64_t, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc deleted file mode 100644 index e883ba8e83092..0000000000000 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ /dev/null @@ -1,225 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#include "paddle/fluid/operators/array_operator.h" -#include "paddle/phi/core/lod_utils.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace framework { -class OpDesc; -class Scope; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -namespace paddle { -namespace operators { - -class ShrinkRNNMemoryOp : public ArrayOp { - public: - ShrinkRNNMemoryOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : ArrayOp(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope &scope, - const platform::Place &place) const override { - auto *x_var = scope.FindVar(Input("X")); - PADDLE_ENFORCE_NOT_NULL(x_var, - platform::errors::NotFound( - "Input(X) of ShrinkRNNMemoryOp is not found.")); - auto &x_tensor = x_var->Get(); - size_t offset = this->GetOffset(scope, place); - auto *rank_table_var = scope.FindVar(Input("RankTable")); - PADDLE_ENFORCE_NOT_NULL( - rank_table_var, - platform::errors::NotFound( - "Input(RankTable) of ShrinkRNNMemoryOp is not found.")); - auto &rank_table = rank_table_var->Get(); - - auto &rank_items = rank_table.items(); - int dst_num_rows = static_cast( - std::lower_bound(rank_items.begin(), - rank_items.end(), - offset, - [](const framework::LoDRankTable::TableItem &a, - size_t b) { return a.length > b; }) - - rank_items.begin()); - - auto *out_var = scope.FindVar(Output("Out")); - PADDLE_ENFORCE_NOT_NULL( - out_var, - platform::errors::NotFound( - "Output(Out) of ShrinkRNNMemoryOp is not found.")); - auto &out_tensor = *out_var->GetMutable(); - - size_t height = dst_num_rows; - - // do shrink for the top level LoD - if (!x_tensor.lod().empty() && - x_tensor.lod()[0].size() > static_cast(dst_num_rows)) { - auto lod_offset = framework::GetSubLoDAndAbsoluteOffset( - x_tensor.lod(), 0, dst_num_rows, 0); - height = lod_offset.second.second; - auto out_lod = out_tensor.mutable_lod(); - phi::AppendLoD(out_lod, lod_offset.first); - } - - if (dst_num_rows != 0) { - out_tensor.mutable_data(place, x_tensor.dtype()); - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - framework::TensorCopy(x_tensor.Slice(0, static_cast(height)), - place, - *dev_ctx, - &out_tensor); - } - } -}; - -class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(phi::DenseTensor) The RNN step memory to be shrank."); - AddInput("RankTable", "(LoDRankTable) The lod_rank_table of dynamic RNN."); - AddInput( - "I", - "(phi::DenseTensor) The step index. The RNN step memory 'X' will be " - "shrank to match the size of the input of the index'th step."); - AddOutput("Out", "(phi::DenseTensor) The shrank RNN step memory."); - AddComment(R"DOC( -This operator is used to shrink output batch of memory defined in dynamic RNN. - -Dynamic RNN is able to handle variable-length sequences, in which, sequences in -a mini-batch are sorted by their lengths first. 
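The RunImpl above turns the rank table into dst_num_rows with std::lower_bound: because sequences are sorted longest-first, the number still active at timestep `offset` is the count of lengths greater than `offset`. A standalone sketch with hypothetical lengths:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Sequences are sorted by length, longest first. At step `offset`, the
// number of still-active sequences is the count of lengths > offset,
// which lower_bound finds directly on the descending array.
int main() {
  const std::vector<size_t> lengths = {5, 3, 3, 1};  // sorted descending
  const size_t offset = 2;                           // current timestep
  auto it = std::lower_bound(
      lengths.begin(), lengths.end(), offset,
      [](size_t len, size_t off) { return len > off; });
  const int dst_num_rows = static_cast<int>(it - lengths.begin());
  assert(dst_num_rows == 3);  // sequences of length 5, 3, 3 are still alive
  return 0;
}
```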
After that, the longest sequence -becomes the first one in the sorted batch, followed by the second longest, the -third longest, and so on. Dynamic RNN then slices a batch input timestep by -timestep from the sorted input. Once any sequence in the input batch reaches its -end, memory defined in dynamicRNN has to shrink its outputs to adapt to the input -batch size for the next time step. -)DOC"); - } -}; - -class ShrinkRNNMemoryInferShape : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *context) const override { - OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "ShrinkRNNMemory"); - OP_INOUT_CHECK(context->HasInput("I"), "Input", "I", "ShrinkRNNMemory"); - OP_INOUT_CHECK(context->HasInput("RankTable"), - "Input", - "RankTable", - "ShrinkRNNMemory"); - context->SetOutputDim("Out", context->GetInputDim("X")); - // For runtime, output's lod is computed according to input's lod, but - // remove the finished sequence. It is set in detail kernel implementation. - if (!context->IsRuntime()) { - context->ShareLoD("X", /*->*/ "Out"); - } - } -}; - -class ShrinkRNNMemoryGradOp : public ArrayOp { - public: - ShrinkRNNMemoryGradOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : ArrayOp(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope &scope, - const platform::Place &place) const override { - auto *dout_var = scope.FindVar(Input(framework::GradVarName("Out"))); - auto *dx_var = scope.FindVar(Output(framework::GradVarName("X"))); - PADDLE_ENFORCE_NOT_NULL( - dx_var, - platform::errors::NotFound( - "Input(X@GRAD) of ShrinkRNNMemoryGradOp is not found.")); - auto *x_var = scope.FindVar(Input("X")); - PADDLE_ENFORCE_NOT_NULL( - x_var, - platform::errors::NotFound( - "Input(x) of ShrinkRNNMemoryGradOp is not found.")); - auto &x_tensor = x_var->Get(); - auto &dx_tensor = *dx_var->GetMutable(); - dx_tensor.Resize(x_tensor.dims()); - dx_tensor.mutable_data(x_tensor.place(), x_tensor.dtype()); - - // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(place); - - if (dout_var == nullptr) { // dx_tensor fill zero - phi::funcs::set_constant(dev_ctx, &dx_tensor, 0.0f); - } else { - auto &dout_tensor = dout_var->Get(); - auto height = dout_tensor.dims()[0]; - auto slice = dx_tensor.Slice(0, static_cast(height)); - framework::TensorCopy(dout_tensor, dout_tensor.place(), dev_ctx, &slice); - if (dx_tensor.dims()[0] > height) { - auto rest_tensor = dx_tensor.Slice( - static_cast(height), static_cast(dx_tensor.dims()[0])); - phi::funcs::set_constant(dev_ctx, &rest_tensor, 0.0f); - } - } - dx_tensor.set_lod(x_tensor.lod()); - } -}; - -class ShrinkRNNMemoryGradInferShape : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *context) const override { - OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "ShrinkRNNMemoryGrad"); - OP_INOUT_CHECK(context->HasOutput(framework::GradVarName("X")), - "Output", - "X", - "ShrinkRNNMemoryGrad"); - - context->ShareDim("X", /*->*/ framework::GradVarName("X")); - context->ShareLoD("X", /*->*/ framework::GradVarName("X")); - } -}; - -template -class ShrinkRNNGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - 
op->SetType("shrink_rnn_memory_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(shrink_rnn_memory, - ops::ShrinkRNNMemoryOp, - ops::ShrinkRNNMemoryInferShape, - ops::ShrinkRNNMemoryOpProtoMaker, - ops::ShrinkRNNGradOpMaker, - ops::ShrinkRNNGradOpMaker); -REGISTER_OPERATOR(shrink_rnn_memory_grad, - ops::ShrinkRNNMemoryGradOp, - ops::ShrinkRNNMemoryGradInferShape); diff --git a/paddle/fluid/operators/shuffle_batch_op.cc b/paddle/fluid/operators/shuffle_batch_op.cc index 0b5a7bf5540ab..1f1415aa995fd 100644 --- a/paddle/fluid/operators/shuffle_batch_op.cc +++ b/paddle/fluid/operators/shuffle_batch_op.cc @@ -38,26 +38,23 @@ class ShuffleBatchOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - platform::errors::NotFound("Input(X) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Seed"), - true, - platform::errors::NotFound("Input(Seed) should not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput("Out"), - true, - platform::errors::NotFound("Output(Out) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("Seed"), + true, + phi::errors::NotFound("Input(Seed) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), + true, + phi::errors::NotFound("Output(Out) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("ShuffleIdx"), true, - platform::errors::NotFound("Output(ShuffleIdx) should not be null.")); + phi::errors::NotFound("Output(ShuffleIdx) should not be null.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("SeedOut"), true, - platform::errors::NotFound("Output(SeedOut) should not be null.")); + phi::errors::NotFound("Output(SeedOut) should not be null.")); ctx->ShareDim("X", "Out"); ctx->ShareLoD("X", "Out"); @@ -122,15 +119,15 @@ class ShuffleBatchOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("ShuffleIdx"), true, - platform::errors::NotFound("Input(ShuffleIdx) should not be null")); + phi::errors::NotFound("Input(ShuffleIdx) should not be null")); PADDLE_ENFORCE_EQ( ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound("Grad Input(Out) should not be null")); + phi::errors::NotFound("Grad Input(Out) should not be null")); PADDLE_ENFORCE_EQ( ctx->HasOutput(framework::GradVarName("X")), true, - platform::errors::NotFound("Grad Output(X) should not be null")); + phi::errors::NotFound("Grad Output(X) should not be null")); ctx->ShareDim(framework::GradVarName("Out"), framework::GradVarName("X")); ctx->ShareLoD(framework::GradVarName("Out"), framework::GradVarName("X")); diff --git a/paddle/fluid/operators/shuffle_channel_op.cc b/paddle/fluid/operators/shuffle_channel_op.cc index c8f9d9469848e..f95bed3bed5ef 100644 --- a/paddle/fluid/operators/shuffle_channel_op.cc +++ b/paddle/fluid/operators/shuffle_channel_op.cc @@ -29,7 +29,7 @@ class ShuffleChannelOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_dims.size(), 4, - platform::errors::InvalidArgument("The layout of input is NCHW.")); + phi::errors::InvalidArgument("The layout of input is 
NCHW.")); ctx->SetOutputDim("Out", input_dims); } @@ -55,10 +55,10 @@ class ShuffleChannelOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("group", "the number of groups.") .SetDefault(1) .AddCustomChecker([](const int& group) { - PADDLE_ENFORCE_GE(group, - 1, - platform::errors::InvalidArgument( - "group should be larger than 0.")); + PADDLE_ENFORCE_GE( + group, + 1, + phi::errors::InvalidArgument("group should be larger than 0.")); }); AddComment(R"DOC( Shuffle Channel operator @@ -83,7 +83,7 @@ class ShuffleChannelGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( input_dims.size(), 4, - platform::errors::InvalidArgument("The layout of input is NCHW.")); + phi::errors::InvalidArgument("The layout of input is NCHW.")); ctx->SetOutputDim(framework::GradVarName("X"), input_dims); } diff --git a/paddle/fluid/operators/similarity_focus_op.cc b/paddle/fluid/operators/similarity_focus_op.cc deleted file mode 100644 index 4508459f25514..0000000000000 --- a/paddle/fluid/operators/similarity_focus_op.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/similarity_focus_op.h" - -namespace paddle { -namespace operators { -class SimilarityFocusOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor, default Tensor), a 4-D tensor with shape," - " [BatchSize, X, Y, Z]"); - AddOutput("Out", - "(Tensor, default Tensor), the similarity focus mask" - " with the same shape of input X."); - AddAttr("axis", - "(int32), indicating the dimension to be select. It can" - " only be 1, 2, or 3."); - AddAttr>("indexes", - "(std::vector), indicating the indexes" - " of the selected dimension."); - AddComment(R"DOC( -SimilarityFocus Operator. - -Generate a similarity focus mask with the same shape of input using the following method: -1. Extract the 3-D tensor(here the first dimension is BatchSize) corresponding - to the axis according to the indexes. For example, if axis=1 and indexes=[a], - it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X - is (BatchSize, A, B, C), the shape of tensor T is (BatchSize, B, C). -2. For each index, find the largest numbers in the tensor T, so that the same - row and same column has at most one number(what it means is that if the - largest number has been found in the i-th row and the j-th column, then - the numbers in the i-th row or j-th column will be skipped. And then the - next largest number will be selected from the remaining numbers. Obviously - there will be min(B, C) numbers), and mark the corresponding position of the - 3-D similarity focus mask as 1, otherwise as 0. Do elementwise-or for - each index. -3. Broadcast the 3-D similarity focus mask to the same shape of input X. 
- -Refer to `Similarity Focus Layer `_ -)DOC"); - } -}; - -class SimilarityFocusOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SimilarityFocus"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SimilarityFocus"); - - auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - x_dims.size(), - 4, - platform::errors::InvalidArgument( - "The dimension size of Input(X) be 4, but received %d.", - x_dims.size())); - ctx->SetOutputDim("Out", x_dims); - ctx->ShareLoD("X", /*->*/ "Out"); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - platform::CPUPlace()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - similarity_focus, - ops::SimilarityFocusOp, - ops::SimilarityFocusOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(similarity_focus, - CPU, - ALL_LAYOUT, - ops::SimilarityFocusKernel, - float, - double) {} diff --git a/paddle/fluid/operators/similarity_focus_op.h b/paddle/fluid/operators/similarity_focus_op.h deleted file mode 100644 index 32349e9570369..0000000000000 --- a/paddle/fluid/operators/similarity_focus_op.h +++ /dev/null @@ -1,187 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
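The kernel in the header deleted below flattens 4-D coordinates by hand via a compute_index lambda; it is the usual row-major offset formula, factored with Horner's rule in this hypothetical sketch:

```cpp
#include <cassert>
#include <cstdint>

// Row-major flattening for a tensor of shape [D1, D2, D3, D4]:
// offset(d1, d2, d3, d4) = ((d1*D2 + d2)*D3 + d3)*D4 + d4.
int64_t Flatten4D(const int64_t dim[4], int d1, int d2, int d3, int d4) {
  return ((static_cast<int64_t>(d1) * dim[1] + d2) * dim[2] + d3) * dim[3] + d4;
}

int main() {
  const int64_t dim[4] = {2, 3, 4, 5};
  // Same expansion as d1*dim[1]*dim[2]*dim[3] + d2*dim[2]*dim[3] + d3*dim[3] + d4.
  assert(Flatten4D(dim, 1, 2, 3, 4) == 1 * 60 + 2 * 20 + 3 * 5 + 4);  // 119
  return 0;
}
```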
*/ - -#pragma once - -#include -#include -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class SimilarityFocusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - phi::DenseTensor* out = context.Output("Out"); - const phi::DenseTensor* x = context.Input("X"); - T* out_data = out->mutable_data(context.GetPlace()); - const T* x_data = x->data(); - - int axis = context.Attr("axis"); - std::vector indexes = context.Attr>("indexes"); - - int64_t batch_size = x->dims()[0]; - int64_t dim[4]; - for (int i = 1; i <= 3; ++i) { - dim[i] = x->dims()[i]; - } - - PADDLE_ENFORCE_GT( - indexes.size(), - 0, - platform::errors::InvalidArgument("The size of Attr(indexes) must be " - "greater than 0, but received %d.", - indexes.size())); - - for (size_t i = 0; i < indexes.size(); i++) { - PADDLE_ENFORCE_GT( - dim[axis], - indexes[i], - platform::errors::InvalidArgument( - "Each value of Attr(indexes) must be less than X.dim[axis], " - "but indexes[%d] received %d.", - i, - indexes[i])); - } - - int64_t array_size = 1; - for (int i = 1; i <= 3; ++i) { - if (i != axis) { - array_size *= dim[i]; - } - } - - std::vector> array(array_size); - - bool (*cmp)(std::pair, std::pair) = - [](std::pair x, std::pair y) { - return x.first > y.first; - }; - - int64_t (*compute_index)(int64_t*, int, int, int, int) = - [](int64_t* dim, int d1, int d2, int d3, int d4) { - return d1 * dim[1] * dim[2] * dim[3] + d2 * dim[2] * dim[3] + - d3 * dim[3] + d4; - }; - - PADDLE_ENFORCE_GT( - axis, - 0, - platform::errors::InvalidArgument( - "The value of Attr(axis) must be 1 or 2 or 3, but received %d.", - axis)); - PADDLE_ENFORCE_LT( - axis, - 4, - platform::errors::InvalidArgument( - "The value of Attr(axis) must be 1 or 2 or 3, but received %d.", - axis)); - memset(out_data, 0, sizeof(T) * batch_size * dim[1] * dim[2] * dim[3]); - for (int i = 0; i < batch_size; ++i) { - for (auto index : indexes) { - if (axis == 1) { - for (int j = 0; j < dim[2]; ++j) { - for (int k = 0; k < dim[3]; ++k) { - array[j * dim[3] + k] = std::make_pair( - x_data[compute_index(dim, i, index, j, k)], j * dim[3] + k); - } - } - - std::sort(array.begin(), array.end(), cmp); - int tag_num = 0; - std::vector tag2(dim[2]), tag3(dim[3]); - for (auto x : array) { - int idx2 = x.second / dim[3]; - int idx3 = x.second % dim[3]; - if (tag2[idx2] || tag3[idx3]) { - continue; - } - tag_num++; - tag2[idx2] = true; - tag3[idx3] = true; - for (int j = 0; j < dim[1]; ++j) { - out_data[compute_index(dim, i, j, idx2, idx3)] = 1; - } - if (tag_num == std::min(dim[2], dim[3])) { - break; - } - } - } else if (axis == 2) { - for (int j = 0; j < dim[1]; ++j) { - for (int k = 0; k < dim[3]; ++k) { - array[j * dim[3] + k] = std::make_pair( - x_data[compute_index(dim, i, j, index, k)], j * dim[3] + k); - } - } - - std::sort(array.begin(), array.end(), cmp); - int tag_num = 0; - std::vector tag1(dim[1]), tag3(dim[3]); - for (auto x : array) { - int idx1 = x.second / dim[3]; - int idx3 = x.second % dim[3]; - if (tag1[idx1] || tag3[idx3]) { - continue; - } - tag_num++; - tag1[idx1] = true; - tag3[idx3] = true; - for (int j = 0; j < dim[2]; ++j) { - out_data[compute_index(dim, i, idx1, j, idx3)] = 1; - } - if (tag_num == std::min(dim[1], dim[3])) { - break; - } - } - } else if (axis == 3) { - for (int j = 0; j < dim[1]; ++j) { - for (int k = 0; k < dim[2]; ++k) { - array[j * dim[2] + k] = 
std::make_pair( - x_data[compute_index(dim, i, j, k, index)], j * dim[2] + k); - } - } - - std::sort(array.begin(), array.end(), cmp); - int tag_num = 0; - std::vector tag1(dim[1]), tag2(dim[2]); - for (auto x : array) { - int idx1 = x.second / dim[2]; - int idx2 = x.second % dim[2]; - if (tag1[idx1] || tag2[idx2]) { - continue; - } - tag_num++; - tag1[idx1] = true; - tag2[idx2] = true; - for (int j = 0; j < dim[3]; ++j) { - out_data[compute_index(dim, i, idx1, idx2, j)] = 1; - } - if (tag_num == std::min(dim[1], dim[2])) { - break; - } - } - } - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 16b895ce557a7..881e3b59f0db7 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -40,7 +40,7 @@ class SliceOp : public framework::OperatorWithKernel { if (x_var_type == framework::proto::VarType::LOD_TENSOR_ARRAY) { PADDLE_ENFORCE_EQ(axes.size(), 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of axes must be 1 when the Input of " "SliceOp is LoDTensorArray, " "but received %d.", @@ -63,7 +63,7 @@ class SliceOp : public framework::OperatorWithKernel { auto in_dims = ctx->GetInputDim("Input"); PADDLE_ENFORCE_LT(in_dims.size(), 7, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of input should be less than 7.")); framework::DDim out_dims(in_dims); @@ -83,31 +83,31 @@ class SliceOp : public framework::OperatorWithKernel { if (ctx->HasInputs("StartsTensorList")) { starts_size = ctx->Inputs("StartsTensorList").size(); - PADDLE_ENFORCE_GT(starts_size, - 0, - platform::errors::InvalidArgument( - "StartsTensorList size can't be zero")); + PADDLE_ENFORCE_GT( + starts_size, + 0, + phi::errors::InvalidArgument("StartsTensorList size can't be zero")); } if (ctx->HasInputs("EndsTensorList")) { ends_size = ctx->Inputs("EndsTensorList").size(); - PADDLE_ENFORCE_GT(ends_size, - 0, - platform::errors::InvalidArgument( - "EndsTensorList size can't be zero")); + PADDLE_ENFORCE_GT( + ends_size, + 0, + phi::errors::InvalidArgument("EndsTensorList size can't be zero")); } if (!ctx->HasInput("StartsTensor")) { PADDLE_ENFORCE_EQ( starts_size, axes.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of starts must be equal to the size of axes.")); } if (!ctx->HasInput("EndsTensor")) { PADDLE_ENFORCE_EQ( ends_size, axes.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of ends must be equal to the size of axes.")); } for (auto &axis : axes) { @@ -143,7 +143,7 @@ class SliceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_tensor.IsInitialized(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tensor Input (Input) of Slice op is not initialized.")); // NOTE: cuda pinned tensor need to copy its data to target place if (platform::is_cuda_pinned_place(in_tensor.place())) { @@ -304,14 +304,13 @@ class SliceOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), + true, + phi::errors::InvalidArgument("Input should not be null")); PADDLE_ENFORCE_EQ( - ctx->HasInput("Input"), + ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::InvalidArgument("Input should not be null")); - 
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Input(Out@GRAD) should not be null")); + phi::errors::InvalidArgument("Input(Out@GRAD) should not be null")); auto x_var_type = ctx->GetInputsVarType("Input")[0]; if (x_var_type == framework::proto::VarType::LOD_TENSOR_ARRAY) { // If the var type of input is LOD_TENSOR_ARRAY, diff --git a/paddle/fluid/operators/soft_relu_op.cu b/paddle/fluid/operators/soft_relu_op.cu index 3963b372c9c8e..e4273c73530f6 100644 --- a/paddle/fluid/operators/soft_relu_op.cu +++ b/paddle/fluid/operators/soft_relu_op.cu @@ -39,7 +39,7 @@ PD_REGISTER_STRUCT_KERNEL(soft_relu, ops::SoftReluCudaKernel, float, double, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} PD_REGISTER_STRUCT_KERNEL(soft_relu_grad, GPU, @@ -47,5 +47,5 @@ PD_REGISTER_STRUCT_KERNEL(soft_relu_grad, ops::SoftReluGradCudaKernel, float, double, - plat::float16, + phi::dtype::float16, plat::bfloat16) {} diff --git a/paddle/fluid/operators/sparse_attention_op.cc b/paddle/fluid/operators/sparse_attention_op.cc index 26dfc0fbbc64d..6d6a567ab1b61 100644 --- a/paddle/fluid/operators/sparse_attention_op.cc +++ b/paddle/fluid/operators/sparse_attention_op.cc @@ -99,15 +99,15 @@ class SparseAttentionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(dims_q.size(), static_cast(4), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dimension in query' shapes should be 4.")); - PADDLE_ENFORCE_EQ(dims_k.size(), - static_cast(4), - platform::errors::InvalidArgument( - "Dimension in key' shapes should be 4.")); + PADDLE_ENFORCE_EQ( + dims_k.size(), + static_cast(4), + phi::errors::InvalidArgument("Dimension in key' shapes should be 4.")); PADDLE_ENFORCE_EQ(dims_v.size(), static_cast(4), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dimension in value' shapes should be 4.")); auto batch_size = dims_q[0]; diff --git a/paddle/fluid/operators/sparse_attention_op.cu b/paddle/fluid/operators/sparse_attention_op.cu index 117de1c1a55df..ec41a829e7f72 100644 --- a/paddle/fluid/operators/sparse_attention_op.cu +++ b/paddle/fluid/operators/sparse_attention_op.cu @@ -314,7 +314,7 @@ void SparseSoftmaxForward(const phi::GPUContext& ctx, columns_data, num_rows); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The head_dim of query in sparse_attention op should less or equal " "512")); } @@ -412,7 +412,7 @@ void SparseSoftmaxBackward(const phi::GPUContext& ctx, columns_data, num_rows); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The head_dim of query in sparse_attention op should less or equal " "512")); } @@ -425,7 +425,7 @@ inline cudaDataType_t GetGpuType(const VarType::Type data_type) { } else if (data_type == VarType::FP64) { return CUDA_R_64F; } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Not support tensor type in sparse_attention OP: %s", framework::DataTypeToString(data_type))); } diff --git a/paddle/fluid/operators/split_lod_tensor_op.cc b/paddle/fluid/operators/split_lod_tensor_op.cc deleted file mode 100644 index 6b79d5c35b783..0000000000000 --- a/paddle/fluid/operators/split_lod_tensor_op.cc +++ /dev/null @@ -1,233 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/phi/core/lod_utils.h" - -namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace framework { -class InferShapeContext; -class OpDesc; -class Scope; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -namespace paddle { -namespace operators { - -struct CopyRange { - size_t begin; - size_t end; -}; - -using LoD = framework::LoD; - -class SplitLoDTensorOp : public framework::OperatorBase { - public: - SplitLoDTensorOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const framework::Scope &scope, - const platform::Place &dev_place) const override { - auto &x = scope.FindVar(Input("X"))->Get(); - auto &mask = scope.FindVar(Input("Mask"))->Get(); - auto *out_true = - scope.FindVar(Output("OutTrue"))->GetMutable(); - auto *out_false = - scope.FindVar(Output("OutFalse"))->GetMutable(); - auto level = static_cast(Attr("level")); - auto &x_lod = x.lod(); - auto &mask_dim = mask.dims(); - - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(dev_place); - - std::unique_ptr cpu_mask{new phi::DenseTensor()}; - if (platform::is_cpu_place(mask.place())) { - cpu_mask->ShareDataWith(mask); - } else if (platform::is_gpu_place(mask.place())) { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - framework::TensorCopy( - mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); -#else - PADDLE_THROW(paddle::platform::errors::Fatal( - "Not support GPU, Please compile WITH_GPU option")); -#endif - } - auto *mask_data = cpu_mask->data(); - - std::vector> copy_ranges(2); - - // set out_true/out_false lod - for (size_t t = 0; t < 2; t++) { - LoD *lod = nullptr; - if (t == 0) { - lod = out_false->mutable_lod(); - } else { - lod = out_true->mutable_lod(); - } - lod->clear(); - for (size_t i = 0; i < static_cast(mask_dim[0]); i++) { - if (static_cast(mask_data[i]) == t) { - size_t start_idx = i; - auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( - x_lod, start_idx, start_idx + 1, level); - - auto &lod_length = lod_and_offset.first; - phi::AppendLoD(lod, lod_length); - - size_t start_offset = lod_and_offset.second.first; - size_t end_offset = lod_and_offset.second.second; - copy_ranges[t].emplace_back(CopyRange{start_offset, end_offset}); - } - } - } - - for (size_t t = 0; t < 2; ++t) { - phi::DenseTensor *out = nullptr; - if (t == 0) { - out = out_false; - } else { - out = out_true; - } - auto &ranges = copy_ranges[t]; - size_t height = std::accumulate( - ranges.begin(), ranges.end(), 0UL, [](size_t a, const CopyRange &b) { - return a + b.end - b.begin; - }); - auto x_dim = x.dims(); - x_dim[0] = static_cast(height); - out->Resize(x_dim); - out->mutable_data(x.place(), x.type()); - size_t offset = 0; - for (auto &each_range : ranges) { - size_t len = 
each_range.end - each_range.begin; - if (len == 0) { - continue; - } - // out[offset: offset+len] = x[each_range.begin: each_range.end] - auto slice = out->Slice(static_cast(offset), - static_cast(offset + len)); - framework::TensorCopy(x.Slice(static_cast(each_range.begin), - static_cast(each_range.end)), - x.place(), - dev_ctx, - &slice); - offset += len; - } - } - } -}; - -class SplitLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The input phi::DenseTensor"); - AddInput("Mask", "A bool column vector which mask the input"); - AddOutput("OutTrue", "True branch of input phi::DenseTensor"); - AddOutput("OutFalse", "False branch of input phi::DenseTensor"); - AddAttr("level", "(int) the specific lod level to split.") - .SetDefault(0) - .EqualGreaterThan(0); - AddComment( - R"DOC( - Split a phi::DenseTensor with a Mask at certain level. The input phi::DenseTensor - has 3 sequence at certain lod level. The Mask is a bool column vector, - such as [0, 1, 0] at the same level. The first and third sequence will - be send to False Output phi::DenseTensor; whereas the second sequence will - be send to True Output phi::DenseTensor. Please refer to MergeLoDTensorOp.)DOC"); - } -}; - -class SplitLoDTensorInferShape : public framework::InferShapeBase { - public: - void operator()(framework::InferShapeContext *context) const override { - OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "SplitLoDTensor"); - OP_INOUT_CHECK( - context->HasInput("Mask"), "Input", "Mask", "SplitLoDTensor"); - OP_INOUT_CHECK( - context->HasOutput("OutTrue"), "Output", "OutTrue", "SplitLoDTensor"); - OP_INOUT_CHECK( - context->HasOutput("OutFalse"), "Output", "OutFalse", "SplitLoDTensor"); - - auto mask_dim = context->GetInputDim("Mask"); - PADDLE_ENFORCE_EQ( - mask_dim.size(), - 2, - platform::errors::InvalidArgument( - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond should be a 2-D tensor and " - "the second dim size of cond should be 1. " - "But now the cond's shape is [", - *mask_dim.Get(), - "].\n")); - PADDLE_ENFORCE_EQ(mask_dim[1], - 1, - platform::errors::InvalidArgument( - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond should be a 2-D tensor " - "and the second dim size of cond should be 1. 
" - "But now the cond's shape is [", - *mask_dim.Get(), - "].\n")); - - context->SetOutputDim("OutTrue", context->GetInputDim("X")); - context->SetOutputDim("OutFalse", context->GetInputDim("X")); - } -}; - -template -class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr grad_op) const override { - grad_op->SetType("merge_lod_tensor"); - grad_op->SetInput("InTrue", this->OutputGrad("OutTrue")); - grad_op->SetInput("InFalse", this->OutputGrad("OutFalse")); - grad_op->SetInput("Mask", this->Input("Mask")); - grad_op->SetInput("X", this->Input("X")); - grad_op->SetOutput("Out", this->InputGrad("X")); - grad_op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - split_lod_tensor, - ops::SplitLoDTensorOp, - ops::SplitLoDTensorOpProtoMaker, - ops::SplitLoDTensorInferShape, - ops::SplitLoDTensorArrayGradMaker, - ops::SplitLoDTensorArrayGradMaker); diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index ddda1131f5cc7..1a4eace1f3398 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -35,11 +35,11 @@ class SplitOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of SplitOp should not be null.")); PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Outputs(Out) of SplitOp should not be empty.")); int axis = static_cast(ctx->Attrs().Get("axis")); int num = static_cast(ctx->Attrs().Get("num")); @@ -218,7 +218,7 @@ class SplitCompositeGradOpMaker : public prim::CompositeGradOpMakerBase { std::vector out_grad = this->GetMultiOutputGrad("Out"); if (tensor_axis.is_initialized() || tensor_sections.is_initialized()) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "We don't support dynamic index or sections from tensor for split " "composite grad for now. ")); } else { diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h index aaee366a4636a..e7ba7d0706fd2 100644 --- a/paddle/fluid/operators/split_op.h +++ b/paddle/fluid/operators/split_op.h @@ -20,7 +20,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/utils.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/split_kernel.h" namespace paddle { namespace operators { @@ -39,7 +39,7 @@ static inline std::vector UpdateOutsDims( PADDLE_ENFORCE_EQ( input_axis_dim % num, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The input's size along the split dimension " "must be evenly divisible by Attr(num_or_sections). " "But received Attr(num_or_sections) " @@ -75,7 +75,7 @@ static inline std::vector UpdateOutsDims( PADDLE_ENFORCE_LE( num_of_unk, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Only one dimension value of Attr(num_or_sections) " "in SplitOp can be -1. 
" "But received Attr(num_or_sections) = [%s].", @@ -89,7 +89,7 @@ static inline std::vector UpdateOutsDims( PADDLE_ENFORCE_LT( sum_of_section, input_axis_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sum of Attr(num_or_sections) other than unknown section " "must be less than the input's " "size " @@ -105,7 +105,7 @@ static inline std::vector UpdateOutsDims( PADDLE_ENFORCE_EQ( sum_of_section, input_axis_dim, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sum of Attr(num_or_sections) must be equal to the input's " "size " "along the split dimension. But received Attr(num_or_sections)" diff --git a/paddle/fluid/operators/spp_op.cc b/paddle/fluid/operators/spp_op.cc deleted file mode 100644 index 98072746e8eee..0000000000000 --- a/paddle/fluid/operators/spp_op.cc +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -Indicesou may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/spp_op.h" - -#include -#include -namespace paddle { -namespace operators { - -class SppOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "X", - "(Tensor) The input tensor of spp operator. " - "The format of input tensor is NCHW. Where N is batch size, C is the " - "number of channels, H and W is the height and width of feature."); - AddOutput("Out", - "(Tensor) The output tensor of spp operator." - "N * M." - "M = C * H * W"); - AddAttr("pyramid_height", "(int), multi level pooling"); - AddAttr( - "pooling_type", - "(string), pooling type, can be \"max\" for max-pooling " - "and \"avg\" for average-pooling.") - .InEnum({"max", "avg"}); - AddComment(R"DOC( - "With spatial pyramid pooling, the input image can - be of any sizes. This not only allows arbitrary aspect - ratios, but also allows arbitrary scales. We can resize - the input image to any scale (e.g., min(w, h)=180, 224, - ...) and apply the same deep network. When the - input image is at different scales, the network (with - the same filter sizes) will extract features at different - scales. The scales play important roles in traditional - methods. 
- Input shape: $(N, C_{in}, H_{in}, W_{in})$ - Output shape: $(H_{out}, W_{out})$ - Where - $$ - H_{out} = N \\ - W_{out} = (((4^pyramid_height) - 1) / (4 - 1))$ * C_{in} - $$ - paper https://arxiv.org/pdf/1406.4729v4.pdf - )DOC"); - } -}; - -class SppOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::InvalidArgument( - "Input(X) of SppOp should not be null.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::InvalidArgument( - "Output(Out) of SppOp should not be null.")); - auto in_x_dims = ctx->GetInputDim("X"); - int pyramid_height = ctx->Attrs().Get("pyramid_height"); - PADDLE_ENFORCE_EQ(in_x_dims.size(), - 4, - platform::errors::InvalidArgument( - "Spping intput must be of 4-dimensional.")); - int outlen = - ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1]; // NOLINT - std::vector output_shape({in_x_dims[0], outlen}); - ctx->SetOutputDim("Out", common::make_ddim(output_shape)); - } -}; - -class SppOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - platform::errors::InvalidArgument("Input(X) must not be null.")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput(framework::GradVarName("X")), - true, - platform::errors::InvalidArgument("Input(X@GRAD) should not be null.")); - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - spp, - ops::SppOp, - ops::SppOpMaker, - paddle::framework::DefaultGradOpMaker, - paddle::framework::DefaultGradOpMaker); -REGISTER_OPERATOR(spp_grad, ops::SppOpGrad); - -PD_REGISTER_STRUCT_KERNEL(spp, CPU, ALL_LAYOUT, ops::SppKernel, float, double) { -} -PD_REGISTER_STRUCT_KERNEL( - spp_grad, CPU, ALL_LAYOUT, ops::SppGradKernel, float, double) {} diff --git a/paddle/fluid/operators/spp_op.cu.cc b/paddle/fluid/operators/spp_op.cu.cc deleted file mode 100644 index b41fa8ae5fcf7..0000000000000 --- a/paddle/fluid/operators/spp_op.cu.cc +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -Indicesou may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/spp_op.h" - -namespace ops = paddle::operators; - -PD_REGISTER_STRUCT_KERNEL(spp, GPU, ALL_LAYOUT, ops::SppKernel, float, double) { -} -PD_REGISTER_STRUCT_KERNEL( - spp_grad, GPU, ALL_LAYOUT, ops::SppGradKernel, float, double) {} diff --git a/paddle/fluid/operators/spp_op.h b/paddle/fluid/operators/spp_op.h deleted file mode 100644 index 5d3f4a78020a0..0000000000000 --- a/paddle/fluid/operators/spp_op.h +++ /dev/null @@ -1,220 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
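The outlen expression in SppOp::InferShape above is the geometric series 1 + 4 + ... + 4^(H-1) = (4^H - 1)/3 cells per channel, since pyramid level p pools to a 2^p x 2^p grid. A quick standalone check, using hypothetical pyramid_height = 3 and channels = 8:

```cpp
#include <cassert>
#include <cmath>

// Levels p = 0..H-1 each produce a 2^p x 2^p grid, i.e. 4^p cells, so the
// per-channel total is (4^H - 1) / 3 -- the closed form used in InferShape.
int main() {
  const int pyramid_height = 3, channels = 8;
  int cells = 0;
  for (int p = 0; p < pyramid_height; ++p) cells += (1 << p) * (1 << p);
  const int outlen =
      ((static_cast<int>(std::pow(4, pyramid_height)) - 1) / 3) * channels;
  assert(cells * channels == outlen);  // 21 cells * 8 channels == 168
  return 0;
}
```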
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -Indicesou may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/phi_utils.h" -#include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/funcs/pooling.h" -#include "paddle/phi/kernels/funcs/strided_memcpy.h" - -namespace paddle { -namespace operators { -template -class SppKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const phi::DenseTensor* in_x = context.Input("X"); - auto* out = context.Output("Out"); - int pyramid_height = context.template Attr("pyramid_height"); - std::string pooling_type = - context.template Attr("pooling_type"); - out->mutable_data(context.GetPlace()); - auto out_stride = common::stride(out->dims()); - int input_h = in_x->dims()[2]; - int input_w = in_x->dims()[3]; - size_t output_offset = 0; - for (int p = 0; p < pyramid_height; ++p) { - int bins = std::pow(2, p); - int kernel_size_h = std::ceil(input_h / static_cast(bins)); - int kernel_size_w = std::ceil(input_w / static_cast(bins)); - int padding_h = (kernel_size_h * bins - input_h + 1) / 2; - int padding_w = (kernel_size_w * bins - input_w + 1) / 2; - std::vector kernel_size({kernel_size_h, kernel_size_w}); - std::vector strides({kernel_size_h, kernel_size_w}); - std::vector paddings({padding_h, padding_w}); - // pooling output shape - phi::DenseTensor out_level; - std::vector output_shape_vec( - {in_x->dims()[0], in_x->dims()[1], bins, bins}); - framework::DDim output_shape(common::make_ddim(output_shape_vec)); - out_level.mutable_data(output_shape, context.GetPlace()); - // pooling - if (pooling_type == "max") { - phi::funcs::Pool2dFunctor< - typename framework::ConvertToPhiContext::TYPE, - phi::funcs::MaxPool, - T> - pool_forward; - phi::funcs::MaxPool max_process; - pool_forward(context.template device_context(), - *in_x, - kernel_size, - strides, - paddings, - true, - false, - &out_level, - max_process); - } else if (pooling_type == "avg") { - phi::funcs::Pool2dFunctor< - typename framework::ConvertToPhiContext::TYPE, - phi::funcs::AvgPool, - T> - pool_forward; - phi::funcs::AvgPool avg_process; - pool_forward(context.template device_context(), - *in_x, - kernel_size, - strides, - paddings, - true, - false, - &out_level, - avg_process); - } - // flatten pooling output shape - int output_flatten_w = in_x->dims()[1] * bins * bins; - std::vector output_flatten_shape_vec( - {in_x->dims()[0], output_flatten_w}); - framework::DDim output_flatten_shape( - common::make_ddim(output_flatten_shape_vec)); - out_level.Resize(output_flatten_shape); - // concat - auto out_level_stride = common::stride(out_level.dims()); - phi::funcs::StridedMemcpy( - context.template device_context(), - out_level.data(), - out_level_stride, - out_level.dims(), - out_stride, - out->data() + output_offset); - output_offset += out_level.dims()[1] * out_level_stride[1]; - } - } -}; -template -class SppGradKernel : public framework::OpKernel { - public: 
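Each pyramid level in SppKernel::Compute below derives its pooling window, stride, and padding so that exactly `bins` outputs fit along every spatial axis. A standalone check of that geometry (a sketch assuming input >= bins, as spatial pyramid pooling expects):

```cpp
#include <cassert>
#include <cmath>

// For one pyramid level: window k = ceil(input / bins), stride = k, and
// padding p = (k * bins - input + 1) / 2, so the standard pooling output
// size (input - k + 2p) / k + 1 comes out to exactly `bins`.
int PooledDim(int input, int bins) {
  const int k = static_cast<int>(std::ceil(input / static_cast<double>(bins)));
  const int p = (k * bins - input + 1) / 2;
  return (input - k + 2 * p) / k + 1;
}

int main() {
  for (int input : {4, 6, 7, 9, 16}) {
    for (int bins : {1, 2, 4}) {
      if (input >= bins) assert(PooledDim(input, bins) == bins);
    }
  }
  return 0;
}
```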
- void Compute(const framework::ExecutionContext& context) const override { - const phi::DenseTensor* in_x = context.Input("X"); - const phi::DenseTensor* out = context.Input("Out"); - const phi::DenseTensor* out_grad = - context.Input(framework::GradVarName("Out")); - phi::DenseTensor* in_x_grad = - context.Output(framework::GradVarName("X")); - int pyramid_height = context.template Attr("pyramid_height"); - std::string pooling_type = - context.template Attr("pooling_type"); - auto& device_ctx = context.template device_context(); - phi::funcs::SetConstant< - typename framework::ConvertToPhiContext::TYPE, - T> - zero; - in_x_grad->mutable_data(context.GetPlace()); - zero(device_ctx, in_x_grad, static_cast(0)); - auto out_stride = common::stride(out->dims()); - int input_h = in_x->dims()[2]; - int input_w = in_x->dims()[3]; - size_t out_offset = 0; - for (int p = 0; p < pyramid_height; ++p) { - int bins = std::pow(2, p); - int kernel_size_h = std::ceil(input_h / static_cast(bins)); - int kernel_size_w = std::ceil(input_w / static_cast(bins)); - int padding_h = (kernel_size_h * bins - input_h + 1) / 2; - int padding_w = (kernel_size_w * bins - input_w + 1) / 2; - std::vector kernel_size({kernel_size_h, kernel_size_w}); - std::vector strides({kernel_size_h, kernel_size_w}); - std::vector paddings({padding_h, padding_w}); - // split out and outgrad ... to flatten - phi::DenseTensor out_level; - phi::DenseTensor outgrad_level; - int out_flatten_w = in_x->dims()[1] * bins * bins; - std::vector out_flatten_shape_vec( - {in_x->dims()[0], out_flatten_w}); - framework::DDim out_flatten_shape( - common::make_ddim(out_flatten_shape_vec)); - out_level.mutable_data(out_flatten_shape, context.GetPlace()); - outgrad_level.mutable_data(out_flatten_shape, context.GetPlace()); - auto flatten_stride = common::stride(out_level.dims()); - // memcpy - phi::funcs::StridedMemcpy( - context.template device_context(), - out->data() + out_offset, - out_stride, - out_level.dims(), - flatten_stride, - out_level.data()); - - phi::funcs::StridedMemcpy( - context.template device_context(), - out_grad->data() + out_offset, - out_stride, - outgrad_level.dims(), - flatten_stride, - outgrad_level.data()); - out_offset += out_level.dims()[1] * out_stride[1]; - // flatten backward to nchw - - std::vector out_shape_vec({in_x->dims()[0], in_x->dims()[1]}); - out_shape_vec.push_back( - (input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1); - out_shape_vec.push_back( - (input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1); - framework::DDim out_shape(common::make_ddim(out_shape_vec)); - out_level.ShareDataWith(out_level); - out_level.Resize(out_shape); - outgrad_level.ShareDataWith(outgrad_level); - outgrad_level.Resize(out_shape); - // pooling backward - if (pooling_type == "max") { - phi::funcs::MaxPool2dGradFunctor< - typename framework::ConvertToPhiContext::TYPE, - T> - pool2d_backward; - pool2d_backward(context.template device_context(), - *in_x, - *&out_level, - *&outgrad_level, - kernel_size, - strides, - paddings, - in_x_grad); - } else if (pooling_type == "avg") { - phi::funcs::Pool2dGradFunctor< - typename framework::ConvertToPhiContext::TYPE, - phi::funcs::AvgPoolGrad, - T> - pool_backward; - phi::funcs::AvgPoolGrad avg_process; - pool_backward(context.template device_context(), - *in_x, - *&out_level, - *&outgrad_level, - kernel_size, - strides, - paddings, - true, - false, - in_x_grad, - avg_process); - } - } - } -}; -} // namespace operators -} // namespace paddle diff --git 
diff --git a/paddle/fluid/operators/squeeze_op.h b/paddle/fluid/operators/squeeze_op.h index 10ff809d60888..21aed7b00882c 100644 --- a/paddle/fluid/operators/squeeze_op.h +++ b/paddle/fluid/operators/squeeze_op.h @@ -45,7 +45,7 @@ framework::DDim GetOutputShape(const std::vector<int> squeeze_dims, PADDLE_ENFORCE_GE( current, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each axis in Attr(axes) should be in the range of [%d, %d]" "But current axis is:%d, input tensor's shape = [%s].", -in_dims.size(), @@ -55,7 +55,7 @@ framework::DDim GetOutputShape(const std::vector<int> squeeze_dims, PADDLE_ENFORCE_LT( current, in_dims.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Each axis in Attr(axes) should be in the range of [%d, %d]" "But current axis is:%d, input tensor's shape = [%s].", -in_dims.size(), diff --git a/paddle/fluid/operators/stft_op.cc b/paddle/fluid/operators/stft_op.cc index 34f6ee854dd7b..b1165bf2bf295 100644 --- a/paddle/fluid/operators/stft_op.cc +++ b/paddle/fluid/operators/stft_op.cc @@ -36,20 +36,20 @@ class StftOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_rank, 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) of StftOp should be a tensor with shape [N, T], " "but got rank %s.", x_rank)); PADDLE_ENFORCE_GT( hop_length, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute(hop_length) should be greater than 0, but got %s.", hop_length)); PADDLE_ENFORCE_EQ( window_size, n_fft, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Window) of StftOp should be equal with n_fft %s, " "but got %s.", n_fft, @@ -60,7 +60,7 @@ class StftOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE(n_fft, seq_length, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute(frame_length) should be less equal than " "sequence length, but got (%s) > (%s).", n_fft, diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index d8b7e35d6d3a1..6cbb99ff2032f 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -38,20 +38,20 @@ class SumOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GT( x_vars.size(), 0, - platform::errors::InvalidArgument("Input[X] should not be empty")); + phi::errors::InvalidArgument("Input[X] should not be empty")); PADDLE_ENFORCE_NOT_NULL( x_vars[0], - platform::errors::NotFound("Input var[%s] should not be nullptr", - x_vars_name[0])); + phi::errors::NotFound("Input var[%s] should not be nullptr", + x_vars_name[0])); if (x_vars[0]->IsType<phi::DenseTensor>()) { int dtype = -1; for (size_t idx = 0; idx < x_vars.size(); ++idx) { PADDLE_ENFORCE_NOT_NULL( x_vars[idx], - platform::errors::NotFound("Input var[%s] should not be nullptr", - x_vars_name[idx])); + phi::errors::NotFound("Input var[%s] should not be nullptr", + x_vars_name[idx])); auto tensor = framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_vars[idx]); if (!tensor->IsInitialized()) { @@ -62,13 +62,13 @@ class SumOp : public framework::OperatorWithKernel { } else { PADDLE_ENFORCE_EQ(dtype, framework::TransToProtoVarType(tensor->dtype()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The inputs type of sum op must be same")); } } PADDLE_ENFORCE_NE(dtype, -1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Sum operator should have at least one tensor")); auto data_type = static_cast<framework::proto::VarType::Type>(dtype); @@ -108,13 +108,13 @@ class SumOp : public framework::OperatorWithKernel { } } } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected each tensor in Input(x) in sum op has be initialized, but " "some tensor in Input(x) is not be initialized, please check your " "code.", framework::ToTypeName(x_vars[0]->Type()))); } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected type of Input(X) must be Tensor, SelectedRows or " "LodTensorArray. But got " "unsupport type: %s.", @@ -164,7 +164,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { << " type is " << ctx->GetInputType("X", static_cast<int>(ind)) << "\n"; } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Not all inputs are tensor array:\n%s", os.str())); } var_type = framework::proto::VarType::LOD_TENSOR_ARRAY;
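Aside from the file deletions, the bulk of this patch is the mechanical rename visible in the squeeze/stft/sum hunks above: the error factories move from the fluid platform namespace to phi. A minimal sketch of the pattern, using one of the checks above (same macro, same message; only the namespace changes):

// before
PADDLE_ENFORCE_GT(
    x_vars.size(),
    0,
    platform::errors::InvalidArgument("Input[X] should not be empty"));
// after
PADDLE_ENFORCE_GT(
    x_vars.size(),
    0,
    phi::errors::InvalidArgument("Input[X] should not be empty"));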
diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index 273e2c7b65100..c480bb9bb12e9 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -24,12 +24,12 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/diag_op.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/complex_functors.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -67,9 +67,9 @@ struct RealMulComplexFunctor { PADDLE_ENFORCE_LT( y.imag, 1e-6, - platform::errors::InvalidArgument("The image part of y must to be 0" - "but got [%d]", - y.imag)); + phi::errors::InvalidArgument("The image part of y must to be 0" + "but got [%d]", + y.imag)); return platform::complex<T>(x.real * y.real, x.imag * y.real); } @@ -79,9 +79,9 @@ static std::vector<int64_t> GetBroadcastShape(InTensors ins) { PADDLE_ENFORCE_EQ( ins.size(), 2, - platform::errors::InvalidArgument("GetBroadcastShape Receive 2 tensors" - "but got [%d]", - ins.size())); + phi::errors::InvalidArgument("GetBroadcastShape Receive 2 tensors" + "but got [%d]", + ins.size())); auto x_dim = ins[0]->dims(); auto y_dim = ins[1]->dims(); std::vector<int64_t> broadcast_shape = @@ -104,7 +104,7 @@ static std::vector<int64_t> GetBroadcastShape(InTensors ins) { broadcast_shape[final_rank - i] = x_dim[rank_x - i]; continue; } - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Wrong Input Shape in broadcast operator: " "Input(X)'s shape must follow the broadcast rule with Input(Y)'s " "shape, but received [%s] (X) vs [%s] (Y).", @@ -125,14 +125,14 @@ static inline framework::DDim ComputeAndCheckShapeForConcatOp( PADDLE_ENFORCE_EQ( inputs_dims[i].size(), out_dims.size(), - platform::errors::InvalidArgument("The shape of input[0] and input[%d] " - "is expected to be equal." - "But received input[0]'s shape = " - "[%s], input[%d]'s shape = [%s].", - i, - inputs_dims[0], - i, - inputs_dims[i])); + phi::errors::InvalidArgument("The shape of input[0] and input[%d] " + "is expected to be equal."
+ "But received input[0]'s shape = " + "[%s], input[%d]'s shape = [%s].", + i, + inputs_dims[0], + i, + inputs_dims[i])); for (size_t j = 0; j < in_zero_dims_size; j++) { if (j == axis) { if (is_runtime) { @@ -151,7 +151,7 @@ static inline framework::DDim ComputeAndCheckShapeForConcatOp( // check all shape in run time PADDLE_ENFORCE_EQ(inputs_dims[0][j], inputs_dims[i][j], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The %d-th dimension of input[0] and input[%d] " "is expected to be equal." "But received input[0]'s shape = " @@ -175,7 +175,7 @@ static inline int64_t ComputeAxisForConcatOp(int64_t axis, int64_t rank) { PADDLE_ENFORCE_EQ( axis >= -rank && axis < rank, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The axis is expected to be in range of [%d, %d), but got %d", -rank, rank, @@ -205,7 +205,7 @@ static std::vector get_broadcast_batch_portion( PADDLE_ENFORCE_EQ( (x_size == y_size || x_size == 1 || y_size == 1), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The size of tensor x (%d) must match the size of tensor y " "(%d) at non-singleton dimension %d.", x_size, @@ -337,7 +337,7 @@ struct DeviceIndependenceTensorOperations { DITO_TRANSPOSE_RANK_CASE(5); DITO_TRANSPOSE_RANK_CASE(6); default: { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Invalid Rank number, " "currently only support rank between 2~6")); } @@ -350,11 +350,11 @@ struct DeviceIndependenceTensorOperations { int padding_value = 0) { PADDLE_ENFORCE_EQ(padding_value, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Current diag only support padding_value = 0")); PADDLE_ENFORCE_EQ(offset, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Current diag only support offset = 0," "you can use DiagOp instead(not recommend)")); @@ -362,7 +362,7 @@ struct DeviceIndependenceTensorOperations { int x_rank = x.dims().size(); std::vector out_shape; if (x_rank == 2) { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Current diag only support vector" "-> diagonalized matrix, not support matrix -> vector," " Use DiagOp instead.")); @@ -371,7 +371,7 @@ struct DeviceIndependenceTensorOperations { out_shape.push_back(x.dims()[0]); } else { PADDLE_THROW( - platform::errors::InvalidArgument("Rank must less or equal than 2")); + phi::errors::InvalidArgument("Rank must less or equal than 2")); } ret = Fill({out_shape[0], out_shape[0]}, 0.0); T* output = ret.mutable_data(context.GetPlace()); @@ -540,11 +540,11 @@ struct DeviceIndependenceTensorOperations { PADDLE_ENFORCE_EQ( axes.size(), starts.size(), - platform::errors::InvalidArgument("Slice Operator Argument Invalided")); + phi::errors::InvalidArgument("Slice Operator Argument Invalided")); PADDLE_ENFORCE_EQ( ends.size(), starts.size(), - platform::errors::InvalidArgument("Slice Operator Argument Invalided")); + phi::errors::InvalidArgument("Slice Operator Argument Invalided")); for (unsigned int i = 0; i < axes.size(); ++i) { int axis = axes[i]; if (axis < 0) axis = rank + axis; @@ -553,7 +553,7 @@ struct DeviceIndependenceTensorOperations { int ed = ends[i]; PADDLE_ENFORCE_GT(ed, st, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "C++ Slice Operation Not Support End < Start")); out_shape[axis] = ed - st; } @@ -576,7 +576,7 @@ struct DeviceIndependenceTensorOperations { DITO_SLICE_RANK_CASE(5); DITO_SLICE_RANK_CASE(6); default: { - 
PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Invalid Rank number, " "currently only support rank between 2~6")); } @@ -584,22 +584,6 @@ struct DeviceIndependenceTensorOperations { return ret; } - phi::DenseTensor TrilTriu(const phi::DenseTensor& x, - int diagonal, - bool lower) { - framework::AttributeMap attrs; - attrs["diagonal"] = diagonal; - attrs["lower"] = lower; - NameInTensorMap inputs({{"X", {&x}}}); - int x_rank = x.dims().size(); - PADDLE_ENFORCE_GE( - x_rank, - 2, - platform::errors::InvalidArgument("Rank must be at least 2.")); - std::vector out_shape = common::vectorize(x.dims()); - return CreateOpRunAndReturnTensor("tril_triu", inputs, attrs, out_shape); - } - phi::DenseTensor TriangularSolve(const phi::DenseTensor& x, const phi::DenseTensor& y, bool upper, @@ -714,12 +698,12 @@ struct DeviceIndependenceTensorOperations { size_t rank = in->dims().size(); PADDLE_ENFORCE_EQ(start.size(), rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "EigenSliceWrapper function start " "argument must have the same length as input rank.")); PADDLE_ENFORCE_EQ(end.size(), rank, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "EigenSliceWrapper function end " "argument must have the same length as input rank.")); auto eigen_place_ptr = @@ -732,7 +716,7 @@ struct DeviceIndependenceTensorOperations { offsets_32bit[i] = start[i]; extents_32bit[i] = end[i]; } - EigenSlice, T, D>::Eval( + phi::funcs::EigenSlice, T, D>::Eval( eigen_place, framework::To32BitIndex(out_t), framework::To32BitIndex(in_t), diff --git a/paddle/fluid/operators/tdm_child_op.cc b/paddle/fluid/operators/tdm_child_op.cc index 7b9932ffb4a62..e14dc0e316219 100644 --- a/paddle/fluid/operators/tdm_child_op.cc +++ b/paddle/fluid/operators/tdm_child_op.cc @@ -60,18 +60,18 @@ class TDMChildOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(X) of TdmChild should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("TreeInfo"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(TreeInfo) of TdmChild should not be null.")); int child_nums = ctx->Attrs().Get("child_nums"); PADDLE_ENFORCE_GT( child_nums, 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ValueError: The value of the 'child_nums' must greater than 0. " "But received child_nums value = %d, ", child_nums)); @@ -82,7 +82,7 @@ class TDMChildOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( info_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeError: The dimensions of the 'tree info' must be 2. " "But received tree info's dimensions = %d, " "tree info's shape = [%s].", diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index 2d849e1849348..3380062743047 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -56,7 +56,7 @@ void TDMChildInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LT( input_data[input_ids], node_nums, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input id of OP(paddle.incubate.layers.tdm_child) " "expected >= 0 and < %ld, but got %ld. 
Please check input " "value.", @@ -65,7 +65,7 @@ void TDMChildInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( 0, input_data[input_ids], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "input id of OP(paddle.incubate.layers.tdm_child) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -119,7 +119,7 @@ class TDMChildKernel : public framework::OpKernel { input_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(input_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(input_type), @@ -136,7 +136,7 @@ class TDMChildKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( info_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(TreeInfo) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(info_type), @@ -156,7 +156,7 @@ class TDMChildKernel : public framework::OpKernel { output_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(out_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Child) & Output(LeafMask) holds the wrong " "type, it holds %s, but " "desires to be %s or %s", diff --git a/paddle/fluid/operators/tdm_sampler_op.cc b/paddle/fluid/operators/tdm_sampler_op.cc index d516af7718365..f7877b8268a04 100644 --- a/paddle/fluid/operators/tdm_sampler_op.cc +++ b/paddle/fluid/operators/tdm_sampler_op.cc @@ -81,15 +81,15 @@ class TDMSamplerOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Input) of TdmSampler should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Travel"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Travel) of TdmSampler should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasInput("Layer"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs(Layer) of TdmSampler should not be null.")); auto neg_samples_num_vec = ctx->Attrs().Get>("neg_samples_num_list"); diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index 52f86d633307b..7dcc72b66a1a6 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -93,7 +93,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LT( -1, input_id, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.tdm_sampler) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -102,7 +102,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LT( input_id, travel_dim[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Variable value (input) of OP(fluid.layers.tdm_sampler) " "expected >= 0 and < %ld, but got %ld. Please check input " "value.", @@ -126,7 +126,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( sample_num, node_nums - 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Neg sample nums id of OP(fluid.layers.tdm_sampler) at layer %ld " "expected <= %ld - 1 (positive included), but got %ld. 
Please " "check neg_samples_num_list.", @@ -163,7 +163,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( positive_node_id, node_id_max, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Positive node id of OP(fluid.layers.tdm_sampler) at layer %ld " "expected >= %ld and <= %ld, but got %ld. Please check input " "value.", @@ -174,7 +174,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( node_id_min, positive_node_id, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Positive node id of OP(fluid.layers.tdm_sampler) at layer %ld " "expected >= %ld and <= %ld, but got %ld. Please check input " "value.", @@ -224,7 +224,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, PADDLE_ENFORCE_LE( layer_data[layer_offset_lod[layer_idx] + sample_res], node_id_max, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Negative node id of OP(fluid.layers.tdm_sampler) at layer %ld" "expected >= %ld and <= %ld, but got %ld. Please check input " "tdm tree structure and tdm travel info.", @@ -270,7 +270,7 @@ class TDMSamplerKernel : public framework::OpKernel { input_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(input_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(X) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(input_type), @@ -286,7 +286,7 @@ class TDMSamplerKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( travel_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Travel) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(travel_type), @@ -301,7 +301,7 @@ class TDMSamplerKernel : public framework::OpKernel { layer_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(layer_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Layer) holds the wrong type, it holds %s, but " "desires to be %s or %s", paddle::framework::DataTypeToString(layer_type), @@ -312,7 +312,7 @@ class TDMSamplerKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( travel_type, layer_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input(Travel) must holds the same type with " "Input(Layer), but Travel holds %s, and Layer holds %s", paddle::framework::DataTypeToString(travel_type), diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc deleted file mode 100644 index 332008894d5b9..0000000000000 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc +++ /dev/null @@ -1,263 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/teacher_student_sigmoid_loss_op.h" - -#include - -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -class TeacherStudentSigmoidLossOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("X"), "Input", "X", "teacher_student_sigmoid_loss"); - OP_INOUT_CHECK(ctx->HasInput("Label"), - "Input", - "Label", - "teacher_student_sigmoid_loss"); - OP_INOUT_CHECK( - ctx->HasOutput("Y"), "Output", "Y", "teacher_student_sigmoid_loss"); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ(x_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "Input(X)'s rank should be 2. But received: " - "Input(X)'s rank is [%d]", - x_dims.size())); - PADDLE_ENFORCE_EQ(label_dims.size(), - 2UL, - platform::errors::InvalidArgument( - "Input(Label)'s rank should be 2. But " - "received Input(Label)'s rank is [%d]", - label_dims.size())); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - x_dims[0], - label_dims[0], - platform::errors::InvalidArgument( - "The 1st dimension of Input(X) and Input(Label) should " - "be equal. The difference is [%d]: [%d]", - x_dims[0], - label_dims[0])); - PADDLE_ENFORCE_EQ(label_dims[1], - 1UL, - platform::errors::InvalidArgument( - "The 2nd dimension of " - "Input(Label) should be 1. But received " - "Input(Label)'s 2nd dim is [%d]", - label_dims[1])); - } - ctx->SetOutputDim("Y", {x_dims[0], 1}); - ctx->ShareLoD("X", /*->*/ "Y"); - } - - protected: - // Explicitly set that the data type of computation kernel of - // teacher_student_sigmoid_loss - // is determined by its input "X". - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -template -class TeacherStudentSigmoidLossGradOpMaker - : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("teacher_student_sigmoid_loss_grad"); - - op->SetInput("X", this->Input("X")); - op->SetInput("Label", this->Input("Label")); - op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); - - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - - op->SetAttrMap(this->Attrs()); - } -}; - -class TeacherStudentSigmoidLossGradientOp - : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK( - ctx->HasInput("X"), "Input", "X", "teacher_student_sigmoid_loss_grad"); - OP_INOUT_CHECK(ctx->HasInput("Label"), - "Input", - "X", - "teacher_student_sigmoid_loss_grad"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), - "Input", - "Y@Grad", - "teacher_student_sigmoid_loss_grad"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Input", - "X@Grad", - "teacher_student_sigmoid_loss_grad"); - - auto x_dims = ctx->GetInputDim("X"); - auto label_dims = ctx->GetInputDim("Label"); - auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); - PADDLE_ENFORCE_EQ( - x_dims.size(), - 2, - platform::errors::InvalidArgument( - "Input(X)'s rank should be 2. 
But received Input(X)'s rank is [%d]", - x_dims.size())); - PADDLE_ENFORCE_EQ(dy_dims.size(), - 2, - platform::errors::InvalidArgument( - "Input(Y@Grad)'s rank should be 2. But received " - "Input(Y@Grad)'s rank is [%d]", - dy_dims.size())); - PADDLE_ENFORCE_EQ(label_dims.size(), - 2, - platform::errors::InvalidArgument( - "Input(Label)'s rank should be 2. But received " - "Input(Y@Grad)'s rank is [%d]", - label_dims.size())); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - x_dims[0], - label_dims[0], - platform::errors::InvalidArgument( - "The 1st dimension of Input(X) and Input(Label) should " - "be equal. The difference is [%d]: [%d]", - x_dims[0], - label_dims[0])); - PADDLE_ENFORCE_EQ( - x_dims[0], - dy_dims[0], - platform::errors::InvalidArgument( - "The 1st dimension of Input(X) and Input(Y@Grad) should " - "be equal. The difference is [%d]: [%d]", - x_dims[0], - dy_dims[0])); - PADDLE_ENFORCE_EQ( - dy_dims[1], - 1, - platform::errors::InvalidArgument( - "The 2nd dimension of Input(Y@Grad) should be 1. " - "But received Input(Y@Grad)'s 2nd dimension is [%d]", - dy_dims[1])); - PADDLE_ENFORCE_EQ( - label_dims[1], - 1, - platform::errors::InvalidArgument( - "When Attr(soft_label) == false, the 2nd dimension of " - "Input(Label) should be 1. But received Input(Label)'s 2nd " - "dimension " - "is [%d]", - label_dims[1])); - } - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of - // teacher_student_sigmoid_loss - // is determined by its input "X". - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class TeacherStudentSigmoidLossOpMaker - : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(phi::DenseTensor, default phi::DenseTensor<float>), a 2-D " - "tensor with shape [N x 1]," - " where N is the batch size and D is the output. " - "This input is a probability computed by the previous operator, " - "which is almost always the result of a softmax operator."); - AddInput("Label", - "(phi::DenseTensor), the ground truth which is a 2-D tensor. " - "Label is a phi::DenseTensor with shape [N x 1]. "); - AddOutput("Y", - "(phi::DenseTensor, default phi::DenseTensor<float>), a 2-D " - "tensor with shape " - "[N x 1]. The teacher student sigmoid loss."); - AddAttr<float>( - "soft_max_up_bound", - "fp32, if input > soft_max_up_bound, input will be bound, default 15.0") - .SetDefault(15.0); - AddAttr<float>("soft_max_lower_bound", - "fp32, if input < soft_max_lower_bound, input will be " - "bound, default -15.0") - .SetDefault(-15.0); - AddComment(R"DOC( -TeacherStudentSigmoidLoss Operator. - -It's similarity to SigmoidCrossEntropyWithLogits Operator. The difference is that -we add another label(z') to original. 
- loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + max(x, 0) - x * z' + log(1 + exp(-abs(x))) - z is click or not - z' is teacher value - label = {-2, -1, [0, 2]} - when z' is not exist, clk = 0 : label = -2; - when z' is not exist, clk = 1 : label = -1; - when z' is exist , clk = 0 : label = 0 + z'; - when z' is exist , clk = 1 : label = 1 + z'; - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - teacher_student_sigmoid_loss, - ops::TeacherStudentSigmoidLossOp, - ops::TeacherStudentSigmoidLossOpMaker, - ops::TeacherStudentSigmoidLossGradOpMaker<paddle::framework::OpDesc>, - ops::TeacherStudentSigmoidLossGradOpMaker<paddle::imperative::OpBase>); - -REGISTER_OPERATOR(teacher_student_sigmoid_loss_grad, - ops::TeacherStudentSigmoidLossGradientOp); - -PD_REGISTER_STRUCT_KERNEL(teacher_student_sigmoid_loss, - CPU, - ALL_LAYOUT, - ops::TeacherStudentSigmoidLossOpKernel, - float, - double) {} -PD_REGISTER_STRUCT_KERNEL(teacher_student_sigmoid_loss_grad, - CPU, - ALL_LAYOUT, - ops::TeacherStudentSigmoidLossGradOpKernel, - float, - double) {} diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h deleted file mode 100644 index 7ccb9438d4188..0000000000000 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h +++ /dev/null @@ -1,118 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template <typename T, typename DeviceContext> -class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel<T> { - public: - void Compute(const framework::ExecutionContext& context) const override { - phi::DenseTensor* y = context.Output<phi::DenseTensor>("Y"); - const phi::DenseTensor* x = context.Input<phi::DenseTensor>("X"); - const phi::DenseTensor* labels = context.Input<phi::DenseTensor>("Label"); - T* y_data = y->mutable_data<T>(context.GetPlace()); - const T* x_data = x->data<T>(); - const T* label_data = labels->data<T>(); - int64_t batch_size = x->dims()[0]; - // loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + max(x, 0) - x * z' + - // log(1 + exp(-abs(x))) - // z is click or not - // z' is value q of feed_fine - // label = {-2, -1, [0, 2]} - // when z' is not exist, clk = 0 : label = -2; - // when z' is not exist, clk = 1 : label = -1; - // when z' is exist , clk = 0 : label = 0 + z'; - // when z' is exist , clk = 1 : label = 1 + z'; - for (int i = 0; i < batch_size; ++i) { - if (label_data[i] < -1.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) + - log(1.0 + exp(-fabs(x_data[i]))); - } else if (label_data[i] < 0.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) - x_data[i] + - log(1.0 + exp(-fabs(x_data[i]))); - } else if (label_data[i] < 1.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) + - log(1.0 + exp(-fabs(x_data[i]))) + - (x_data[i] > 0 ? x_data[i] : 0.0) - - x_data[i] * label_data[i] + - log(1.0 + exp(-fabs(x_data[i]))); - } else { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) - x_data[i] + - log(1.0 + exp(-fabs(x_data[i]))) + - (x_data[i] > 0 ? x_data[i] : 0.0) - - x_data[i] * (label_data[i] - 1.0) + - log(1.0 + exp(-fabs(x_data[i]))); - } - } - } -};
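For reference, the piecewise loss evaluated by the deleted forward kernel above, as a minimal standalone sketch that mirrors its label encoding (illustrative helper names; not part of the patch):

#include <cmath>

// bce(x, z) = max(x, 0) - x * z + log(1 + exp(-|x|)), the numerically
// stable sigmoid cross-entropy term used twice by the kernel above.
double bce(double x, double z) {
  return (x > 0 ? x : 0.0) - x * z + std::log(1.0 + std::exp(-std::fabs(x)));
}

double teacher_student_loss(double x, double label) {
  if (label < -1.0) return bce(x, 0.0);                 // no teacher, clk = 0
  if (label < 0.0) return bce(x, 1.0);                  // no teacher, clk = 1
  if (label < 1.0) return bce(x, 0.0) + bce(x, label);  // teacher, clk = 0, z' = label
  return bce(x, 1.0) + bce(x, label - 1.0);             // teacher, clk = 1, z' = label - 1
}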
-template <typename T, typename DeviceContext> -class TeacherStudentSigmoidLossGradOpKernel : public framework::OpKernel<T> { - public: - void Compute(const framework::ExecutionContext& context) const override { - const phi::DenseTensor* x = context.Input<phi::DenseTensor>("X"); - const T* x_data = x->data<T>(); - - phi::DenseTensor* dx = - context.Output<phi::DenseTensor>(framework::GradVarName("X")); - T* dx_data = dx->mutable_data<T>(context.GetPlace()); - - const phi::DenseTensor* labels = context.Input<phi::DenseTensor>("Label"); - const T* label_data = labels->data<T>(); - - T soft_max_up_bound = - static_cast<T>(context.Attr<float>("soft_max_up_bound")); - T soft_max_lower_bound = - static_cast<T>(context.Attr<float>("soft_max_lower_bound")); - - int64_t batch_size = x->dims()[0]; - - const phi::DenseTensor* dOut = - context.Input<phi::DenseTensor>(framework::GradVarName("Y")); - - const T* dout_data = dOut->data<T>(); - - for (int i = 0; i < batch_size; ++i) { - T sum_val = x_data[i]; - if (sum_val > soft_max_up_bound) { - sum_val = soft_max_up_bound; - } else { - if (sum_val < soft_max_lower_bound) { - sum_val = soft_max_lower_bound; - } - } - - T pred = 1.0 / (1.0 + exp(-sum_val)); - if (label_data[i] < -1.0) { - dx_data[i] = 0.0 - pred; - } else if (label_data[i] < 0.0) { - dx_data[i] = 1.0 - pred; - } else { - dx_data[i] = label_data[i] - 2.0 * pred; - } - if (sum_val >= soft_max_up_bound || sum_val <= soft_max_lower_bound) { - dx_data[i] = 0; - } - dx_data[i] *= dout_data[i] * -1; - } - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/temporal_shift_op.cu b/paddle/fluid/operators/temporal_shift_op.cu index 51b75832d078a..d03f93e0503ae 100644 --- a/paddle/fluid/operators/temporal_shift_op.cu +++ b/paddle/fluid/operators/temporal_shift_op.cu @@ -156,10 +156,10 @@ template <typename T> class TemporalShiftOpCUDAKernel : public framework::OpKernel<T> { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument( - "This kernel only runs on GPU device.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("This kernel only runs on GPU device.")); auto* input = ctx.Input<phi::DenseTensor>("X"); auto* output = ctx.Output<phi::DenseTensor>("Out"); int t = ctx.Attr<int>("seg_num"); @@ -275,11 +275,11 @@ PD_REGISTER_STRUCT_KERNEL(temporal_shift, ops::TemporalShiftOpCUDAKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} PD_REGISTER_STRUCT_KERNEL(temporal_shift_grad, GPU, ALL_LAYOUT, ops::TemporalShiftGradOpCUDAKernel, float, double, - plat::float16) {} + phi::dtype::float16) {} diff --git a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc index 69c7446d85d47..f7b5a9a8833d2 100644 --- a/paddle/fluid/operators/tensor_array_to_tensor_op.cc +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -93,9 +93,9 @@ class LoDTensorArray2TensorOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( n, 0, - platform::errors::InvalidArgument("Input tensorarray size should > 0," - "but the received is %d", - n)); + phi::errors::InvalidArgument("Input tensorarray size should > 0," + "but the received is %d", + n)); std::string base_name = Inputs("X")[0]; std::vector<std::string> names; @@ -229,9 +229,9 @@ class LoDTensorArray2TensorGradOp : public framework::OperatorBase { PADDLE_ENFORCE_GT( n, 0, - 
platform::errors::InvalidArgument("Input tensorarray size should > 0, " - "but the received is: %d. ", - n)); + phi::errors::InvalidArgument("Input tensorarray size should > 0, " + "but the received is: %d. ", + n)); std::string base_name = Inputs("X")[0]; std::vector names; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 28dcaf3d43e31..2709d404320bb 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -75,7 +75,7 @@ static void RuntimeStaticShapeCheck(std::vector runtime_input_shape, PADDLE_ENFORCE_EQ( model_input_shape == runtime_input_shape, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input shapes are inconsistent with the model. Expect [%s] in " "model description, but got [%s] in runtime. TRT 5 " "or lower version " @@ -101,7 +101,7 @@ static phi::DataType TRT2FluidDataType(nvinfer1::DataType type) { return phi::DataType::BOOL; #endif default: - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "unknown fluid datatype in Fluid op converter")); return phi::DataType::FLOAT32; } @@ -114,7 +114,7 @@ static void RuntimeDynamicShapeCheck( const std::vector &max_input_shape) { // PADDLE_ENFORCE_EQ( // runtime_input_shape.size(), min_input_shape.size(), - // platform::errors::InvalidArgument( + // phi::errors::InvalidArgument( // "TRT engine runtime input %s dims size(%d) inconsistent " // "with the dynamic shape size(%d)", // x, runtime_input_shape.size(), min_input_shape.size())); @@ -139,7 +139,7 @@ static void RuntimeDynamicShapeCheck( PADDLE_ENFORCE_EQ(is_input_shape_valid( runtime_input_shape, min_input_shape, max_input_shape), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "TRT runtime input shape of %s is invalid. Expect " "runtime input shape to be within min/max input shape " "configured in SetTRTDynamicShapeInfo()," @@ -362,12 +362,12 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( min_input_shape.count(x), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input %s not found in TRT engine min_input_shape.", x)); PADDLE_ENFORCE_EQ( max_input_shape.count(x), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input %s not found in TRT engine max_input_shape.", x)); RuntimeDynamicShapeCheck(x, runtime_input_shape[x], @@ -560,7 +560,7 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( t.numel(), 1UL, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "This tensor must have one element, but got %ld.", t.numel())); t_shape.push_back(1); } @@ -571,7 +571,7 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( bind_index, num_bindings, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Wrong TRT engine input binding index. Expected The " "binding index of TRT engine input to be less than " "the number of inputs and outputs. Received binding " @@ -592,7 +592,7 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( runtime_batch, t_shape[0], - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Inputs of trt subgraphs has different batchsize. " "It's not allowed in static shape mode. 
" "Check whether the model you are running has multiple trt " @@ -693,7 +693,7 @@ class TensorRTEngineOp : public framework::OperatorBase { auto intrt_type = engine->engine()->getBindingDataType(intrt_index); PADDLE_ENFORCE_EQ(indata_type, intrt_type, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The TRT Engine OP's input type [%d] should equal " "to the input data type [%d].", static_cast(intrt_type), @@ -733,7 +733,7 @@ class TensorRTEngineOp : public framework::OperatorBase { buffers[bind_index] = static_cast(t.data()); #endif } else { - PADDLE_THROW(platform::errors::Fatal( + PADDLE_THROW(phi::errors::Fatal( "The TRT Engine OP only support " "float/double/int32_t/int64_t/float16/bool input.")); } @@ -787,14 +787,14 @@ class TensorRTEngineOp : public framework::OperatorBase { auto *fluid_v = scope.FindVar(y); PADDLE_ENFORCE_NOT_NULL( fluid_v, - platform::errors::NotFound( + phi::errors::NotFound( "Output variable %s is not found in TensorRT subgraph.", y)); auto *fluid_t = fluid_v->GetMutable(); fluid_t->Resize(common::make_ddim(ddim)); PADDLE_ENFORCE_LT(bind_index, num_bindings, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The binding index in TRT engine should be less " "than the number of bindings, but got binding " "index = %d, number of bindings = %d.", @@ -813,7 +813,7 @@ class TensorRTEngineOp : public framework::OperatorBase { PADDLE_ENFORCE_LE( runtime_batch, max_batch_size_, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The runtime batch size (%d) is greater than the max batch " "size(%d).\n" "There are two possible causes for this problem: \n" @@ -983,7 +983,7 @@ class TensorRTEngineOp : public framework::OperatorBase { } PADDLE_ENFORCE_NOT_NULL( trt_engine_, - platform::errors::Fatal( + phi::errors::Fatal( "The pointer to tensorrt engine should not be null.")); return trt_engine_; } diff --git a/paddle/fluid/operators/tile_op.cc b/paddle/fluid/operators/tile_op.cc index 9d961bbd57122..1819b587c7c1c 100644 --- a/paddle/fluid/operators/tile_op.cc +++ b/paddle/fluid/operators/tile_op.cc @@ -181,7 +181,7 @@ class TileCompositeGradOpMaker : public prim::CompositeGradOpMakerBase { auto repeat_times = this->Attr>("repeat_times"); if (tensor_repeat_times.is_initialized() || tensor_repeat_times_attr.is_initialized()) { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "We don't support RepeatTimes from tensor or repeat_times_tensor for " "tile composite grad for now. 
")); } else { diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index 32ee384f841d6..99b311960e77b 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -24,17 +24,17 @@ class TopkOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::InvalidArgument( - "Input(X) of TopkOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::InvalidArgument("Input(X) of TopkOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Out) of TopkOp should not be null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Indices"), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Output(Indices) of TopkOp should not be null.")); auto input_dims = ctx->GetInputDim("X"); @@ -42,18 +42,18 @@ class TopkOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE(k, 1, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Attribute k must be >= 1, but got k is %d.", k)); PADDLE_ENFORCE_GE( input_dims.size(), 1, - platform::errors::InvalidArgument("input must have >= 1d shape")); + phi::errors::InvalidArgument("input must have >= 1d shape")); if (ctx->IsRuntime()) { PADDLE_ENFORCE_GE( input_dims[input_dims.size() - 1], k, - platform::errors::InvalidArgument("input must have >= k columns")); + phi::errors::InvalidArgument("input must have >= k columns")); } framework::DDim dims = input_dims; @@ -104,19 +104,19 @@ class TopkOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::InvalidArgument("Input(X) should be not null")); + phi::errors::InvalidArgument("Input(X) should be not null")); PADDLE_ENFORCE_EQ( ctx->HasInput("Indices"), true, - platform::errors::InvalidArgument("Input(Indices) should be not null")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Grad Input(Out) should be not null")); + phi::errors::InvalidArgument("Input(Indices) should be not null")); + PADDLE_ENFORCE_EQ( + ctx->HasInput(framework::GradVarName("Out")), + true, + phi::errors::InvalidArgument("Grad Input(Out) should be not null")); PADDLE_ENFORCE_EQ( ctx->HasOutput(framework::GradVarName("X")), true, - platform::errors::InvalidArgument("Grad Output(X) should be not null")); + phi::errors::InvalidArgument("Grad Output(X) should be not null")); auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim(framework::GradVarName("X"), x_dims); diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu index 003f670133e45..1bb53891d8151 100644 --- a/paddle/fluid/operators/top_k_op.cu +++ b/paddle/fluid/operators/top_k_op.cu @@ -23,7 +23,7 @@ limitations under the License. 
*/ #endif #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/top_k_op.h" -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/top_k_function_cuda.h" // set cub base traits in order to handle float16 @@ -61,10 +61,9 @@ template class TopkOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(ctx.GetPlace()), - true, - platform::errors::InvalidArgument("It must use CUDAPlace.")); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), + true, + phi::errors::InvalidArgument("It must use CUDAPlace.")); auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); auto* indices = ctx.Output("Indices"); @@ -124,12 +123,12 @@ class TopkOpCUDAKernel : public framework::OpKernel { gridx, input_height)); default: - PADDLE_THROW(platform::errors::Fatal( + PADDLE_THROW(phi::errors::Fatal( "the input k has error when use getMaxLength function to get the " "maxLength.")); }); default: - PADDLE_THROW(platform::errors::Unavailable( + PADDLE_THROW(phi::errors::Unavailable( "Calculation error occurred in TopK Operator's CUDA Kernel.")); } } @@ -139,10 +138,9 @@ template class TopkOpGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE_EQ( - platform::is_gpu_place(context.GetPlace()), - true, - platform::errors::InvalidArgument("It must use CUDAPlace.")); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(context.GetPlace()), + true, + phi::errors::InvalidArgument("It must use CUDAPlace.")); auto* x = context.Input("X"); auto* out_grad = context.Input(framework::GradVarName("Out")); @@ -169,7 +167,7 @@ class TopkOpGradCUDAKernel : public framework::OpKernel { x_grad_data, indices_data, out_grad_data, row, col, k)); default: PADDLE_THROW( - platform::errors::Unavailable("Error occurs when Assign Grad.")); + phi::errors::Unavailable("Error occurs when Assign Grad.")); } } }; @@ -184,8 +182,7 @@ REGISTER_OP_CUDA_KERNEL( paddle::operators::TopkOpCUDAKernel, paddle::operators::TopkOpCUDAKernel, paddle::operators::TopkOpCUDAKernel, - paddle::operators::TopkOpCUDAKernel); + paddle::operators::TopkOpCUDAKernel); REGISTER_OP_CUDA_KERNEL( top_k_grad, @@ -194,4 +191,4 @@ REGISTER_OP_CUDA_KERNEL( paddle::operators::TopkOpGradCUDAKernel, paddle::operators::TopkOpGradCUDAKernel, paddle::operators::TopkOpGradCUDAKernel); + phi::dtype::float16>); diff --git a/paddle/fluid/operators/top_k_op_xpu.cc b/paddle/fluid/operators/top_k_op_xpu.cc index fff713236e9a6..48902ed3d8bd5 100644 --- a/paddle/fluid/operators/top_k_op_xpu.cc +++ b/paddle/fluid/operators/top_k_op_xpu.cc @@ -92,5 +92,5 @@ class TopkXPUKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_XPU_KERNEL(top_k, ops::TopkXPUKernel, - ops::TopkXPUKernel); + ops::TopkXPUKernel); #endif diff --git a/paddle/fluid/operators/transfer_layout_op.cc b/paddle/fluid/operators/transfer_layout_op.cc index 9df0a1f3e36ed..a23461475397b 100644 --- a/paddle/fluid/operators/transfer_layout_op.cc +++ b/paddle/fluid/operators/transfer_layout_op.cc @@ -52,7 +52,7 @@ class TransferLayoutOp : public framework::OperatorWithKernel { if (in_tensor->layout() != DataLayout::ONEDNN) { PADDLE_ENFORCE_EQ(in_tensor->IsInitialized(), true, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The tensor of Input(X) is not initialized.")); } auto place = 
diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h index 2736171626121..1b4ef2d1b5abb 100644 --- a/paddle/fluid/operators/transfer_layout_op.h +++ b/paddle/fluid/operators/transfer_layout_op.h @@ -74,7 +74,7 @@ class TransferLayoutFunctor { PADDLE_ENFORCE_NE( in_layout, out_layout, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "No layout transform needed between two oneDNN OPKernels.")); if (in_layout != DataLayout::ONEDNN && out_layout == DataLayout::ONEDNN) { @@ -136,7 +136,7 @@ class TransferLayoutFunctor { PADDLE_ENFORCE_EQ( common::arity(in.dims()), 4, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input dimension arity only can be 4, the input dimension is %s.", in.dims())); diff --git a/paddle/fluid/operators/unbind_op.h b/paddle/fluid/operators/unbind_op.h index 7a5bf4d34c47c..ea2c6d4ee2bb8 100644 --- a/paddle/fluid/operators/unbind_op.h +++ b/paddle/fluid/operators/unbind_op.h @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/utils.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/strided_memcpy.h" namespace paddle { diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc index 6b84fd1612e65..bcff52e1af6d7 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc @@ -38,7 +38,7 @@ inline void UniformRealDistribution(T *data, } template <> -inline void UniformRealDistribution(paddle::platform::bfloat16 *data, +inline void UniformRealDistribution(phi::dtype::bfloat16 *data, const int64_t &size, const float &min, const float &max, @@ -48,7 +48,7 @@ inline void UniformRealDistribution(paddle::platform::bfloat16 *data, auto engine = phi::GetCPURandomEngine(seed); for (int64_t i = 0; i < size; ++i) { - data[i] = static_cast(dist(*engine)); + data[i] = static_cast(dist(*engine)); } } } // namespace @@ -85,7 +85,7 @@ class CPUUniformRandomKernel : public framework::OpKernel { tensor = out_var->GetMutable(); if (!new_shape.empty()) tensor->Resize(common::make_ddim(new_shape)); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected type of Output(out) in uniform_random_op must be Tensor, " "SelectedRows. 
But got " "unsupport type: %s.", @@ -110,7 +110,7 @@ class CPUUniformRandomKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( size, (diag_num - 1) * (diag_step + 1), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeInvalid: the diagonal's elements is equal (num-1) " "* (step-1) with num %d, step %d," "It should be smaller than %d, but received %d", diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cu b/paddle/fluid/operators/uniform_random_batch_size_like_op.cu index 0cf50142c04a0..0b81c690d573f 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cu +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cu @@ -45,7 +45,7 @@ class GPUUniformRandomKernel : public framework::OpKernel { tensor = out_var->GetMutable(); if (!new_shape.empty()) tensor->Resize(common::make_ddim(new_shape)); } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected type of Output(out) in uniform_random_op must be " "phi::DenseTensor, " "SelectedRows. But got " diff --git a/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc b/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc index f1afd8ef3e213..6a773c60997ea 100644 --- a/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc +++ b/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc @@ -50,7 +50,7 @@ class XPUUniformRandomInplaceKernel : public framework::OpKernel { PADDLE_ENFORCE_GT( size, (diag_num - 1) * (diag_step + 1), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "ShapeInvalid: the diagonal's elements is equal (num-1) " "* (step-1) with num %d, step %d," "It should be smaller than %d, but received %d", diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index 458794223dc74..2dbab83a2f528 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -60,7 +60,7 @@ inline std::vector GetNewDataFromShapeTensor( } return vec_new_data; } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected dtype of ShapeTensor must be int32, int64. But got " "unsupport dtype: %s.", new_data_tensor->dtype())); @@ -76,7 +76,7 @@ inline std::vector GetNewDataFromShapeTensorList( PADDLE_ENFORCE_EQ( tensor->dims(), common::make_ddim({1}), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Shape of dim tensor in uniform_random_op should be [1]" "But received tensor's dim=%s.", tensor->dims())); @@ -100,7 +100,7 @@ inline std::vector GetNewDataFromShapeTensorList( vec_new_shape.push_back(*tensor->data()); } } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "Expected dtype of ShapeTensorList of %d-th must be int32, int64. 
" "But got " "unsupport dtype: %s.", diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index 4d7a9eb5e4937..47bd4674c9a29 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -55,7 +55,7 @@ struct UniqueOpFunctor { PADDLE_ENFORCE_LT( in_->numel(), pow(2, 31), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The num of Input(X) elements should be less then INT_MAX, " "but received num is %d.", in_->numel())); @@ -84,7 +84,7 @@ struct UniqueOpFunctor { index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Index holds the wrong type, it holds %s, " "but desires to be %s or %s", paddle::framework::DataTypeToString(index_type), @@ -406,7 +406,7 @@ class UniqueKernel : public framework::OpKernel { PADDLE_ENFORCE_LE( x->numel(), INT_MAX, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The number of elements in Input(X) should be less than or " "equal to INT_MAX, but received num is %d. Please set `dtype` to " "int64.", diff --git a/paddle/fluid/operators/unique_with_counts_op.cc b/paddle/fluid/operators/unique_with_counts_op.cc deleted file mode 100644 index 5272158805d71..0000000000000 --- a/paddle/fluid/operators/unique_with_counts_op.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/unique_with_counts_op.h" - -namespace paddle { -namespace operators { - -class UniqueWithCountsOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unique_with_counts"); - OP_INOUT_CHECK( - ctx->HasOutput("Out"), "Output", "Out", "unique_with_counts"); - OP_INOUT_CHECK( - ctx->HasOutput("Index"), "Output", "Index", "unique_with_counts"); - OP_INOUT_CHECK( - ctx->HasOutput("Count"), "Output", "Count", "unique_with_counts"); - - auto in_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - in_dims.size(), - 1, - platform::errors::InvalidArgument("The Input(X) should be 1-D Tensor, " - "But now the dims of Input(X) is %d.", - in_dims.size())); - - ctx->SetOutputDim("Out", {-1}); - ctx->SetOutputDim("Index", in_dims); - ctx->SetOutputDim("Count", {-1}); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - platform::CPUPlace()); - } -}; - -class UniqueWithCountsOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "Input tensor. 
It should be a 1-D tensor."); - AddAttr("dtype", "data type for output index"); - AddOutput("Out", "A unique subsequence for input tensor."); - AddOutput("Index", - "An index tensor pointing to unique subsequence, which has " - "identical shape with input tensor and the data type is set by " - "the attr `dtype`"); - AddOutput("Count", "A subsequence for the count of unique index"); - AddComment(R"DOC( - Return a unique subsequence for 1-D input tensor, index tensor pointing to this unique subsequence, - and the subsequence for the count of unique index. -)DOC"); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(unique_with_counts, - ops::UniqueWithCountsOp, - ops::UniqueWithCountsOpMaker); -PD_REGISTER_STRUCT_KERNEL(unique_with_counts, - CPU, - ALL_LAYOUT, - ops::UniqueWithCountsKernel, - float, - double, - int32_t, - int64_t) {} diff --git a/paddle/fluid/operators/unique_with_counts_op.h b/paddle/fluid/operators/unique_with_counts_op.h deleted file mode 100644 index 4b1fef5e22447..0000000000000 --- a/paddle/fluid/operators/unique_with_counts_op.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/unique_op.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -class UniqueWithCountsKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto data_type = static_cast( - context.Attr("dtype")); - auto* x = context.Input("X"); - auto* out = context.Output("Out"); - auto* index = context.Output("Index"); - auto* count = context.Output("Count"); - framework::VisitDataType(data_type, - UniqueOpFunctor(out, index, x, count)); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/unity_build_rule.cmake b/paddle/fluid/operators/unity_build_rule.cmake index 07136f7bd4f31..b19533b005a94 100644 --- a/paddle/fluid/operators/unity_build_rule.cmake +++ b/paddle/fluid/operators/unity_build_rule.cmake @@ -29,22 +29,22 @@ register_unity_group( bmm_op.cc bpr_loss_op.cc cast_op.cc - mkldnn/cast_mkldnn_op.cc + onednn/cast_onednn_op.cc cholesky_op.cc chunk_eval_op.cc clip_by_norm_op.cc clip_op.cc coalesce_tensor_op.cc - mkldnn/activation_mkldnn_op.cc - mkldnn/interpolate_mkldnn_op.cc - mkldnn/pool_mkldnn_op.cc - mkldnn/softmax_mkldnn_op.cc) + onednn/activation_onednn_op.cc + onednn/interpolate_onednn_op.cc + onednn/pool_onednn_op.cc + onednn/softmax_onednn_op.cc) register_unity_group( cc center_loss_op.cc - mkldnn/concat_mkldnn_op.cc - mkldnn/conv_mkldnn_op.cc - mkldnn/conv_transpose_mkldnn_op.cc + onednn/concat_onednn_op.cc + onednn/conv_onednn_op.cc + onednn/conv_transpose_onednn_op.cc correlation_op.cc cos_sim_op.cc crf_decoding_op.cc @@ -69,7 +69,7 @@ 
register_unity_group( delete_var_op.cc dequantize_abs_max_op.cc dequantize_op.cc - mkldnn/dequantize_mkldnn_op.cc) + onednn/dequantize_onednn_op.cc) register_unity_group( cc dequeue_op.cc @@ -92,7 +92,7 @@ register_unity_group( expand_v2_op.cc fake_dequantize_op.cc fc_op.cc - mkldnn/fc_mkldnn_op.cc + onednn/fc_onednn_op.cc fill_any_like_op.cc fill_constant_batch_size_like_op.cc fill_constant_op.cc @@ -105,7 +105,7 @@ register_unity_group( gather_nd_op.cc gather_tree_op.cc gaussian_random_batch_size_like_op.cc - mkldnn/gaussian_random_mkldnn_op.cc + onednn/gaussian_random_onednn_op.cc group_norm_op.cc gru_op.cc) register_unity_group( @@ -143,7 +143,7 @@ register_unity_group( log_softmax_op.cc lookup_table_dequant_op.cc lrn_op.cc - mkldnn/lrn_mkldnn_op.cc + onednn/lrn_onednn_op.cc lstm_unit_op.cc) register_unity_group( cc @@ -152,10 +152,9 @@ register_unity_group( masked_select_op.cc match_matrix_tensor_op.cc matmul_op.cc - mkldnn/matmul_mkldnn_op.cc + onednn/matmul_onednn_op.cc max_sequence_len_op.cc maxout_op.cc - merge_lod_tensor_op.cc merge_selected_rows_op.cc meshgrid_op.cc) register_unity_group( @@ -204,7 +203,7 @@ register_unity_group( cc push_dense_op.cc quantize_op.cc - mkldnn/quantize_mkldnn_op.cc + onednn/quantize_onednn_op.cc queue_generator_op.cc range_op.cc rank_attention_op.cc @@ -212,7 +211,7 @@ register_unity_group( recurrent_op.cc reorder_lod_tensor_by_rank_op.cc requantize_op.cc - mkldnn/requantize_mkldnn_op.cc + onednn/requantize_onednn_op.cc reshape_op.cc reverse_op.cc) register_unity_group( @@ -224,7 +223,7 @@ register_unity_group( save_combine_op.cc save_op.cc scale_op.cc - mkldnn/scale_mkldnn_op.cc + onednn/scale_onednn_op.cc scatter_nd_add_op.cc scatter_op.cc seed_op.cc @@ -247,7 +246,6 @@ register_unity_group( register_unity_group( cc spectral_norm_op.cc - split_lod_tensor_op.cc split_op.cc split_selected_rows_op.cc spp_op.cc @@ -256,7 +254,7 @@ register_unity_group( stack_op.cc strided_slice_op.cc sum_op.cc - mkldnn/sum_mkldnn_op.cc + onednn/sum_onednn_op.cc tdm_child_op.cc tdm_sampler_op.cc teacher_student_sigmoid_loss_op.cc @@ -269,7 +267,7 @@ register_unity_group( top_k_v2_op.cc trace_op.cc transpose_op.cc - mkldnn/transpose_mkldnn_op.cc + onednn/transpose_onednn_op.cc unbind_op.cc unfold_op.cc) register_unity_group( diff --git a/paddle/fluid/operators/unzip_op.cc b/paddle/fluid/operators/unzip_op.cc deleted file mode 100644 index b1b3d42282c40..0000000000000 --- a/paddle/fluid/operators/unzip_op.cc +++ /dev/null @@ -1,154 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/unzip_op.h" - -#include - -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -class unzipOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "lod"); - OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "lod"); - auto lod_dims = ctx->GetInputDim("lod"); - PADDLE_ENFORCE_EQ( - lod_dims.size(), - 1UL, - platform::errors::InvalidArgument( - "Input(X)'s rank should be 1, but got %d", lod_dims.size())); - auto len = static_cast(ctx->Attrs().Get("len")); - ctx->SetOutputDim("Y", {lod_dims[0] - 1, len}); - } - - protected: - // Explicitly set that the data type of computation kernel of - // unzip - // is determined by its input "X". - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.device_context().GetPlace()); - } -}; - -class unzipGradientOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unzipGradient"); - OP_INOUT_CHECK(ctx->HasInput("lod"), "Input", "unzip", "unzipGradient"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), - "Input", - framework::GradVarName("Y"), - "unzipGradient"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Output", - framework::GradVarName("X"), - "unzipGradient"); - - auto x_dims = ctx->GetInputDim("X"); - auto lod_dims = ctx->GetInputDim("lod"); - PADDLE_ENFORCE_EQ( - x_dims.size(), - 2, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 2, but got %d", x_dims.size())); - PADDLE_ENFORCE_EQ( - lod_dims.size(), - 1, - platform::errors::InvalidArgument( - "Expect Input(X)'s rank == 1, but got %d", lod_dims.size())); - - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); - } - - protected: - // Explicitly set that the data type of computation kernel of - // unzip - // is determined by its input "X". - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Y")), - ctx.device_context().GetPlace()); - } -}; - -class unzipOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(LodTensor, default LodTensor)"); - AddInput("lod", "(Tensor), a 1-D Tensor with shape [K]"); - AddAttr("len", "The len of each original Tensor").SetDefault(1); - AddOutput("Y", - "(LodTensor, default LodTensor), a 2-D tensor with shape " - "[K-1 x len]."); - AddComment(R"DOC( -unzip Operator. 
-)DOC"); - } -}; - -template -class unzipGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("unzip_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("lod", this->Input("lod")); - op->SetAttr("len", this->GetAttr("len")); - op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(unzip, - ops::unzipOp, - ops::unzipOpMaker, - ops::unzipGradOpMaker, - ops::unzipGradOpMaker); - -REGISTER_OPERATOR(unzip_grad, ops::unzipGradientOp); - -PD_REGISTER_STRUCT_KERNEL(unzip, - CPU, - ALL_LAYOUT, - ops::unzipOpKernel, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} -PD_REGISTER_STRUCT_KERNEL(unzip_grad, - CPU, - ALL_LAYOUT, - ops::unzipGradOpKernel, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} diff --git a/paddle/fluid/operators/unzip_op.cu b/paddle/fluid/operators/unzip_op.cu deleted file mode 100644 index 39d80e8c6ce92..0000000000000 --- a/paddle/fluid/operators/unzip_op.cu +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/operators/unzip_op.h" -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/backends/gpu/gpu_primitives.h" -namespace paddle { -namespace operators { - -using phi::PADDLE_CUDA_NUM_THREADS; - -template -__global__ void unzipKernel( - const T* X, const LodType* lod, T* Y, size_t col_size, size_t n) { - CUDA_KERNEL_LOOP(i, n) { - int lod_idx = i / col_size; - int len = lod[lod_idx + 1] - lod[lod_idx]; - if (i >= lod_idx * col_size + len) { - Y[i] = 0; - } else { - Y[i] = X[lod[lod_idx] + i % col_size]; - } - } -} - -template -class unzipCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const auto* x = context.Input("X"); - const T* x_data = x->data(); - - const auto* lod = context.Input("lod"); - const LodType* lod_data = lod->data(); - - auto col_size = context.Attr("len"); - auto row_size = lod->dims()[0] - 1; - auto y_numel = col_size * row_size; - - auto* y = context.Output("Y"); - T* y_data = y->mutable_data(context.GetPlace()); - - // for Input X do not have lod Information. 
- auto stream = context.template device_context().stream(); - unzipKernel<<<(y_numel + PADDLE_CUDA_NUM_THREADS - 1) / - PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, - 0, - stream>>>(x_data, lod_data, y_data, col_size, y_numel); - } -}; - -template -class unzipGradCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_THROW(phi::errors::Unimplemented("unzip_grad is unimplemented")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -PD_REGISTER_STRUCT_KERNEL(unzip, - GPU, - ALL_LAYOUT, - ops::unzipCUDAKernel, - float, - double, - plat::float16, - bool, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} -PD_REGISTER_STRUCT_KERNEL(unzip_grad, - GPU, - ALL_LAYOUT, - ops::unzipGradCUDAKernel, - float, - double, - plat::float16, - bool, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} diff --git a/paddle/fluid/operators/unzip_op.h b/paddle/fluid/operators/unzip_op.h deleted file mode 100644 index 6829d00dccf56..0000000000000 --- a/paddle/fluid/operators/unzip_op.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class unzipOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_THROW(phi::errors::Unimplemented("unzip is unimplemented")); - } -}; - -template -class unzipGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_THROW(phi::errors::Unimplemented("unzip_grad is unimplemented")); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h index cecd2e2931af6..feca2d9c722ac 100644 --- a/paddle/fluid/operators/utils.h +++ b/paddle/fluid/operators/utils.h @@ -29,13 +29,13 @@ inline std::vector GetDataFromTensorList( std::vector vec_new_data; for (size_t i = 0; i < list_tensor.size(); ++i) { auto tensor = list_tensor[i]; - PADDLE_ENFORCE_EQ(tensor->dims(), - common::make_ddim({1}), - platform::errors::InvalidArgument( - "The shape of Tensor in list must be [1]. " - "But received its shape " - "is [%s]", - tensor->dims())); + PADDLE_ENFORCE_EQ( + tensor->dims(), + common::make_ddim({1}), + phi::errors::InvalidArgument("The shape of Tensor in list must be [1]. 
" + "But received its shape " + "is [%s]", + tensor->dims())); if (framework::TransToProtoVarType(tensor->dtype()) == framework::proto::VarType::INT32) { @@ -57,7 +57,7 @@ inline std::vector GetDataFromTensorList( vec_new_data.push_back(static_cast(*tensor->data())); } } else { - PADDLE_THROW(platform::errors::InvalidArgument( + PADDLE_THROW(phi::errors::InvalidArgument( "The dtype of Tensor in list must be int32 or int64, but received: " "%s", tensor->dtype())); diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index 86e3fc3420ed6..e8d69083e532e 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -66,33 +66,33 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( ctx->HasInput("X"), true, - platform::errors::NotFound("X(Input) of VarConv2dOP is not found.")); + phi::errors::NotFound("X(Input) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("W"), true, - platform::errors::NotFound("W(Input) of VarConv2dOP is not found.")); + phi::errors::NotFound("W(Input) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("ROW"), true, - platform::errors::NotFound("Input(ROW) of VarConv2dOP is not found.")); + phi::errors::NotFound("Input(ROW) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasInput("COLUMN"), true, - platform::errors::NotFound("Input(COLUMN) of VarConv2dOP is not found.")); + phi::errors::NotFound("Input(COLUMN) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Out"), true, - platform::errors::NotFound("Out(Output) of VarConv2dOP is not found.")); + phi::errors::NotFound("Out(Output) of VarConv2dOP is not found.")); PADDLE_ENFORCE_EQ( ctx->HasOutput("Col"), true, - platform::errors::NotFound("Col(Output) of VarConv2dOP is not found.")); + phi::errors::NotFound("Col(Output) of VarConv2dOP is not found.")); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ( x_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The rank of X(Input) can't be less than 2, but received rank is %u.", x_dims.size())); @@ -101,7 +101,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims.size(), 2, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input W should be a 2-D tensor, but its actual dimension is %u.", w_dims.size())); int output_channel = ctx->Attrs().Get("OutputChannel"); @@ -111,7 +111,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[0], output_channel, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input W's dimension[0] should be equal to OutputChannel, the " "dimension[0] is %d, OutputChannel is %d.", w_dims[0], @@ -119,7 +119,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( w_dims[1], input_channel * kernel_h * kernel_w, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Input W's dimension[1] should be equal to InputChannel * StrideH * " "StrideW, the dimension[1] is %d, expected value is %d.", w_dims[1], @@ -131,17 +131,17 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { const auto& x_lod = x_var->Get().lod(); PADDLE_ENFORCE_EQ(!x_lod.empty(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X) phi::DenseTensor of VarConv2dOP " "does not contain LoD information.")); - 
PADDLE_ENFORCE_GE(x_lod.size(), - 1, - platform::errors::InvalidArgument( - "The Input(X)'s lod info is corrupted.")); + PADDLE_ENFORCE_GE( + x_lod.size(), + 1, + phi::errors::InvalidArgument("The Input(X)'s lod info is corrupted.")); PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod[0].back()), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(X)'s lod info mismatches the actual " "tensor shape, input lod is %s, tensor shape is %s.", x_lod, @@ -153,7 +153,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( !row_lod.empty(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(ROW) phi::DenseTensor of VarConv2dOP does not " "contain LoD information.")); @@ -163,7 +163,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( !col_lod.empty(), true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The Input(COLUMN) phi::DenseTensor of VarConv2dOP does not " "contain LoD information.")); } else { @@ -370,17 +370,17 @@ class VarConv2dGradMaker : public framework::SingleGradOpMaker { }; void VarConv2dOpGrad::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "Input(X) of SequencePadGradOp is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasInput("W"), - true, - platform::errors::NotFound( - "Input(W) of SequencePadGradOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("X"), + true, + phi::errors::NotFound("Input(X) of SequencePadGradOp is not found.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("W"), + true, + phi::errors::NotFound("Input(W) of SequencePadGradOp is not found.")); PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true, - platform::errors::NotFound( + phi::errors::NotFound( "Input(Out@GRAD) of SequencePadGradOp is not found.")); if (ctx->HasOutput(framework::GradVarName("X"))) { diff --git a/paddle/fluid/operators/xpu_api_wrapper.h b/paddle/fluid/operators/xpu_api_wrapper.h deleted file mode 100644 index c23fb1ae02ab4..0000000000000 --- a/paddle/fluid/operators/xpu_api_wrapper.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#pragma once - -#ifdef PADDLE_WITH_XPU -#include "paddle/phi/kernels/xpu/xpu_api_wrapper.h" - -namespace paddle { -namespace operators { - -using float16 = typename XPUTypeTrait::Type; - -} // namespace operators -} // namespace paddle -#endif diff --git a/paddle/fluid/pir/CMakeLists.txt b/paddle/fluid/pir/CMakeLists.txt index 9e883ef21af9a..7647a7efdf660 100644 --- a/paddle/fluid/pir/CMakeLists.txt +++ b/paddle/fluid/pir/CMakeLists.txt @@ -2,3 +2,4 @@ add_subdirectory(dialect) add_subdirectory(transforms) add_subdirectory(drr) add_subdirectory(utils) +add_subdirectory(serialize_deserialize) diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt index 59db81550bb8b..0b2fc8c47b75f 100644 --- a/paddle/fluid/pir/dialect/CMakeLists.txt +++ b/paddle/fluid/pir/dialect/CMakeLists.txt @@ -110,7 +110,7 @@ set(generated_files_pd_op "${pir_bwd_op_source_file}" "${pir_update_op_source_file}") -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(pir_op_onednn_yaml ${parsed_op_dir}/onednn.parsed.yaml) set(pd_onednn_op_yaml_file @@ -250,7 +250,7 @@ set(op_dialect_srcs ${api_source_file} ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/transforms/shape_optimization_pass.cc) -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(op_dialect_srcs ${op_dialect_srcs} ${onednn_op_source_file} ${op_onednn_info_file} ${CMAKE_CURRENT_SOURCE_DIR}/operator/ir/manual_onednn_op.cc) @@ -263,7 +263,14 @@ file(GLOB_RECURSE dist_dialect_srcs # if(WITH_DISTRIBUTE) FIXME in next PR set(op_dialect_srcs ${op_dialect_srcs} ${dist_dialect_srcs}) # endif() -set(op_dialect_deps phi common pir type_info string_helper) +set(op_dialect_deps + phi + common + pir + type_info + string_helper + global_utils + amp) if(WITH_ROCM) set(op_dialect_deps ${op_dialect_deps} global_utils) endif() @@ -283,13 +290,13 @@ set(op_dialect_vjp_srcs ${op_vjp_source_file} ${PADDLE_SOURCE_DIR}/paddle/fluid/primitive/base/decomp_trans.cc) -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(op_dialect_vjp_srcs ${op_dialect_vjp_srcs} ${CMAKE_CURRENT_SOURCE_DIR}/operator/ir/op_onednn_dialect.cc) endif() -set(op_dialect_vjp_deps primitive_vjp_experimental op_dialect) +set(op_dialect_vjp_deps primitive_vjp_experimental op_dialect prim_utils) cc_library( op_dialect_vjp diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc index 3382fa18b9090..6ba2b16d00df2 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_api.cc @@ -45,23 +45,20 @@ pir::Value shard_tensor(const pir::Value& x, return shard_tensor_op.out(); } -pir::Value reshard(const pir::Value& x, - const phi::distributed::ProcessMesh& process_mesh, - const std::vector& dims_mapping) { +pir::Value reshard( + const pir::Value& x, + const phi::distributed::ProcessMesh& process_mesh, + const std::vector& dims_mapping, + const flat_hash_map& partial_status) { pir::IrContext* ctx = pir::IrContext::Instance(); - // TODO(ywt01) get partial_status by func parameter - paddle::flat_hash_map partial_status; TensorDistAttribute tensor_dist_attr = TensorDistAttribute::get(ctx, process_mesh, dims_mapping, partial_status); - - auto reshard_op = ApiBuilder::Instance().GetBuilder()->Build( - x, tensor_dist_attr); - return reshard_op.result(0); + return reshard(x, tensor_dist_attr); } pir::Value reshard(const pir::Value& x, const TensorDistAttribute& tensor_dist_attr) { - auto reshard_op = ApiBuilder::Instance().GetBuilder()->Build( + auto reshard_op = ApiBuilder::Instance().GetBuilder()->Build( x, 
tensor_dist_attr); return reshard_op.result(0); } diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_api.h b/paddle/fluid/pir/dialect/distributed/ir/dist_api.h index 18aa1bb32ca64..5706afa63c165 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_api.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_api.h @@ -29,9 +29,11 @@ pir::Value shard_tensor(const pir::Value& x, const phi::distributed::ProcessMesh& process_mesh, const std::vector& dims_mapping); -pir::Value reshard(const pir::Value& x, - const phi::distributed::ProcessMesh& process_mesh, - const std::vector& dims_mapping); +pir::Value reshard( + const pir::Value& x, + const phi::distributed::ProcessMesh& process_mesh, + const std::vector& dims_mapping, + const flat_hash_map& partial_status = {}); pir::Value reshard(const pir::Value& x, const TensorDistAttribute& tensor_dist_attr); diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h b/paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h index 2b2be781c9ca8..9725206f5eaf4 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h @@ -75,6 +75,11 @@ class TensorDistAttribute : public pir::AttrBase& partial_status() const; + // construct a new attribute with new mesh attribute. + TensorDistAttribute CopyWithNewMesh(ProcessMeshAttribute mesh) const { + return get(ir_context(), mesh, dims_mapping(), partial_status()); + } + static TensorDistAttribute get( pir::IrContext* ctx, ProcessMeshAttribute mesh, diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_dialect.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_dialect.cc index 0ea42bf6e093d..5834ba6262f3f 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_dialect.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_dialect.cc @@ -35,7 +35,7 @@ void DistDialect::initialize() { TensorDistAttribute, OperationDistAttribute>(); RegisterTypes(); - RegisterOps(); + RegisterOps(); } void DistDialect::PrintType(pir::Type type, std::ostream &os) const { diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_interface.h b/paddle/fluid/pir/dialect/distributed/ir/dist_interface.h index 6fca7d4442b7c..c3fe93521da14 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_interface.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_interface.h @@ -28,13 +28,17 @@ class IR_API DistTypeInterface /// Defined these methods with the interface. 
explicit Concept(pir::Type (*local_type)(pir::Type), ProcessMeshAttribute (*process_mesh_attr)(pir::Type), - TensorDistAttribute (*tensor_dist_attr)(pir::Type)) + TensorDistAttribute (*tensor_dist_attr)(pir::Type), + pir::Type (*copy_with_new_mesh)(pir::Type, + ProcessMeshAttribute mesh)) : local_type(local_type), process_mesh_attr(process_mesh_attr), - tensor_dist_attr(tensor_dist_attr) {} + tensor_dist_attr(tensor_dist_attr), + copy_with_new_mesh(copy_with_new_mesh) {} pir::Type (*local_type)(pir::Type); ProcessMeshAttribute (*process_mesh_attr)(pir::Type); TensorDistAttribute (*tensor_dist_attr)(pir::Type); + pir::Type (*copy_with_new_mesh)(pir::Type, ProcessMeshAttribute mesh); }; template @@ -50,7 +54,15 @@ class IR_API DistTypeInterface return pir::cast(type).tensor_dist_attr(); } - Model() : Concept(local_type, process_mesh_attr, tensor_dist_attr) {} + static Type CopyWithNewMesh(Type type, ProcessMeshAttribute mesh) { + return pir::cast(type).CopyWithNewMesh(mesh); + } + + Model() + : Concept(local_type, + process_mesh_attr, + tensor_dist_attr, + CopyWithNewMesh) {} }; DistTypeInterface(pir::Type type, Concept *impl) @@ -66,6 +78,10 @@ class IR_API DistTypeInterface return impl_->tensor_dist_attr(*this); } + DistTypeInterface CopyWithNewMesh(ProcessMeshAttribute mesh) { + return DistTypeInterface(impl_->copy_with_new_mesh(*this, mesh), impl_); + } + private: Concept *impl_; }; diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc index cc06461e66d55..d419ea7d4d165 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_op.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/pir/dialect/distributed/ir/dist_op.h" #include "paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h" #include "paddle/fluid/pir/dialect/distributed/ir/dist_type.h" +#include "paddle/fluid/pir/dialect/operator/ir/api_builder.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" @@ -28,7 +29,7 @@ namespace paddle { namespace dialect { const char* ShardTensorOp::attributes_name[1] = {"op_dist_attr"}; -const char* ReShardOp::attributes_name[1] = {"op_dist_attr"}; +const char* ReshardOp::attributes_name[1] = {"op_dist_attr"}; void ShardTensorOp::VerifySig() { VLOG(4) @@ -159,8 +160,54 @@ void ShardTensorOp::Build(pir::Builder& builder, ::pir::PassStopGradientsDefaultly(argument); } -void ReShardOp::VerifySig() { - VLOG(4) << "Start Verifying inputs, outputs and attributes for: ReShardOp."; +OpInfoTuple ReshardOp::GetOpInfo() { + return OpInfoTuple( + {OpInputInfo()}, {}, {OpOutputInfo()}, OpRunTimeInfo(), "reshard"); +} + +std::vector> ReshardOp::Vjp( + pir::Operation* op, + const std::vector>& inputs_, + const std::vector>& outputs, + const std::vector>& out_grads, + const std::vector>& stop_gradients) { + VLOG(6) << "Start call vjp for reshard op."; + PADDLE_ENFORCE_EQ( + inputs_.size(), + 1, + common::errors::InvalidArgument("reshard op's inputs' size should be 1")); + PADDLE_ENFORCE_EQ(inputs_[0].size(), + 1, + common::errors::InvalidArgument( + "reshard op's inputs[0]'s size should be 1")); + auto dist_type = inputs_[0][0].type().dyn_cast(); + + PADDLE_ENFORCE_NOT_NULL( + dist_type, + common::errors::InvalidArgument( + "Currently, reshard op's inputs type must be dist type.")); + + PADDLE_ENFORCE_EQ(out_grads.size(), + 1, + common::errors::InvalidArgument( + "reshard op's 
outputs grad size should be 1")); + + PADDLE_ENFORCE_EQ(out_grads[0].size(), + 1, + common::errors::InvalidArgument( + "reshard op's outputs grad[0] size should be 1")); + + auto& builder = *ApiBuilder::Instance().GetBuilder(); + + auto grad_op = + builder.Build(out_grads[0][0], dist_type.tensor_dist_attr()); + + VLOG(6) << "End call vjp for reshard op."; + + return {std::vector{grad_op->result(0)}}; +} +void ReshardOp::VerifySig() { + VLOG(4) << "Start Verifying inputs, outputs and attributes for: ReshardOp."; VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); @@ -224,11 +271,11 @@ void ReShardOp::VerifySig() { VLOG(4) << "End Verifying for: ShardTensorOp."; } -void ReShardOp::Build(pir::Builder& builder, +void ReshardOp::Build(pir::Builder& builder, pir::OperationArgument& argument, pir::Value input, TensorDistAttribute tensor_dist_attr) { - VLOG(4) << "Start build ReShardOp"; + VLOG(4) << "Start build ReshardOp"; paddle::dialect::DistDenseTensorType input_tensor_type; if (input.type().isa()) { @@ -270,10 +317,11 @@ void ReShardOp::Build(pir::Builder& builder, tensor_dist_attr, local_shape); argument.AddOutput(out_dist_tensor_type); + ::pir::PassStopGradientsDefaultly(argument); } } // namespace dialect } // namespace paddle IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ShardTensorOp) -IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ReShardOp) +IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::ReshardOp) diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_op.h b/paddle/fluid/pir/dialect/distributed/ir/dist_op.h index 7ae81a0040702..638fb430eaf4e 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_op.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_op.h @@ -15,6 +15,8 @@ #pragma once #include +#include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h" +#include "paddle/fluid/pir/dialect/operator/interface/vjp.h" #include "paddle/pir/include/core/builder.h" #include "paddle/pir/include/core/builtin_type.h" #include "paddle/pir/include/core/op_base.h" @@ -39,7 +41,7 @@ class ShardTensorOp : public pir::Op { void VerifySig(); }; -class ReShardOp : public pir::Op { +class ReshardOp : public pir::Op { public: using Op::Op; static const char* name() { return "dist_op.reshard"; } @@ -49,10 +51,19 @@ class ReShardOp : public pir::Op { pir::OperationArgument& argument, // NOLINT pir::Value input, TensorDistAttribute tensor_dist_attr); + + static OpInfoTuple GetOpInfo(); + static std::vector> Vjp( + pir::Operation* op, + const std::vector>& inputs_, + const std::vector>& outputs, + const std::vector>& out_grads, + const std::vector>& stop_gradients); + void VerifySig(); }; } // namespace dialect } // namespace paddle IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::ShardTensorOp) -IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::ReShardOp) +IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::ReshardOp) diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_tools.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_tools.cc index 9741a76714816..d4e6daf22149b 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_tools.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_tools.cc @@ -86,5 +86,29 @@ TensorDistAttribute CvtToPirDistAttr( attr.partial_status()); } +void CopyLeafOpToMesh(pir::Value value, ProcessMeshAttribute mesh_attr) { + if (auto dist_type = value.type().dyn_cast()) { + if (dist_type.process_mesh_attr() == mesh_attr) { + return; + } + if (auto op = value.defining_op()) { + if (op->num_operands() != 0u || op->num_results() != 1u) { + return; + } + pir::IrMapping 
ir_mapping; + auto new_op = op->Clone(ir_mapping); + op->GetParent()->insert(*op, new_op); + value.ReplaceAllUsesWith(new_op->result(0)); + dist_type = dist_type.CopyWithNewMesh(mesh_attr); + value.set_type(dist_type); + op->set_attribute( + kAttrOpDistAttr, + OperationDistAttribute::get(dist_type.ir_context(), + mesh_attr, + {}, + {dist_type.tensor_dist_attr()})); + } + } +} } // namespace dialect } // namespace paddle diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_tools.h b/paddle/fluid/pir/dialect/distributed/ir/dist_tools.h index 24d8d2d2143b0..bbd62617c8cf0 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_tools.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_tools.h @@ -31,5 +31,17 @@ phi::distributed::DistMetaTensor CvtToDistMetaTensor(DistDenseTensorType type); TensorDistAttribute CvtToPirDistAttr( const phi::distributed::ArgDistAttr& dist_attr); +/// +/// When the following conditions are met: +/// 1. The value's type is dist type. +/// 2. The value type's mesh is not equal to the mesh_attr argument. +/// 3. The operation that defines the value has no inputs and one output. +/// The function first clones the defining operation and replaces all uses of +/// the original value with the cloned output. Second, the mesh of the +/// original operation and value is updated with the 'mesh_attr' argument. +/// Otherwise, the function does nothing. +/// +void CopyLeafOpToMesh(pir::Value value, ProcessMeshAttribute mesh_attr); + } // namespace dialect } // namespace paddle diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_type.cc b/paddle/fluid/pir/dialect/distributed/ir/dist_type.cc index 5753608c85256..d1b70c24a1c56 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_type.cc +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_type.cc @@ -60,7 +60,7 @@ common::DDim InferLocalDDim(const common::DDim& global_ddim, return local_ddim; } -auto DistDenseTensorType::local_type() const -> Type { +pir::DenseTensorType DistDenseTensorType::local_type() const { return pir::DenseTensorType::get(pir::IrContext::Instance(), dtype(), local_ddim(), diff --git a/paddle/fluid/pir/dialect/distributed/ir/dist_type.h b/paddle/fluid/pir/dialect/distributed/ir/dist_type.h index 2344a97399e34..c83904a02aef9 100644 --- a/paddle/fluid/pir/dialect/distributed/ir/dist_type.h +++ b/paddle/fluid/pir/dialect/distributed/ir/dist_type.h @@ -45,8 +45,8 @@ class DistDenseTensorType const LoD& lod() const { return dense_tensor_type().lod(); } size_t offset() const { return dense_tensor_type().offset(); } - Type prim_type() { return dense_tensor_type(); } - Type local_type() const; + pir::DenseTensorType prim_type() { return dense_tensor_type(); } + pir::DenseTensorType local_type() const; ProcessMeshAttribute process_mesh_attr() const { return tensor_dist_attr().process_mesh_attr(); @@ -61,6 +61,13 @@ class DistDenseTensorType return tensor_dist_attr().partial_status(); } + DistDenseTensorType CopyWithNewMesh(ProcessMeshAttribute mesh) { + return get(ir_context(), + dense_tensor_type(), + tensor_dist_attr().CopyWithNewMesh(mesh), + local_ddim()); + } + static DistDenseTensorType get(pir::IrContext* ctx, pir::DenseTensorType dense_tensor_type, TensorDistAttribute tensor_dist_attr, diff --git a/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.cc b/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.cc new file mode 100644 index 0000000000000..086feecffa396 --- /dev/null +++ b/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.cc @@ -0,0
+1,136 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.h" + +#include +#include +#include + +#include "paddle/common/flags.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h" +#include "paddle/fluid/pir/dialect/distributed/ir/dist_dialect.h" +#include "paddle/fluid/pir/dialect/distributed/ir/dist_interface.h" +#include "paddle/fluid/pir/dialect/distributed/ir/dist_type.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_type.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/dialect/operator/utils/utils.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/pir/include/core/attribute.h" + +using paddle::dialect::DistDenseTensorType; + +COMMON_DECLARE_bool(print_ir); + +namespace paddle { +namespace dialect { + +inline pir::Type CastToLocalType(pir::Type dist_type) { + return dist_type.dyn_cast().local_type(); +} + +inline bool IsDistType(pir::Type type) { return type.isa(); } + +void ProcessDistBlock(pir::Block* block) { + for (auto iter = block->begin(); iter != block->end(); ++iter) { + pir::Operation* op_item = &(*iter); + VLOG(6) << "dist_to_dense main loop over op [" << op_item->name() << "]."; + + for (size_t i = 0; i < op_item->num_results(); ++i) { + auto result = op_item->result(i); + auto origin_type = result.type(); + if (IsDistType(origin_type)) { + auto local_type = CastToLocalType(origin_type); + result.set_type(local_type); + } else if (origin_type) { // skip if <> + // TODO(2024-Q2) not all values are dist type + PADDLE_THROW(platform::errors::PreconditionNotMet( + "The op [%s]'s [%d]th result is not Dist type.", + op_item->name(), + i)); + } + } + // TODO(2024-Q2) not all ops are dist type + // PADDLE_ENFORCE_EQ( + // (op_item->HasAttribute(kAttrOpDistAttr) && + // op_item->attribute(kAttrOpDistAttr) + // .isa()), + // true, + // common::errors::PreconditionNotMet("The op [%s] has no + // op_dist_attr.", + // op_item->name())); if (op_item->HasAttribute(kAttrOpDistAttr)) { + op_item->erase_attribute(kAttrOpDistAttr); + } + + // TODO(2024-Q2) Handle other special dist ops in the future. + } +} + +/* Verification: + 1. no operator has an OperatorDistAttr. + 2. all Values (Results) are DenseTensorType. + 3. no shard_tensor / reshard in block.
+*/ +void VerifyDenseBlock(pir::Block* block) { + for (auto iter = block->begin(); iter != block->end(); ++iter) { + pir::Operation* op_item = &(*iter); + + for (size_t i = 0; i < op_item->num_results(); ++i) { + auto result = op_item->result(i); + + PADDLE_ENFORCE_EQ( + IsDistType(result.type()), + false, + phi::errors::PreconditionNotMet( + "Block op [%s] still contain dist type.", op_item->name())); + } + + PADDLE_ENFORCE_EQ( + op_item->HasAttribute(kAttrOpDistAttr), + false, + common::errors::PreconditionNotMet( + "The op [%s] still has op_dist_attr.", op_item->name())); + } +} + +std::shared_ptr DistToDensePass(pir::Program* prog) { + if (FLAGS_print_ir) { + VLOG(0) << "IR before DistToDense Pass = " << *prog; + } + + pir::IrMapping mapper; + auto new_prog = prog->Clone(mapper); + + pir::IrContext* ctx = pir::IrContext::Instance(); + ctx->GetOrRegisterDialect(); + ctx->GetOrRegisterDialect(); + + ProcessDistBlock(new_prog->block()); + VLOG(6) << "IR before VerifyDenseBlock Pass = " << *new_prog; + VerifyDenseBlock(new_prog->block()); + + if (FLAGS_print_ir) { + VLOG(0) << "IR after DistToDense Pass = " << *new_prog; + } + + return new_prog; +} + +} // namespace dialect +} // namespace paddle diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h b/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.h similarity index 65% rename from paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h rename to paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.h index c7cfc23feb89e..970aaaa564271 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/general_topo_policy.h +++ b/paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.h @@ -11,15 +11,18 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
- #pragma once -#include "paddle/cinn/frontend/group_cluster/cluster_policy/policy_manager.h" -namespace cinn::frontend::group_cluster::policy { +#include "paddle/pir/include/core/program.h" + +namespace paddle { +namespace dialect { + +TEST_API std::shared_ptr DistToDensePass(pir::Program* prog); + +void ProcessDistBlock(pir::Block* block); -class GeneralTopoPolicy final : virtual public Policy { - public: - bool CanFuse(const PatternNodePtr upstream, const PatternNodePtr downstream); -}; +void VerifyDenseBlock(pir::Block* block); -} // namespace cinn::frontend::group_cluster::policy +} // namespace dialect +} // namespace paddle diff --git a/paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.cc b/paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.cc index 60d42984c57b6..fe0f5254d2b5f 100644 --- a/paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.cc +++ b/paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.cc @@ -44,7 +44,7 @@ inline bool IsShardTensorOp(pir::Operation* op) { return op_name.find("shard_tensor") != op_name.npos; } -void ProcessBlock(pir::Block* block) { +void ProcessMixBlock(pir::Block* block) { std::vector deleted_ops; for (auto iter = block->begin(); iter != block->end(); ++iter) { @@ -80,6 +80,15 @@ void ProcessBlock(pir::Block* block) { shard_operand_define_op->set_attribute( kAttrOpDistAttr, op_item->attribute(kAttrOpDistAttr)); + // set stop gradient and persistable + if (op_item->HasAttribute(kAttrStopGradients)) { + shard_operand_define_op->set_attribute( + kAttrStopGradients, op_item->attribute(kAttrStopGradients)); + } + if (op_item->HasAttribute(kAttrIsPersistable)) { + shard_operand_define_op->set_attribute( + kAttrIsPersistable, op_item->attribute(kAttrIsPersistable)); + } deleted_ops.push_back(op_item); } @@ -98,7 +107,7 @@ void ProcessBlock(pir::Block* block) { 2. all Values (Results) are DistDenseTensorType. 3. no shard_tensor in block. 
*/ -void VerifyBlock(pir::Block* block) { +void VerifyDistBlock(pir::Block* block) { for (auto iter = block->begin(); iter != block->end(); ++iter) { pir::Operation* op_item = &(*iter); PADDLE_ENFORCE_EQ(paddle::dialect::IsShardTensorOp(op_item), @@ -135,8 +144,8 @@ std::shared_ptr MixToDistPass(pir::Program* prog) { ctx->GetOrRegisterDialect(); ctx->GetOrRegisterDialect(); - ProcessBlock(new_prog->block()); - VerifyBlock(new_prog->block()); + ProcessMixBlock(new_prog->block()); + VerifyDistBlock(new_prog->block()); if (FLAGS_print_ir) { std::cout << "IR after MixToDist Pass = " << *new_prog << std::endl; diff --git a/paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.h b/paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.h index 978f64f12d2b1..7212fd79ab9fe 100644 --- a/paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.h +++ b/paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.h @@ -22,9 +22,9 @@ namespace dialect { TEST_API std::shared_ptr MixToDistPass(pir::Program* prog); -void ProcessBlock(pir::Block* block); +void ProcessMixBlock(pir::Block* block); -void VerifyBlock(pir::Block* block); +void VerifyDistBlock(pir::Block* block); } // namespace dialect } // namespace paddle diff --git a/paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py b/paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py index 4d37aaf829861..6ca4b6d18680b 100644 --- a/paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py +++ b/paddle/fluid/pir/dialect/op_generator/decomp_interface_gen_op_list.py @@ -38,19 +38,19 @@ "leaky_relu", "log_softmax", "mean", + "p_norm", "pow", + "reciprocal", "relu", "relu6", "sigmoid", "silu", "swiglu", "softmax", - "sqrt", "square", "squeeze", "stack", "unsqueeze", - "tile", ] # come into effect in generated file op_decomp.cc @@ -72,19 +72,19 @@ "leaky_relu", "log_softmax", "mean", + "p_norm", "pow", + "reciprocal", "relu", "relu6", "sigmoid", "silu", "swiglu", "softmax", - "sqrt", "square", "squeeze", "stack", "unsqueeze", - "tile", ] diff --git a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py index 99daa1a8c1585..ee45bdf338270 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py @@ -387,9 +387,7 @@ def GenBuildAttributes( op_attribute_type=op_non_mutable_attribute_type_list[idx], attr=op_non_mutable_attribute_name_list[idx], ) - attr_str += """ argument_attributes.insert({{"{attr_name}", attr_{attr_name}}});\n""".format( - attr_name=op_non_mutable_attribute_name_list[idx] - ) + attr_str += f""" argument_attributes.insert({{"{op_non_mutable_attribute_name_list[idx]}", attr_{op_non_mutable_attribute_name_list[idx]}}});\n""" return attr_str @@ -558,15 +556,11 @@ def GenBuildOutputs( # is a vector if 'pir::VectorType' in op_input_type_list[idx]: if op_input_optional_list[idx] == 'false': - build_output_str += " pir::VectorType {name} = {name}_.type().dyn_cast(); (void){name};\n".format( - name=op_input_name_list[idx] - ) + build_output_str += f" pir::VectorType {op_input_name_list[idx]} = {op_input_name_list[idx]}_.type().dyn_cast(); (void){op_input_name_list[idx]};\n" # is a Tensor else: if op_input_optional_list[idx] == 'false': - build_output_str += " {type} {name} = {name}_.type().dyn_cast<{type}>(); (void){name};\n".format( - type=op_input_type_list[idx], name=op_input_name_list[idx] - ) + build_output_str += f" {op_input_type_list[idx]} 
{op_input_name_list[idx]} = {op_input_name_list[idx]}_.type().dyn_cast<{op_input_type_list[idx]}>(); (void){op_input_name_list[idx]};\n" # Prepare mutable attributes if mutable_attr_is_input: @@ -826,13 +820,11 @@ def gen_build_func_str( op_non_mutable_attribute_type_list, ) - build_outputs_str = """ - std::vector argument_outputs = {op_name}::InferMeta(argument_inputs, &argument_attributes); + build_outputs_str = f""" + std::vector argument_outputs = {op_info.class_name}::InferMeta(argument_inputs, &argument_attributes); argument.AddAttributes(argument_attributes); argument.AddOutputs(argument_outputs.begin(), argument_outputs.end()); - ::pir::PassStopGradientsDefaultly(argument);""".format( - op_name=op_info.class_name - ) + ::pir::PassStopGradientsDefaultly(argument);""" GET_ATTRIBUTES_FROM_MAP_TEMPLATE = """ PADDLE_ENFORCE_NE( diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py index c264bd246ce60..ebe06caab438a 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py @@ -54,6 +54,7 @@ 'BatchNormOp', 'FetchOp', 'FullIntArrayOp', + 'FusedConv2dAddActOp', 'MatmulOp', 'SoftmaxOp', 'ReshapeOp', @@ -86,6 +87,7 @@ #include "paddle/fluid/pir/dialect/operator/interface/decomp.h" #include "paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_symbolic_shape.h" #include "paddle/fluid/pir/dialect/operator/interface/infermeta.h" +#include "paddle/fluid/pir/dialect/operator/interface/layout_transformation.h" #include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h" #include "paddle/fluid/pir/dialect/operator/interface/parse_kernel_key.h" #include "paddle/fluid/pir/dialect/operator/interface/vjp.h" @@ -180,6 +182,7 @@ class {TEST_API} {op_name} : public pir::Op<{op_name}{interfaces}{traits}> {{ CC_FILE_TEMPLATE = """// This file is generated by "paddle/fluid/pir/dialect/op_generator/op_gen.py" #include "{h_file}" #include "paddle/fluid/pir/dialect/kernel/ir/kernel_type.h" +#include "paddle/fluid/pir/dialect/operator/interface/layout_transformation.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h" #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h" @@ -1507,9 +1510,7 @@ def AutoCodeGen( muta_attr_is_input=True, ) - build_mutable_attr_is_input = "static void Build({build_args});".format( - build_args=build_args_with_muta_attr_is_input_for_declare - ) + build_mutable_attr_is_input = f"static void Build({build_args_with_muta_attr_is_input_for_declare});" if (op_invoke_map is not None) and ( op_invoke_map['func'] in op_info_items ): diff --git a/paddle/fluid/pir/dialect/op_generator/op_infermeta_func_gen.py b/paddle/fluid/pir/dialect/op_generator/op_infermeta_func_gen.py index 5e0b696507fa5..61e43c53e6d1b 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_infermeta_func_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_infermeta_func_gen.py @@ -111,9 +111,7 @@ def get_infermeta_inputs_str( # is a vector if 'pir::VectorType' in op_input_type_list[idx]: if op_input_optional_list[idx] == 'false': - infermeta_inputs_str += " pir::VectorType {name} = {name}_.type().dyn_cast(); (void){name};\n".format( - name=op_input_name_list[idx] - ) + infermeta_inputs_str += f" pir::VectorType {op_input_name_list[idx]} = {op_input_name_list[idx]}_.type().dyn_cast(); (void){op_input_name_list[idx]};\n" # is a Tensor else: if op_input_optional_list[idx] == 'false': @@ -611,10 +609,17 
@@ def GenDistBranch(args, op_info): // Auto Parallel condition ProcessMeshAttribute op_mesh; if(HasDistInput(input_values, &op_mesh)) {{ + {} CvtAllInputsToDist(input_values, op_mesh); auto ctx = pir::IrContext::Instance(); std::vector operand_dist_attrs, result_dist_attrs;""" - dist_branch_str = TEMPLATE.format() + + extra_call = "" + for name in op_info.spmd_params: + if name == "learning_rate": + extra_call = "CopyLeafOpToMesh(learning_rate_, op_mesh);" + break + dist_branch_str = TEMPLATE.format(extra_call) infer_spmd_args_list = [] # Prepare inputs_meta_tensor & attributes for infer spmd for name in op_info.spmd_params: diff --git a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py index 5ad1c5b562740..8ba3d64ad39a3 100644 --- a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py @@ -125,6 +125,7 @@ 'add_n_', 'all_reduce', 'all_reduce_', + 'assign_pos', 'batch_fc', 'barrier', 'c_allgather', @@ -140,8 +141,12 @@ 'c_softmax_with_cross_entropy', 'c_split', 'decayed_adagrad', + 'distributed_fused_lamb', + 'distributed_fused_lamb_', 'distributed_push_sparse', 'distributed_lookup_table', + 'dgc_momentum', + 'dgc', 'dpsgd', 'embedding_grad_sparse', 'ftrl', diff --git a/paddle/fluid/pir/dialect/op_generator/vjp_interface_black_list.py b/paddle/fluid/pir/dialect/op_generator/vjp_interface_black_list.py index 4b2bbc3c54999..c0620d4dbdc43 100644 --- a/paddle/fluid/pir/dialect/op_generator/vjp_interface_black_list.py +++ b/paddle/fluid/pir/dialect/op_generator/vjp_interface_black_list.py @@ -22,7 +22,13 @@ # remove this file and support Vjp methods # code gen. +# Operators that only have a composite implementation should be added below. +# For example: +# * `silu_double_grad` only has a composite implementation, so `silu_grad` was added below. +# * `log_double_grad` has both composite and kernel implementations, so `log_grad` should not be added below.
vjp_interface_black_list = [ 'silu_grad', + 'exp_grad', + 'abs_double_grad', ] diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/binary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/binary_infer_sym.cc index 42b3567290cda..49e62dbf59503 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/binary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/binary_infer_sym.cc @@ -38,7 +38,7 @@ inline void UpdatePaddingAndDilation( symbol::DimExpr one{1}; symbol::DimExpr two{2}; if (padding_algorithm == "SAME") { - symbol::DimExprBuilder builder{nullptr}; + symbol::DimExprBuilder builder; for (size_t i = 0; i < data_dims.size(); ++i) { symbol::DimExpr out_size = (data_dims[i] + strides[i] - 1) / strides[i]; symbol::DimExpr pad_sum = builder.Max( @@ -205,7 +205,9 @@ bool SparseWeightEmbeddingOpInferSymbolicShape( bool ExpandAsOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + op->name() + + " 's InferSymbolicShape interface is NOT implemented " + "now because of the lack of necessary information.")); return true; } @@ -354,8 +356,16 @@ bool KronOpInferSymbolicShape(pir::Operation *op, bool MaskedSelectOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + const std::vector<symbol::DimExpr> &out_dims = [&] { + std::vector<symbol::DimExpr> out_dims; + symbol::DimExpr out_shape = + shape_analysis->GetNextSymName(); // unknown until runtime + out_dims.push_back(out_shape); + return out_dims; + }(); + // TODO(fty1777): Add constraints between the shapes of x and mask + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::TensorShapeOrDataDimExprs{out_dims}); return true; } @@ -416,9 +426,10 @@ bool MatmulOpInferSymbolicShape( } else if (ndims_x < ndims_y) { out_dims.assign(y_dims.begin(), y_dims.end() - 2); } else { - symbol::DimExprBuilder builder{nullptr}; + symbol::DimExprBuilder builder; for (size_t i = 0; i < ndims_x - 2; ++i) { out_dims.emplace_back(builder.Broadcast(x_dims[i], y_dims[i])); + shape_analysis->AddBroadcastableCstr(x_dims[i], y_dims[i]); } } @@ -440,21 +451,17 @@ bool MatmulOpInferSymbolicShape( if ((ndims_x == ndims_y) && ndims_x >= 2) { if (transpose_x_attr == false && transpose_y_attr == false) { - shape_analysis->DimExprBuilder().CstrEq(x_dims[ndims_x - 1], - y_dims[ndims_x - 2]); + shape_analysis->AddEqualCstr(x_dims[ndims_x - 1], y_dims[ndims_x - 2]); } else if (transpose_x_attr == false && transpose_y_attr == true) { - shape_analysis->DimExprBuilder().CstrEq(x_dims[ndims_x - 1], - y_dims[ndims_x - 1]); + shape_analysis->AddEqualCstr(x_dims[ndims_x - 1], y_dims[ndims_x - 1]); } else if (transpose_x_attr == true && transpose_y_attr == false) { - shape_analysis->DimExprBuilder().CstrEq(x_dims[ndims_x - 2], - y_dims[ndims_x - 2]); + shape_analysis->AddEqualCstr(x_dims[ndims_x - 2], y_dims[ndims_x - 2]); } else { - shape_analysis->DimExprBuilder().CstrEq(x_dims[ndims_x - 2], - y_dims[ndims_x - 1]); + shape_analysis->AddEqualCstr(x_dims[ndims_x - 2], y_dims[ndims_x - 1]); } for (size_t i = 0; i < ndims_x - 2; ++i) { - shape_analysis->DimExprBuilder().CstrEq(x_dims[i], y_dims[i]); + shape_analysis->AddEqualCstr(x_dims[i], y_dims[i]); } } return true; @@ -462,8 +469,12 @@ bool
MatmulOpInferSymbolicShape( bool SearchsortedOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + // The shape of the output is the same as that of input `values` (op->operand_source(1)) + const symbol::ShapeOrDataDimExprs &operand_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(1)); + // TODO(fty1777): Add constraints between the shapes of `sorted_sequence` and + // `values` + shape_analysis->SetShapeOrDataForValue(op->result(0), operand_shape_or_data); return true; } diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/cinn_op_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/cinn_op_infer_sym.cc index 35d4992539111..5302e1f76cdc2 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/cinn_op_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/cinn_op_infer_sym.cc @@ -85,9 +85,6 @@ bool ConcatOpInferSymbolicShape( return out_dims; }; - VLOG(3) << "constraints size:" - << shape_analysis->DimExprBuilder().constraints().size(); - symbol::ShapeOrDataDimExprs shape_data{ symbol::TensorShapeOrDataDimExprs(GetOutDimExprs())}; @@ -129,6 +126,18 @@ bool ReshapeOpInferSymbolicShape( std::vector shape = paddle::dialect::details::GetVectorAttr(op, "shape"); + const symbol::ShapeOrDataDimExprs &x_dim_expr = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + if (x_dim_expr.data().has_value()) { + if (shape.size() == 1 && shape.front() == 1) { + shape_analysis->SetShapeOrDataForValue( + op->result(0), + symbol::TensorShapeOrDataDimExprs(std::vector<symbol::DimExpr>{1}, + x_dim_expr.data().value())); + return true; + } + } + const auto &GetProduct = [&](const auto &dim_exprs, const auto &Filter) { symbol::DimExpr product{1}; for (const auto &dim_expr : dim_exprs) { diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/element_wise_binary.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/element_wise_binary.cc index 170143307dc06..e220d06f99020 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/element_wise_binary.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/element_wise_binary.cc @@ -15,37 +15,19 @@ #include "paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/element_wise_binary.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" -bool ShouldUseData(pir::Value val) { - if (!val.defining_op()) return false; - if (val.defining_op()->isa()) { - return true; - } - return false; -} - bool InferSymbolicShapeElementWiseBinary( - pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - const auto &x_shapeordata = + pir::Operation *op, + pir::ShapeConstraintIRAnalysis *shape_analysis, + const std::function<symbol::DimExpr(const symbol::DimExpr &, const symbol::DimExpr &)> + &DataComputeFunc = nullptr) { const auto &x_shape = shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); - std::vector<symbol::DimExpr> shape_0; - // For ElementWiseBinary ops, if the input tensor is from full op, the value - // of fullop is useless, only the shape need doing broadcast - if (ShouldUseData(op->operand_source(0)) && - x_shapeordata.data().has_value()) { - shape_0 = x_shapeordata.data().value(); - } else { - shape_0 = x_shapeordata.shape(); - } + std::vector<symbol::DimExpr> shape_0 = x_shape.shape(); - const auto &y_shapeordata = + const auto &y_shape =
shape_analysis->GetShapeOrDataForValue(op->operand_source(1)); - std::vector shape_1; - if (ShouldUseData(op->operand_source(1)) && - y_shapeordata.data().has_value()) { - shape_1 = y_shapeordata.data().value(); - } else { - shape_1 = y_shapeordata.shape(); - } + std::vector shape_1 = y_shape.shape(); int diff = shape_0.size() - shape_1.size(); if (diff > 0) { @@ -60,7 +42,7 @@ bool InferSymbolicShapeElementWiseBinary( const std::vector shapes = [&] { std::vector shapes; - symbol::DimExprBuilder builder{nullptr}; + symbol::DimExprBuilder builder; for (size_t i = 0; i < shape_0.size(); i++) { if (shape_0[i] == shape_1[i]) { shapes.emplace_back(shape_0[i]); @@ -70,17 +52,45 @@ bool InferSymbolicShapeElementWiseBinary( shapes.emplace_back(shape_0[i]); } else { shapes.emplace_back(builder.Broadcast(shape_0[i], shape_1[i])); + shape_analysis->AddBroadcastableCstr(shape_0[i], shape_1[i]); } } return shapes; }(); - // TODO(lanxianghit): fill data when the operation is on shape computation - // std::vector data; - symbol::ShapeOrDataDimExprs shape_data{ - symbol::TensorShapeOrDataDimExprs(shapes)}; - shape_analysis->SetShapeOrDataForValue(op->result(0), shape_data); - + if (x_shape.data() && y_shape.data() && DataComputeFunc) { + PADDLE_ENFORCE_LE( + x_shape.shape().size(), + 1, + common::errors::InvalidArgument("When computing data, the rank of x " + "should be 0 or 1, but now received %d", + x_shape.shape().size())); + PADDLE_ENFORCE_LE( + y_shape.shape().size(), + 1, + common::errors::InvalidArgument("When computing data, the rank of y " + "should be 0 or 1, but now received %d", + y_shape.shape().size())); + PADDLE_ENFORCE_EQ(x_shape.data()->size(), + y_shape.data()->size(), + common::errors::InvalidArgument( + "When computing data, the size of x and y should be " + "equal, but now received %d and %d", + x_shape.data()->size(), + y_shape.data()->size())); + std::vector out_data; + for (size_t i = 0; i < x_shape.data()->size(); ++i) { + out_data.emplace_back( + DataComputeFunc(x_shape.data()->at(i), y_shape.data()->at(i))); + } + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(shapes, out_data)}; + shape_analysis->SetShapeOrDataForValue(op->result(0), shape_data); + } else { + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(shapes)}; + shape_analysis->SetShapeOrDataForValue(op->result(0), shape_data); + } return true; } @@ -91,14 +101,45 @@ bool InferSymbolicShapeElementWiseBinary( } namespace paddle::dialect { + +bool AddOpInferSymbolicShape(pir::Operation *op, + pir::ShapeConstraintIRAnalysis *shape_analysis) { + return InferSymbolicShapeElementWiseBinary( + op, + shape_analysis, + [](const symbol::DimExpr &x, const symbol::DimExpr &y) { return x + y; }); +} + +bool DivideOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + return InferSymbolicShapeElementWiseBinary( + op, + shape_analysis, + [](const symbol::DimExpr &x, const symbol::DimExpr &y) { return x / y; }); +} + +bool MultiplyOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + return InferSymbolicShapeElementWiseBinary( + op, + shape_analysis, + [](const symbol::DimExpr &x, const symbol::DimExpr &y) { return x * y; }); +} + +bool SubtractOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + return InferSymbolicShapeElementWiseBinary( + op, + shape_analysis, + [](const symbol::DimExpr &x, const symbol::DimExpr &y) { return x - y;
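
Editor's note: the hunk above collapses Add/Divide/Multiply/Subtract into a single InferSymbolicShapeElementWiseBinary helper. The two operand shapes are right-aligned and broadcast (recording a broadcastable constraint for each symbolic pair), and only when both operands carry compile-time data and a DataComputeFunc is supplied is the data folded element by element. A minimal standalone sketch of that control flow, with plain int64_t standing in for symbol::DimExpr (all names illustrative, not Paddle's API):

```cpp
#include <cassert>
#include <cstdint>
#include <functional>
#include <iostream>
#include <optional>
#include <vector>

using Dims = std::vector<int64_t>;
using BinaryFn = std::function<int64_t(int64_t, int64_t)>;

// Right-align the shorter shape with leading 1s, then broadcast per dim.
Dims BroadcastShapes(Dims a, Dims b) {
  while (a.size() < b.size()) a.insert(a.begin(), 1);
  while (b.size() < a.size()) b.insert(b.begin(), 1);
  Dims out(a.size());
  for (size_t i = 0; i < a.size(); ++i) {
    assert(a[i] == b[i] || a[i] == 1 || b[i] == 1);  // broadcastable constraint
    out[i] = (a[i] == 1) ? b[i] : a[i];
  }
  return out;
}

// Fold per-element data only when both sides carry it and a compute fn is
// given, mirroring the DataComputeFunc branch in the patch.
std::optional<std::vector<int64_t>> FoldData(
    const std::optional<std::vector<int64_t>>& x,
    const std::optional<std::vector<int64_t>>& y,
    const BinaryFn& fn) {
  if (!x || !y || !fn) return std::nullopt;
  assert(x->size() == y->size());
  std::vector<int64_t> out;
  for (size_t i = 0; i < x->size(); ++i) out.push_back(fn((*x)[i], (*y)[i]));
  return out;
}

int main() {
  Dims shape = BroadcastShapes({2, 1, 4}, {3, 4});  // -> {2, 3, 4}
  auto data = FoldData(std::vector<int64_t>{4, 5},
                       std::vector<int64_t>{1, 2},
                       [](int64_t a, int64_t b) { return a + b; });
  std::cout << shape[1] << " " << (*data)[1] << "\n";  // prints "3 7"
  return 0;
}
```

Passing the fold as a lambda keeps the broadcast logic in one place while each op contributes only its arithmetic.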
}); +} + OP_ELEMENT_WISE_BINARY(Add_) OP_ELEMENT_WISE_BINARY(BitwiseAnd) OP_ELEMENT_WISE_BINARY(BitwiseAnd_) OP_ELEMENT_WISE_BINARY(BitwiseXor) OP_ELEMENT_WISE_BINARY(BitwiseXor_) OP_ELEMENT_WISE_BINARY(Complex) -OP_ELEMENT_WISE_BINARY(Divide) OP_ELEMENT_WISE_BINARY(Divide_) OP_ELEMENT_WISE_BINARY(ElementwisePow) OP_ELEMENT_WISE_BINARY(Fmax) @@ -119,7 +160,6 @@ OP_ELEMENT_WISE_BINARY(LogicalXor) OP_ELEMENT_WISE_BINARY(LogicalXor_) OP_ELEMENT_WISE_BINARY(Maximum) OP_ELEMENT_WISE_BINARY(Minimum) -OP_ELEMENT_WISE_BINARY(Multiply) OP_ELEMENT_WISE_BINARY(MultiplySr) OP_ELEMENT_WISE_BINARY(MultiplySr_) OP_ELEMENT_WISE_BINARY(Multiply_) @@ -127,7 +167,6 @@ OP_ELEMENT_WISE_BINARY(NotEqual) OP_ELEMENT_WISE_BINARY(NotEqual_) OP_ELEMENT_WISE_BINARY(Remainder) OP_ELEMENT_WISE_BINARY(Remainder_) -OP_ELEMENT_WISE_BINARY(Subtract) OP_ELEMENT_WISE_BINARY(Subtract_) } // namespace paddle::dialect diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h index 345c55e1a116b..a1d6f5845802e 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h @@ -159,13 +159,19 @@ inline ShapeOrData SliceRawInferSymbolicShape( // Currently, we DO NOT support the case that any element in `axes` `starts` // or `ends` is a Symbol. auto vec_int64 = details::VecExpr2Int64(starts); - IR_ENFORCE(vec_int64.has_value(), - "for slice op, all the elements in `starts` must be int64_t"); + PADDLE_ENFORCE_EQ( + vec_int64.has_value(), + true, + phi::errors::InvalidArgument( + "for slice op, all the elements in `starts` must be int64_t")); std::vector starts_int = vec_int64.value(); vec_int64 = details::VecExpr2Int64(ends); - IR_ENFORCE(vec_int64.has_value(), - "for slice op, all the elements in `ends` must be int64_t"); + PADDLE_ENFORCE_EQ( + vec_int64.has_value(), + true, + phi::errors::InvalidArgument( + "for slice op, all the elements in `ends` must be int64_t")); std::vector ends_int = vec_int64.value(); const int64_t start = diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.cc index 30730170e23a2..1026005ab7fc8 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.cc @@ -104,8 +104,8 @@ void BuildCstrEqForTensorListAlongAxis( const symbol::TensorListShapeOrDataDimExprs &shape_data_list, int axis) { for (size_t i = 1; i < shape_data_list.size(); ++i) { - shape_analysis->DimExprBuilder().CstrEq(shape_data_list[0].shape()[axis], - shape_data_list[i].shape()[axis]); + shape_analysis->AddEqualCstr(shape_data_list[0].shape()[axis], + shape_data_list[i].shape()[axis]); } } @@ -114,7 +114,7 @@ void BuildCstrEqForTensorListAlongAxis( const std::vector &values, int axis) { for (size_t i = 1; i < values.size(); ++i) { - shape_analysis->DimExprBuilder().CstrEq( + shape_analysis->AddEqualCstr( shape_analysis->GetShapeOrDataForValue(values[0]).shape()[axis], shape_analysis->GetShapeOrDataForValue(values[i]).shape()[axis]); } diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h index 
7984fc3be4e46..42164c3c21254 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h @@ -47,6 +47,11 @@ struct AttributeTrait { using value_type = ::pir::Int32Attribute; }; +template <> +struct AttributeTrait { + using value_type = ::pir::FloatAttribute; +}; + template std::vector GetVectorAttr(const ::pir::Operation *op, const std::string &name) { @@ -82,8 +87,10 @@ inline ExprVec GetExprVecFromData(const ShapeOrData &shapeordata) { TensorListExprs list = shapeordata.dyn_cast(); for (size_t i = 0; i < list.size(); i++) { - for (auto expr : list[i].data().value()) { - result.emplace_back(expr); + if (list[i].data().has_value()) { + for (auto expr : list[i].data().value()) { + result.emplace_back(expr); + } } } return result; diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc index e96ede7488814..b5bb10f4f173e 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.cc @@ -14,12 +14,174 @@ #include "paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.h" #include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_slice_utils.h" #include "paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/infer_sym_utils.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" namespace paddle::dialect { +bool BicubicInterpOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + const symbol::ShapeOrDataDimExprs &x = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + + const auto &attributes = op->attributes(); + + const std::string data_format = + attributes.at("data_format").dyn_cast().AsString(); + int out_d = attributes.at("out_d").dyn_cast().data(); + int out_h = attributes.at("out_h").dyn_cast().data(); + int out_w = attributes.at("out_w").dyn_cast().data(); + + std::vector size_tensor; + if (out_d != -1) size_tensor.push_back(out_d); + if (out_h != -1) size_tensor.push_back(out_h); + if (out_w != -1) size_tensor.push_back(out_w); + + const DataLayout data_layout = common::StringToDataLayout(data_format); + + if (x.shape().size() == 3) { + // shape check for 1D interpolate for input tensor shape NCHW + if (!size_tensor.empty()) { + // top priority size + std::vector dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {x.shape()[0], x.shape()[1], symbol::DimExpr{out_w}}; + } else { + dim_out = {x.shape()[0], symbol::DimExpr{out_w}, x.shape()[2]}; + } + + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(dim_out)}; + + pir::Value res = op->result(0); + shape_analysis->SetShapeOrDataForValue(res, shape_data); + return true; + } + + symbol::DimExpr out_w_tmp{0}; + const auto &next_sym = shape_analysis->GetNextSymName(); + out_w_tmp = symbol::DimExpr(next_sym); + + std::vector dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {x.shape()[0], x.shape()[1], out_w_tmp}; + } else { + dim_out = {x.shape()[0], out_w_tmp, x.shape()[2]}; + } + + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(dim_out)}; + + pir::Value res = op->result(0); + 
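
Editor's note: the infer_sym_utils.h hunk in this chunk adds a float specialization of AttributeTrait, the compile-time map that GetVectorAttr uses to pick the right attribute cast. The trait idiom in isolation, with toy stand-ins for the pir attribute classes:

```cpp
#include <cstdint>
#include <iostream>

// Toy stand-ins for pir attribute classes (illustrative only).
struct Int32Attribute { static constexpr const char* name = "Int32Attribute"; };
struct FloatAttribute { static constexpr const char* name = "FloatAttribute"; };

// Primary template is declared but not defined: using an unmapped C++ type
// is a compile-time error instead of a silent wrong cast.
template <typename T>
struct AttributeTrait;

template <>
struct AttributeTrait<int32_t> { using value_type = Int32Attribute; };

// The patch adds exactly this kind of mapping for float.
template <>
struct AttributeTrait<float> { using value_type = FloatAttribute; };

template <typename T>
void PrintMappedAttr() {
  std::cout << AttributeTrait<T>::value_type::name << "\n";
}

int main() {
  PrintMappedAttr<int32_t>();  // Int32Attribute
  PrintMappedAttr<float>();    // FloatAttribute
  return 0;
}
```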
shape_analysis->SetShapeOrDataForValue(res, shape_data); + return true; + } else if (x.shape().size() == 4) { + // shape check for 2D interpolate for input tensor shape NCHW + if (!size_tensor.empty()) { + // top priority size + std::vector dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {x.shape()[0], + x.shape()[1], + symbol::DimExpr{out_h}, + symbol::DimExpr{out_w}}; + } else { + dim_out = {x.shape()[0], + symbol::DimExpr{out_h}, + symbol::DimExpr{out_w}, + x.shape()[3]}; + } + + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(dim_out)}; + pir::Value res = op->result(0); + shape_analysis->SetShapeOrDataForValue(res, shape_data); + return true; + } + + symbol::DimExpr out_h_tmp{0}; + symbol::DimExpr out_w_tmp{0}; + const auto &next_sym = shape_analysis->GetNextSymName(); + out_h_tmp = symbol::DimExpr(next_sym); + out_w_tmp = symbol::DimExpr(next_sym); + + std::vector dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {x.shape()[0], x.shape()[1], out_h_tmp, out_w_tmp}; + } else { + dim_out = {x.shape()[0], out_h_tmp, out_w_tmp, x.shape()[3]}; + } + + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(dim_out)}; + + pir::Value res = op->result(0); + shape_analysis->SetShapeOrDataForValue(res, shape_data); + return true; + } else if (x.shape().size() == 5) { + // shape check for 3D interpolate for input tensor shape NCDHW + if (!size_tensor.empty()) { + // top priority size + std::vector dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {x.shape()[0], + x.shape()[1], + symbol::DimExpr{out_d}, + symbol::DimExpr{out_h}, + symbol::DimExpr{out_w}}; + } else { + dim_out = {x.shape()[0], + symbol::DimExpr{out_d}, + symbol::DimExpr{out_h}, + symbol::DimExpr{out_w}, + x.shape()[4]}; + } + + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(dim_out)}; + + pir::Value res = op->result(0); + shape_analysis->SetShapeOrDataForValue(res, shape_data); + return true; + } + + symbol::DimExpr out_d_tmp{0}; + symbol::DimExpr out_h_tmp{0}; + symbol::DimExpr out_w_tmp{0}; + const auto &next_sym = shape_analysis->GetNextSymName(); + out_d_tmp = symbol::DimExpr(next_sym); + out_h_tmp = symbol::DimExpr(next_sym); + out_w_tmp = symbol::DimExpr(next_sym); + + std::vector dim_out; + + if (data_layout == DataLayout::kNCHW) { + dim_out = {x.shape()[0], x.shape()[1], out_d_tmp, out_h_tmp, out_w_tmp}; + } else { + dim_out = {x.shape()[0], out_d_tmp, out_h_tmp, out_w_tmp, x.shape()[4]}; + } + + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(dim_out)}; + + pir::Value res = op->result(0); + shape_analysis->SetShapeOrDataForValue(res, shape_data); + return true; + + } else { + PADDLE_THROW(phi::errors::Fatal("Input(X) dimension must be 3, 4 or 5!")); + } + + return true; +} + +bool BilinearInterpOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + return BicubicInterpOpInferSymbolicShape(op, shape_analysis); +} + bool ConcatOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { pir::Value operand_source = op->operand_source(0); @@ -41,8 +203,10 @@ bool ConcatOpInferSymbolicShape( if (shape_data_list[0].data().has_value()) { if (rank == 1) { - ExprVec data = details::GetExprVecFromData( - shape_analysis->GetShapeOrDataForValue(operand_source)); + const auto &s_or_d = + shape_analysis->GetShapeOrDataForValue(operand_source); + ExprVec data = details::GetExprVecFromData(s_or_d); + const std::vector 
shape{std::int64_t(data.size())}; symbol::ShapeOrDataDimExprs shape_data{ symbol::TensorShapeOrDataDimExprs(shape, data)}; @@ -95,7 +259,7 @@ bool ConcatOpInferSymbolicShape( bool FullWithTensorOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - pir::Value operand_source = op->operand_source(0); + pir::Value operand_source = op->operand_source(1); const symbol::ShapeOrDataDimExprs &operand_shape_or_data = shape_analysis->GetShapeOrDataForValue(operand_source); @@ -147,11 +311,54 @@ bool LinspaceOpInferSymbolicShape( shape_analysis->SetShapeOrDataForValue(op->result(0), shape_data); return true; } + +bool LinearInterpOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + return BicubicInterpOpInferSymbolicShape(op, shape_analysis); +} + bool LogspaceOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { return LinspaceOpInferSymbolicShape(op, shape_analysis); } +bool NearestInterpOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + return BicubicInterpOpInferSymbolicShape(op, shape_analysis); +} + +bool MeshgridOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + const symbol::TensorListShapeOrDataDimExprs &shape_data_list = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)) + .dyn_cast(); + + const symbol::ShapeOrDataDimExprs sym_shape_dim_exprs = [&] { + symbol::TensorListShapeOrDataDimExprs shape_dim_exprs_list; + std::vector vec; + + for (auto &shape_data : shape_data_list) { + if (shape_data.shape().size() == 0) { + vec.emplace_back(1); + } else { + vec.emplace_back(shape_data.shape()[0]); + } + } + + auto shape_dim_exprs = symbol::TensorShapeOrDataDimExprs(vec); + + for (size_t i = 0; i < shape_data_list.size(); i++) { + shape_dim_exprs_list.emplace_back(shape_dim_exprs); + } + + return symbol::ShapeOrDataDimExprs(shape_dim_exprs_list); + }(); + + pir::Value res = op->result(0); + shape_analysis->SetShapeOrDataForValue(res, sym_shape_dim_exprs); + return true; +} + bool StackOpInferSymbolicShape(pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { pir::Value operand_source = op->operand_source(0); @@ -196,6 +403,11 @@ bool StackOpInferSymbolicShape(pir::Operation *op, return true; } +bool TrilinearInterpOpInferSymbolicShape( + pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { + return BicubicInterpOpInferSymbolicShape(op, shape_analysis); +} + bool WhereOpInferSymbolicShape(pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { shape_analysis->SetShapeOrDataForValue( diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.h b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.h index f2907bed0a4fd..be528d31139cf 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.h +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/multiary_infer_sym.h @@ -18,12 +18,18 @@ namespace paddle::dialect { +OP_DECLARE_INFER_SYMBOLIC_SHAPE(BicubicInterp) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(BilinearInterp) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Concat) OP_DECLARE_INFER_SYMBOLIC_SHAPE(FullWithTensor) OP_DECLARE_INFER_SYMBOLIC_SHAPE(FlashAttn) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Linspace) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(LinearInterp) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Logspace) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(Meshgrid) 
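
Editor's note: reading the meshgrid hunk above, with k inputs every one of the k outputs gets the same rank-k shape, assembled from each input's leading dimension; a 0-D input contributes a 1. A short sketch under that reading, modelling symbolic dims as strings:

```cpp
#include <iostream>
#include <string>
#include <vector>

using Dims = std::vector<std::string>;  // symbolic dims as names, e.g. "S0"

// Each of the k outputs of meshgrid has the same rank-k shape, built from
// the leading (only) dimension of every input; 0-D inputs contribute a 1.
std::vector<Dims> MeshgridOutShapes(const std::vector<Dims>& inputs) {
  Dims common;
  for (const Dims& in : inputs) common.push_back(in.empty() ? "1" : in.front());
  return std::vector<Dims>(inputs.size(), common);
}

int main() {
  auto outs = MeshgridOutShapes({{"S0"}, {"S1"}, {}});
  for (const auto& shape : outs) {
    for (const auto& d : shape) std::cout << d << " ";
    std::cout << "\n";  // each line: S0 S1 1
  }
  return 0;
}
```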
+OP_DECLARE_INFER_SYMBOLIC_SHAPE(NearestInterp) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Stack) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(TrilinearInterp) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Where) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Where_) diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/nullary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/nullary_infer_sym.cc index 0bec3266bfb30..0e294991449c1 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/nullary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/nullary_infer_sym.cc @@ -80,6 +80,27 @@ bool AssignValueOpInferSymbolicShape( sym_dims.emplace_back(symbol::DimExpr(static_cast(dim))); } + const auto &attributes = op->attributes(); + std::vector values; + for (size_t i = 0; + i < attributes.at("values").dyn_cast().size(); + i++) { + values.push_back(attributes.at("values") + .dyn_cast() + .at(i) + .dyn_cast() + .data() + .to()); + } + if (values.size() == 1) { + std::vector data{values[0]}; + + symbol::ShapeOrDataDimExprs shape_data{ + symbol::TensorShapeOrDataDimExprs(sym_dims, data)}; + shape_analysis->SetShapeOrDataForValue(op->result(0), shape_data); + return true; + } + symbol::ShapeOrDataDimExprs shape_data{ symbol::TensorShapeOrDataDimExprs(sym_dims)}; shape_analysis->SetShapeOrDataForValue(op->result(0), shape_data); diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc index 04e5032098367..e96d10018c1d1 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.cc @@ -99,6 +99,7 @@ OP_SAME_OPERANDS_AND_RESULT(PutAlongAxis) OP_SAME_OPERANDS_AND_RESULT(PutAlongAxis_) OP_SAME_OPERANDS_AND_RESULT(Real) OP_SAME_OPERANDS_AND_RESULT(Relu) +OP_SAME_OPERANDS_AND_RESULT(Relu6) OP_SAME_OPERANDS_AND_RESULT(Relu_) OP_SAME_OPERANDS_AND_RESULT(Roll) OP_SAME_OPERANDS_AND_RESULT(Round) diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.h b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.h index 41363fbe70604..96073b1271a32 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.h +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/same_operands_result.h @@ -90,6 +90,7 @@ OP_DECLARE_INFER_SYMBOLIC_SHAPE(PutAlongAxis) OP_DECLARE_INFER_SYMBOLIC_SHAPE(PutAlongAxis_) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Real) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Relu) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(Relu6) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Relu_) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Roll) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Round) diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index 9f7b688f2825c..4dab7e358f05e 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -216,7 +216,7 @@ bool DiagonalOpInferSymbolicShape( out_dims.erase(out_dims.begin() + std::max(axis1_, axis2_)); out_dims.erase(out_dims.begin() + std::min(axis1_, axis2_)); - symbol::DimExprBuilder builder{nullptr}; + symbol::DimExprBuilder builder; 
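
Editor's note: the assign_value change above records the literal itself as data when exactly one value is present, so consumers on the shape-computation path can constant-fold it. The shape-versus-data split, reduced to a toy model (not the real symbol::TensorShapeOrDataDimExprs):

```cpp
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Simplified model of TensorShapeOrDataDimExprs: a shape, plus optional
// compile-time data for tensors that feed shape computations.
struct ShapeOrData {
  std::vector<int64_t> shape;
  std::optional<std::vector<int64_t>> data;  // known values, if any
};

// Mirrors the assign_value special case: a single literal is recorded as
// data so later ops (e.g. a reshape reading this tensor) can fold it.
ShapeOrData InferAssignValue(const std::vector<int64_t>& dims,
                             const std::vector<int64_t>& values) {
  if (values.size() == 1) return {dims, values};
  return {dims, std::nullopt};  // shape only
}

int main() {
  ShapeOrData s = InferAssignValue({1}, {42});
  std::cout << (s.data ? s.data->front() : int64_t{-1}) << "\n";  // 42
  return 0;
}
```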
symbol::DimExpr zero{0}; symbol::DimExpr res_shape; symbol::DimExpr offset_sym{offset}; @@ -330,8 +330,41 @@ bool MinOpInferSymbolicShape(pir::Operation *op, bool PadOpInferSymbolicShape(pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + // input(0): Tensor x + const auto &x_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + PADDLE_ENFORCE_EQ(x_shape_or_data.data().has_value(), + false, + phi::errors::InvalidArgument( + "InferSymbolicShape of PadOp only support input with " + "value now.")); + const auto &x_dims_sym = x_shape_or_data.shape(); + const size_t rank = x_dims_sym.size(); + + // input(1): int[] paddings + std::vector paddings = + paddle::dialect::details::GetVectorAttr(op, "paddings"); + PADDLE_ENFORCE_EQ(rank * 2, + paddings.size(), + phi::errors::InvalidArgument( + "The size of paddings should be 2 * input's rank. But " + "got paddings.size() = %d, input's rank = %d.", + paddings.size(), + rank)); + + // output + const auto &out_dims = [&] { + std::vector out_dims; + out_dims.reserve(rank); + for (size_t i = 0; i < rank; ++i) { + out_dims.push_back(x_dims_sym[i] + paddings[2 * i] + paddings[2 * i + 1]); + } + return out_dims; + }(); + + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::TensorShapeOrDataDimExprs(out_dims)); + return true; } @@ -416,22 +449,24 @@ symbol::ShapeOrDataDimExprs CreateShapeOrDataForXShape( bool ReshapeOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - pir::Value operand_source = op->operand_source(0); - if (shape_analysis->GetShapeOrDataForValue(operand_source) - .data() - .has_value()) { - const symbol::ShapeOrDataDimExprs &operand_shape_or_data = - shape_analysis->GetShapeOrDataForValue(operand_source); - shape_analysis->SetShapeOrDataForValue(op->result(0), - operand_shape_or_data); - return true; + const symbol::ShapeOrDataDimExprs &x_dim_expr = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + const symbol::ShapeOrDataDimExprs &shape_dim_expr = + shape_analysis->GetShapeOrDataForValue(op->operand_source(1)); + if (x_dim_expr.data().has_value()) { + const auto &shape_data = details::GetExprVecFromData(shape_dim_expr); + auto IsOne = [](const symbol::DimExpr &expr) { + return expr.isa() && expr.dyn_cast() == 1; + }; + if (shape_data.size() == 1 && IsOne(shape_data.at(0))) { + shape_analysis->SetShapeOrDataForValue( + op->result(0), + symbol::TensorShapeOrDataDimExprs(shape_data, + x_dim_expr.data().value())); + return true; + } } - pir::Value operand_source_shape = op->operand_source(1); - - const symbol::ShapeOrDataDimExprs &operand_shape_or_data = - shape_analysis->GetShapeOrDataForValue(operand_source_shape); - const auto &GetProduct = [&](const auto &dim_exprs, const auto &Filter) { symbol::DimExpr product{1}; for (const auto &dim_expr : dim_exprs) { @@ -463,7 +498,7 @@ bool ReshapeOpInferSymbolicShape( const auto &numel = GetProduct(original_shape, [](const auto &) { return true; }); - ExprVec target_shape = details::GetExprVecFromData(operand_shape_or_data); + ExprVec target_shape = details::GetExprVecFromData(shape_dim_expr); const auto &product_exclude_minus_one = GetProduct(target_shape, IsNotMinusOne); @@ -487,7 +522,7 @@ bool ReshapeOpInferSymbolicShape( shape_analysis->SetShapeOrDataForValue(op->result(0), shape_data); - const auto &x_shape = [&] { + const auto UNUSED &x_shape = [&] { 
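
Editor's note: the pad rule added above is plain arithmetic over dim expressions: each output dim is the input dim plus its before/after padding, with paddings holding one (before, after) pair per input dim, hence the 2 * rank size check. A concrete check, ints standing in for DimExprs:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// out[i] = x[i] + paddings[2*i] + paddings[2*i + 1]
std::vector<int64_t> PadOutDims(const std::vector<int64_t>& x,
                                const std::vector<int32_t>& paddings) {
  assert(paddings.size() == 2 * x.size());  // one (before, after) pair per dim
  std::vector<int64_t> out;
  out.reserve(x.size());
  for (size_t i = 0; i < x.size(); ++i)
    out.push_back(x[i] + paddings[2 * i] + paddings[2 * i + 1]);
  return out;
}

int main() {
  auto out = PadOutDims({4, 5}, {1, 1, 0, 2});
  std::cout << out[0] << "x" << out[1] << "\n";  // 6x7
  return 0;
}
```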
std::vector x_shape{symbol::DimExpr(0)}; const auto &original_shape = shape_analysis->GetShapeOrDataForValue(op->operand_source(0)).shape(); @@ -499,7 +534,7 @@ bool ReshapeOpInferSymbolicShape( shape_analysis->SetShapeOrDataForValue( op->result(1), CreateShapeOrDataForXShape( - shape_analysis->GetShapeOrDataForValue(operand_source))); + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)))); return true; } @@ -585,6 +620,8 @@ bool SplitOpInferSymbolicShape(pir::Operation *op, .dyn_cast() .data() .to(); + size_t rank = x_dims_sym.size(); + axis = axis >= 0 ? axis : std::max(int64_t(0), int64_t(axis + rank)); // sections const std::vector §ions_sym = [&] { @@ -629,8 +666,7 @@ bool SplitOpInferSymbolicShape(pir::Operation *op, const bool &all_sections_sym_not_minus_one = All(sections_sym, IsNotMinusOne); if (all_sections_sym_not_minus_one) { - shape_analysis->DimExprBuilder().CstrEq(x_dims_sym[axis], - sum_exclude_minus_one); + shape_analysis->AddEqualCstr(x_dims_sym[axis], sum_exclude_minus_one); } symbol::TensorListShapeOrDataDimExprs shape_data_list; @@ -840,7 +876,7 @@ bool TransposeOpInferSymbolicShape( int x_rank = x_dims.size(); - const std::vector formatted_axis = [op, x_rank, &perm] { + const std::vector formatted_axis = [x_rank, &perm] { std::vector out(perm.size(), 0); std::transform(perm.begin(), perm.end(), @@ -878,10 +914,13 @@ bool Transpose_OpInferSymbolicShape( bool SqueezeOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - IR_ENFORCE(op->num_operands() == 2, - "SqueezeOpInferSymbolicShape ONLY support num_operands() == 2 " - "now, but got %d operands", - op->num_operands()); + PADDLE_ENFORCE_EQ( + op->num_operands(), + 2, + phi::errors::InvalidArgument( + "SqueezeOpInferSymbolicShape ONLY support num_operands() == 2 " + "now, but got %d operands", + op->num_operands())); auto x_shape_or_data = shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); @@ -904,10 +943,13 @@ bool SqueezeOpInferSymbolicShape( std::vector squeeze_dims; for (auto squeeze_dim : squeeze_dims_sym) { - IR_ENFORCE(squeeze_dim.Has(), - "in SqueezeOpInferSymbolicShape, axes must be known int type, " - "but got: %s", - symbol::ToString(squeeze_dim)); + PADDLE_ENFORCE_EQ( + squeeze_dim.Has(), + true, + phi::errors::InvalidArgument( + "in SqueezeOpInferSymbolicShape, axes must be known int type, " + "but got: %s", + symbol::ToString(squeeze_dim))); squeeze_dims.emplace_back( static_cast(squeeze_dim.Get())); } @@ -970,31 +1012,186 @@ bool Squeeze_OpInferSymbolicShape( bool UnbindOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + // input + const auto &x_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + PADDLE_ENFORCE_EQ( + x_shape_or_data.data().has_value(), + false, + phi::errors::InvalidArgument( + "InferSymbolicShape of UnbindOp only support input with " + "value now.")); + const auto &x_dims_sym = x_shape_or_data.shape(); + + // axis + int axis = op->attributes().at("axis").dyn_cast().data(); + int rank = x_dims_sym.size(); + axis = axis >= 0 ? 
axis : axis + rank; + + // output + const symbol::TensorListShapeOrDataDimExprs &output_shape_data_list = [&] { + symbol::TensorListShapeOrDataDimExprs shape_data_list; + std::vector output_dims_sym = x_dims_sym; + + const symbol::DimExpr &unbound_dim = x_dims_sym.at(axis); + PADDLE_ENFORCE_EQ(unbound_dim.isa(), + true, + phi::errors::InvalidArgument( + "InferSymbolicShape of UnbindOp only support unbound " + "dim with constant length!")); + output_dims_sym.erase(output_dims_sym.begin() + axis); + const int64_t unbound_dim_length = unbound_dim.dyn_cast(); + + for (uint32_t idx = 0; idx < unbound_dim_length; idx++) { + shape_data_list.push_back( + symbol::TensorShapeOrDataDimExprs(output_dims_sym)); + } + return shape_data_list; + }(); + + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::ShapeOrDataDimExprs{output_shape_data_list}); + return true; } bool UniqueOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + const auto &x_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + PADDLE_ENFORCE_EQ( + x_shape_or_data.data().has_value(), + false, + phi::errors::InvalidArgument( + "InferSymbolicShape of UniqueOp only support input with " + "value now.")); + const auto &x_dims_sym = x_shape_or_data.shape(); + const size_t rank = x_dims_sym.size(); + std::vector axes = + paddle::dialect::details::GetVectorAttr(op, "axis"); + + symbol::DimExpr unique_dim_sym = + shape_analysis->GetNextSymName(); // unknown until runtime + + const std::vector &counts_dims = [&] { + std::vector out_dims; + out_dims.push_back(unique_dim_sym); + return out_dims; + }(); + + const std::vector &index_dims = counts_dims; + + const std::vector &out_dims = [&] { + if (axes.empty()) { + return counts_dims; + } + std::vector out_dims = x_dims_sym; + int axis = axes[0]; + axis = axis >= 0 ? axis : axis + rank; + out_dims[axis] = unique_dim_sym; + return out_dims; + }(); + + const std::vector &inverse_dims = [&] { + std::vector inverse_dims; + if (axes.empty()) { + // flatten before unique + symbol::DimExpr product{1}; + for (const auto &x_dim : x_dims_sym) { + product = product * x_dim; + } + inverse_dims.push_back(product); + } else { + int axis = axes[0]; + axis = axis >= 0 ? 
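
Editor's note: unbind, unique, and unique_consecutive in these hunks all rely on GetNextSymName for lengths that only exist at runtime: the analysis mints a fresh symbol and reuses the same symbol wherever outputs must agree (here, out, index, and counts all share the unknown unique count). A toy generator showing the mechanism:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Data-dependent ops (unique, masked_select) cannot know their output
// length at compile time, so the analysis mints a fresh symbol per call;
// equalities against that symbol can still be recorded as constraints.
class SymNameGenerator {
 public:
  std::string Next() { return "S" + std::to_string(counter_++); }

 private:
  int counter_ = 0;
};

int main() {
  SymNameGenerator gen;
  std::string unique_len = gen.Next();             // e.g. "S0"
  std::vector<std::string> out_dims = {unique_len};
  std::vector<std::string> counts_dims = out_dims;  // same unknown length
  std::cout << out_dims[0] << " " << counts_dims[0] << "\n";  // S0 S0
  return 0;
}
```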
axis : axis + rank; + inverse_dims.push_back(x_dims_sym[axis]); + } + return inverse_dims; + }(); + + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::TensorShapeOrDataDimExprs{out_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(1), symbol::TensorShapeOrDataDimExprs{index_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(2), symbol::TensorShapeOrDataDimExprs{inverse_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(3), symbol::TensorShapeOrDataDimExprs{counts_dims}); + return true; } bool UniqueConsecutiveOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - PADDLE_THROW(phi::errors::Unimplemented( - op->name() + " 's InferSymbolicShape interface is NOT implemented now.")); + const auto &x_shape_or_data = + shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); + PADDLE_ENFORCE_EQ( + x_shape_or_data.data().has_value(), + false, + phi::errors::InvalidArgument( + "InferSymbolicShape of UniqueConsecutiveOp only support input with " + "value now.")); + const auto &x_dims_sym = x_shape_or_data.shape(); + const size_t rank = x_dims_sym.size(); + std::vector axes = + paddle::dialect::details::GetVectorAttr(op, "axis"); + + symbol::DimExpr unique_dim_sym = + shape_analysis->GetNextSymName(); // unknown until runtime + + const std::vector &counts_dims = [&] { + std::vector out_dims; + out_dims.push_back(unique_dim_sym); + return out_dims; + }(); + + const std::vector &out_dims = [&] { + if (axes.empty()) { + return counts_dims; + } + std::vector out_dims = x_dims_sym; + int axis = axes[0]; + axis = axis >= 0 ? axis : axis + rank; + out_dims[axis] = unique_dim_sym; + return out_dims; + }(); + + const std::vector &inverse_dims = [&] { + std::vector inverse_dims; + if (axes.empty()) { + // flatten before unique + symbol::DimExpr product{1}; + for (const auto &x_dim : x_dims_sym) { + product = product * x_dim; + } + inverse_dims.push_back(product); + } else { + int axis = axes[0]; + axis = axis >= 0 ? 
axis : axis + rank; + inverse_dims.push_back(x_dims_sym[axis]); + } + return inverse_dims; + }(); + + shape_analysis->SetShapeOrDataForValue( + op->result(0), symbol::TensorShapeOrDataDimExprs{out_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(1), symbol::TensorShapeOrDataDimExprs{inverse_dims}); + shape_analysis->SetShapeOrDataForValue( + op->result(2), symbol::TensorShapeOrDataDimExprs{counts_dims}); + return true; } bool UnsqueezeOpInferSymbolicShape( pir::Operation *op, pir::ShapeConstraintIRAnalysis *shape_analysis) { - IR_ENFORCE(op->num_operands() == 2, - "UnsqueezeOp InferSymbolicShape ONLY support num_operands() == 2 " - "now, but got %d operands", - op->num_operands()); + PADDLE_ENFORCE_EQ( + op->num_operands(), + 2, + phi::errors::InvalidArgument( + "UnsqueezeOp InferSymbolicShape ONLY support num_operands() == 2 " + "now, but got %d operands", + op->num_operands())); auto x_shape_or_data = shape_analysis->GetShapeOrDataForValue(op->operand_source(0)); @@ -1023,10 +1220,13 @@ bool UnsqueezeOpInferSymbolicShape( int cur_output_rank = x_dims_size; for (auto axis_expr : axes_sym) { - IR_ENFORCE(axis_expr.Has(), - "in UnsqueezeOpInferSymbolicShape, axes must be known int type, " - "but got: %s", - symbol::ToString(axis_expr)); + PADDLE_ENFORCE_EQ( + axis_expr.Has(), + true, + phi::errors::InvalidArgument( + "in UnsqueezeOpInferSymbolicShape, axes must be known int type, " + "but got: %s", + symbol::ToString(axis_expr))); int axis = static_cast(axis_expr.Get()); int cur = axis < 0 ? axis + cur_output_rank + 1 : axis; diff --git a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.cc b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc similarity index 54% rename from paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.cc rename to paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc index 36835406267a3..c6c1401f32d5c 100644 --- a/paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.cc +++ b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.cc @@ -12,14 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/cinn/frontend/group_cluster/cluster_policy/shardable_axes_policy/shardable_axes_policy.h" +#include "paddle/fluid/pir/dialect/operator/interface/layout_transformation.h" -namespace cinn::frontend::group_cluster::policy { +namespace paddle { +namespace dialect { -bool ShardableAxesPolicy::CanFuse(const PatternNodePtr upstream, - const PatternNodePtr downstream) { - // TODO(wuzhanfei) shardable axes policy - return false; +template <> +common::DataLayout PreferLayoutImpl(pir::Operation* op) { + return common::DataLayout::NHWC; } -} // namespace cinn::frontend::group_cluster::policy +template <> +void RewriteByLayoutImpl(pir::Operation* op, + common::DataLayout new_layout) { + return; +} + +} // namespace dialect +} // namespace paddle +IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::LayoutTransformationInterface) diff --git a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.h b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.h new file mode 100644 index 0000000000000..71678029fb48c --- /dev/null +++ b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.h @@ -0,0 +1,106 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp" + +#include "paddle/common/enforce.h" +#include "paddle/common/layout.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_type.h" +#include "paddle/fluid/pir/dialect/operator/utils/utils.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/op_base.h" +#include "paddle/pir/include/core/type_name.h" + +namespace paddle { +namespace dialect { + +class LayoutTransformationInterface + : public pir::OpInterfaceBase { + public: + using PreferLayoutFn = common::DataLayout (*)(pir::Operation*); + using RewriteByLayoutFn = void (*)(pir::Operation*, common::DataLayout); + using RelevantInputsFn = std::vector (*)(pir::Operation*); + using RelevantOutputsFn = std::vector (*)(pir::Operation*); + + struct Concept { + explicit Concept(PreferLayoutFn prefer_layout, + RewriteByLayoutFn rewrite_by_layout, + RelevantInputsFn relevant_inputs, + RelevantOutputsFn relevant_outputs) + : prefer_layout(prefer_layout), + rewrite_by_layout(rewrite_by_layout), + relevant_inputs(relevant_inputs), + relevant_outputs(relevant_outputs) {} + + PreferLayoutFn prefer_layout; + RewriteByLayoutFn rewrite_by_layout; + RelevantInputsFn relevant_inputs; + RelevantOutputsFn relevant_outputs; + }; + + template + struct Model : public Concept { + static common::DataLayout PreferLayoutModel(pir::Operation* op) { + return PreferLayoutImpl(op); + } + + static void RewriteByLayoutModel(pir::Operation* op, + common::DataLayout new_layout) { + RewriteByLayoutImpl(op, new_layout); + } + + static std::vector RelevantInputsModel(pir::Operation* op) { + return RelevantInputsImpl(op); + } + + static std::vector RelevantOutputsModel(pir::Operation* op) { + return RelevantOutputsImpl(op); + } + + Model() + : Concept(PreferLayoutModel, + RewriteByLayoutModel, + RelevantInputsModel, + RelevantOutputsModel) {} + }; + + LayoutTransformationInterface(pir::Operation* op, Concept* impl) + : pir::OpInterfaceBase(op), impl_(impl) {} + + common::DataLayout PreferLayout(pir::Operation* op) { + return impl_->prefer_layout(op); + } + + void RewriteByLayout(pir::Operation* op, common::DataLayout new_layout) { + impl_->rewrite_by_layout(op, new_layout); + } + + std::vector RelevantInputs(pir::Operation* op) { + return impl_->relevant_inputs(op); + } + + std::vector RelevantOutputs(pir::Operation* op) { + return impl_->relevant_outputs(op); + } + + private: + Concept* impl_; +}; + +} // namespace dialect +} // namespace paddle + +IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::LayoutTransformationInterface) diff --git a/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp new file mode 100644 index 0000000000000..c1860cbbac108 --- /dev/null +++ b/paddle/fluid/pir/dialect/operator/interface/layout_transformation.hpp @@ -0,0 +1,60 @@ +// Copyright (c) 2024 PaddlePaddle Authors. 
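
Editor's note: the new layout interface above uses pir's usual Concept/Model type erasure: a plain struct of function pointers, filled in per concrete op by a templated Model, so no virtual dispatch lives on the op itself. Stripped to a single hook (illustrative; omits the real OpInterfaceBase plumbing):

```cpp
#include <iostream>
#include <string>

// Minimal model of the interface pattern: a Concept of function pointers,
// populated per op type by a templated Model.
struct Concept {
  std::string (*prefer_layout)();
};

template <typename ConcreteOp>
struct Model : Concept {
  static std::string PreferLayoutModel() { return ConcreteOp::preferred; }
  Model() : Concept{&PreferLayoutModel} {}
};

struct Conv2dLikeOp { static constexpr const char* preferred = "NHWC"; };
struct DefaultOp    { static constexpr const char* preferred = "ALL_LAYOUT"; };

int main() {
  Model<Conv2dLikeOp> conv_table;
  Model<DefaultOp> default_table;
  Concept* c = &conv_table;
  std::cout << c->prefer_layout() << "\n";  // NHWC
  c = &default_table;
  std::cout << c->prefer_layout() << "\n";  // ALL_LAYOUT
  return 0;
}
```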
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/common/enforce.h" +#include "paddle/common/layout.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/operation.h" +#include "paddle/pir/include/core/type_name.h" + +namespace paddle { +namespace dialect { + +template +common::DataLayout PreferLayoutImpl(pir::Operation* op) { + return common::DataLayout::ALL_LAYOUT; +} + +template +void RewriteByLayoutImpl(pir::Operation* op, common::DataLayout new_layout) { + PADDLE_THROW(common::errors::Unimplemented( + "Op %s should have a specialized RewriteByLayout function", + pir::get_type_name())); +} + +template +std::vector RelevantInputsImpl(pir::Operation* op) { + return op->operands_source(); +} + +template +std::vector RelevantOutputsImpl(pir::Operation* op) { + return op->results(); +} + +class FusedConv2dAddActOp; +template <> +common::DataLayout PreferLayoutImpl(pir::Operation*); +extern template common::DataLayout PreferLayoutImpl( + pir::Operation*); +template <> +void RewriteByLayoutImpl(pir::Operation*, + common::DataLayout); +extern template void RewriteByLayoutImpl( + pir::Operation*, common::DataLayout); + +} // namespace dialect +} // namespace paddle diff --git a/paddle/fluid/pir/dialect/operator/ir/api_builder.cc b/paddle/fluid/pir/dialect/operator/ir/api_builder.cc index a86e19ccfe0a6..939f91154de5b 100644 --- a/paddle/fluid/pir/dialect/operator/ir/api_builder.cc +++ b/paddle/fluid/pir/dialect/operator/ir/api_builder.cc @@ -22,11 +22,17 @@ namespace dialect { ApiBuilder::ApiBuilder() : ctx_(pir::IrContext::Instance()), builder_(std::make_shared(ctx_)) { - IR_ENFORCE(builder_ != nullptr, "api builder construct error!"); + PADDLE_ENFORCE_NE( + builder_, + nullptr, + phi::errors::InvalidArgument("api builder construct error!")); } void ApiBuilder::SetProgram(pir::Program* program) { - IR_ENFORCE(program != nullptr, "argument of program is nullptr"); + PADDLE_ENFORCE_NE( + program, + nullptr, + phi::errors::InvalidArgument("argument of program is nullptr")); builder_->SetInsertionPointToBlockEnd(program->block()); } @@ -50,8 +56,10 @@ void ApiBuilder::SetParameter(const std::string& name, } void ApiBuilder::LoadInsertionPoint() { - IR_ENFORCE(!insertion_point_stack_.empty(), - "insertion_point_stack_ is empty."); + PADDLE_ENFORCE_EQ( + !insertion_point_stack_.empty(), + true, + phi::errors::InvalidArgument("insertion_point_stack_ is empty.")); builder_->set_insertion_point(insertion_point_stack_.top()); insertion_point_stack_.pop(); } diff --git a/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc b/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc index f674c35096018..ef9ecc2bd8ff7 100644 --- a/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/control_flow_op.cc @@ -765,14 +765,14 @@ bool WhileOp::InferSymbolicShape( } if (input_arg_shape[j] == yield_value_shape[j]) { // Dim isn't changed in while - 
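
Editor's note: layout_transformation.hpp pairs permissive defaults (an ALL_LAYOUT preference, pass-through relevant inputs/outputs) with explicit per-op specializations such as FusedConv2dAddActOp, while the default RewriteByLayoutImpl throws, so an op cannot claim a preferred layout without also saying how to rewrite to it. The shape of that pattern with toy types (std::runtime_error in place of PADDLE_THROW):

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

struct GenericOp {};
struct FusedConvLikeOp {};

// Permissive default: any layout is fine unless an op says otherwise.
template <typename T>
std::string PreferLayout() { return "ALL_LAYOUT"; }

// Explicit specialization overrides the default for one op.
template <>
std::string PreferLayout<FusedConvLikeOp>() { return "NHWC"; }

// Defaulting to a throw forces every layout-sensitive op to provide a
// real rewrite before it may claim a preferred layout.
template <typename T>
void RewriteByLayout(const std::string&) {
  throw std::runtime_error("op should have a specialized RewriteByLayout");
}

int main() {
  std::cout << PreferLayout<GenericOp>() << "\n";        // ALL_LAYOUT
  std::cout << PreferLayout<FusedConvLikeOp>() << "\n";  // NHWC
  return 0;
}
```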
shape_analysis->DimExprBuilder().CstrEq(original_input_shape[j], - input_arg_shape[j]); + shape_analysis->AddEqualCstr(original_input_shape[j], + input_arg_shape[j]); continue; } if (original_input_shape.size() == yield_value_shape.size() && original_input_shape[j] == yield_value_shape[j]) { - shape_analysis->DimExprBuilder().CstrEq(original_input_shape[j], - input_arg_shape[j]); + shape_analysis->AddEqualCstr(original_input_shape[j], + input_arg_shape[j]); continue; } } @@ -827,16 +827,27 @@ void HasElementsOp::Build(pir::Builder &builder, // NOLINT void HasElementsOp::VerifySig() { VLOG(4) << "Verifying inputs, outputs ,attributes for: HasElementsOp."; // Verify inputs: - IR_ENFORCE(num_operands() == 1u, "The size of inputs must equal to 1."); - IR_ENFORCE(operand_type(0).isa(), - "The first input of cf.has_elements must be container type."); + PADDLE_ENFORCE_EQ( + num_operands(), + 1u, + phi::errors::InvalidArgument("The size of inputs must equal to 1.")); + PADDLE_ENFORCE_EQ( + operand_type(0).isa(), + true, + phi::errors::InvalidArgument( + "The first input of cf.has_elements must be container type.")); // No attributes should be verify. // Verify outputs: - IR_ENFORCE(num_results() == 1u, "The size of outputs must be equal to 1."); - IR_ENFORCE((*this)->result_type(0).isa(), - "The type of cf.has_elements' output is not correct."); + PADDLE_ENFORCE_EQ( + num_results(), + 1u, + phi::errors::InvalidArgument("The size of outputs must be equal to 1.")); + PADDLE_ENFORCE_EQ((*this)->result_type(0).isa(), + true, + phi::errors::InvalidArgument( + "The type of cf.has_elements' output is not correct.")); } const char *AssertOp::attributes_name[1] = {"summarize"}; @@ -886,51 +897,69 @@ void AssertOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); if ((*this)->operand_source(0).type().isa()) { - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .dyn_cast() - .dtype() - .isa(), - "Type validation failed for the 0th input, it should be a " - "bool DenseTensorType."); + PADDLE_ENFORCE_EQ( + (*this) + ->operand_source(0) + .type() + .dyn_cast() + .dtype() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, it should be a " + "bool DenseTensorType.")); } if (auto vec_type = (*this)->operand(1).type().dyn_cast()) { for (size_t i = 0; i < vec_type.size(); ++i) { - IR_ENFORCE(vec_type[i].isa() || - vec_type[i].isa(), - "Type validation failed for the 1th input."); + PADDLE_ENFORCE_EQ( + vec_type[i].isa() || + vec_type[i].isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input.")); } } else { - IR_ENFORCE( + PADDLE_ENFORCE_EQ( (*this)->operand(1).type().isa() || (*this) ->operand(1) .type() .isa(), - "Type validation failed for the 1th input."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input.")); } } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("summarize") > 0, "summarize does not exist."); - IR_ENFORCE(attributes.at("summarize").isa(), - "Type of attribute: summarize is not pir::Int64Attribute."); + PADDLE_ENFORCE_GT( + attributes.count("summarize"), + 0, + phi::errors::InvalidArgument("summarize does not exist.")); + PADDLE_ENFORCE_EQ( + 
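
Editor's note: most of the control_flow_op.cc churn here is mechanical: IR_ENFORCE(cond, msg) becomes PADDLE_ENFORCE_EQ/_GT/_NE(a, b, InvalidArgument(...)), which reports both operands rather than a bare boolean. A toy _EQ macro conveys the gain (Paddle's real macros build phi error objects; this one just prints and aborts):

```cpp
#include <cstdio>
#include <cstdlib>

// Toy analogue of PADDLE_ENFORCE_EQ: checks a == b and reports both sides
// on failure, which is what the migration buys over a bare boolean check.
#define TOY_ENFORCE_EQ(a, b, msg)                                        \
  do {                                                                   \
    long long lhs = (a), rhs = (b);                                      \
    if (lhs != rhs) {                                                    \
      std::fprintf(stderr, "%s (got %lld vs %lld)\n", (msg), lhs, rhs);  \
      std::abort();                                                      \
    }                                                                    \
  } while (0)

int main() {
  int num_operands = 1;
  TOY_ENFORCE_EQ(num_operands, 1, "The size of inputs must equal to 1.");
  std::puts("check passed");
  return 0;
}
```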
attributes.at("summarize").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: summarize is not pir::Int64Attribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 0u, - "The size %d of outputs must be equal to 0.", - output_size); + PADDLE_ENFORCE_EQ( + output_size, + 0u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 0.", output_size)); // Outputs num is 0, not need to check outputs type. } VLOG(4) << "End Verifying for: AssertOp."; @@ -941,74 +970,104 @@ void SelectInputOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto in_size = num_operands(); - IR_ENFORCE(in_size == 3u, "Size %d of inputs must be 3.", in_size); + PADDLE_ENFORCE_EQ( + in_size, + 3u, + phi::errors::InvalidArgument("Size %d of inputs must be 3.", in_size)); auto input1 = (*this)->operand_source(1).type(); auto input2 = (*this)->operand_source(2).type(); if (input1.isa() && input2.isa()) { auto tensor1 = input1.dyn_cast(); auto tensor2 = input2.dyn_cast(); - IR_ENFORCE( - tensor1.dtype() == tensor2.dtype(), - "The 1st input dtype %s should be equal to 2ed input dtype %s.", + PADDLE_ENFORCE_EQ( tensor1.dtype(), - tensor2.dtype()); - IR_ENFORCE(tensor1.data_layout() == tensor2.data_layout(), - "The 1st input data_layout %s should be equal to 2ed input " - "data_layout %s.", - tensor1.data_layout(), - tensor2.data_layout()); - IR_ENFORCE(tensor1.lod() == tensor2.lod(), - "The 1st input lod %s should be equal to 2ed input lod %s.", - tensor1.lod(), - tensor2.lod()); - IR_ENFORCE( - tensor1.offset() == tensor2.offset(), - "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor2.dtype(), + phi::errors::InvalidArgument( + "The 1st input dtype %s should be equal to 2ed input dtype %s.", + tensor1.dtype(), + tensor2.dtype())); + PADDLE_ENFORCE_EQ( + tensor1.data_layout(), + tensor2.data_layout(), + phi::errors::InvalidArgument( + "The 1st input data_layout %s should be equal to 2ed input " + "data_layout %s.", + tensor1.data_layout(), + tensor2.data_layout())); + PADDLE_ENFORCE_EQ( + tensor1.lod(), + tensor2.lod(), + phi::errors::InvalidArgument( + "The 1st input lod %s should be equal to 2ed input lod %s.", + tensor1.lod(), + tensor2.lod())); + PADDLE_ENFORCE_EQ( tensor1.offset(), - tensor2.offset()); + tensor2.offset(), + phi::errors::InvalidArgument( + "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor1.offset(), + tensor2.offset())); } else if (input1.isa() && input2.isa()) { auto tensor1 = input1.dyn_cast(); auto tensor2 = input1.dyn_cast(); - IR_ENFORCE( - tensor1.dtype() == tensor2.dtype(), - "The 1st input dtype %s should be equal to 2ed input dtype %s.", + PADDLE_ENFORCE_EQ( tensor1.dtype(), - tensor2.dtype()); - IR_ENFORCE(tensor1.data_layout() == tensor2.data_layout(), - "The 1st input data_layout %s should be equal to 2ed input " - "data_layout %s.", - tensor1.data_layout(), - tensor2.data_layout()); - IR_ENFORCE(tensor1.lod() == tensor2.lod(), - "The 1st input lod %s should be equal to 2ed input lod %s.", - tensor1.lod(), - tensor2.lod()); - IR_ENFORCE( - tensor1.offset() == tensor2.offset(), - "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor2.dtype(), + phi::errors::InvalidArgument( + "The 1st input dtype %s should be equal to 2ed input dtype %s.", + tensor1.dtype(), + tensor2.dtype())); + PADDLE_ENFORCE_EQ( + tensor1.data_layout(), + tensor2.data_layout(), + phi::errors::InvalidArgument( + "The 1st input data_layout %s should be equal to 
2ed input " + "data_layout %s.", + tensor1.data_layout(), + tensor2.data_layout())); + PADDLE_ENFORCE_EQ( + tensor1.lod(), + tensor2.lod(), + phi::errors::InvalidArgument( + "The 1st input lod %s should be equal to 2ed input lod %s.", + tensor1.lod(), + tensor2.lod())); + PADDLE_ENFORCE_EQ( tensor1.offset(), - tensor2.offset()); - IR_ENFORCE( - tensor1.place() == tensor2.place(), - "The 1st input place %s should be equal to 2ed input place %s.", + tensor2.offset(), + phi::errors::InvalidArgument( + "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor1.offset(), + tensor2.offset())); + PADDLE_ENFORCE_EQ( tensor1.place(), - tensor2.place()); + tensor2.place(), + phi::errors::InvalidArgument( + "The 1st input place %s should be equal to 2ed input place %s.", + tensor1.place(), + tensor2.place())); } else { - IR_ENFORCE(input1 == input2, - "The 1st input type %s should be equal to 2ed input type %s.", - input1, - input2); + PADDLE_ENFORCE_EQ( + input1, + input2, + phi::errors::InvalidArgument( + "The 1st input type %s should be equal to 2ed input type %s.", + input1, + input2)); } } VLOG(4) << "Verifying outputs:"; { auto out_size = num_results(); - IR_ENFORCE( - out_size == 1u, "Size %d of outputs must be equal to 1.", out_size); + PADDLE_ENFORCE_EQ(out_size, + 1u, + phi::errors::InvalidArgument( + "Size %d of outputs must be equal to 1.", out_size)); } VLOG(4) << "End Verifying for: AssignArray_Op."; } @@ -1061,13 +1120,18 @@ void SelectOutputOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto in_size = num_operands(); - IR_ENFORCE(in_size == 2u, "Size %d of inputs must be 2.", in_size); + PADDLE_ENFORCE_EQ( + in_size, + 2u, + phi::errors::InvalidArgument("Size %d of inputs must be 2.", in_size)); } VLOG(4) << "Verifying outputs:"; { auto out_size = num_results(); - IR_ENFORCE( - out_size == 2u, "Size %d of outputs must be equal to 2.", out_size); + PADDLE_ENFORCE_EQ(out_size, + 2u, + phi::errors::InvalidArgument( + "Size %d of outputs must be equal to 2.", out_size)); auto out1 = (*this)->result(0).type(); auto out2 = (*this)->result(1).type(); @@ -1075,58 +1139,83 @@ void SelectOutputOp::VerifySig() { out2.isa()) { auto tensor1 = out1.dyn_cast(); auto tensor2 = out2.dyn_cast(); - IR_ENFORCE( - tensor1.dtype() == tensor2.dtype(), - "The 1st input dtype %s should be equal to 2ed input dtype %s.", + PADDLE_ENFORCE_EQ( tensor1.dtype(), - tensor2.dtype()); - IR_ENFORCE(tensor1.data_layout() == tensor2.data_layout(), - "The 1st input data_layout %s should be equal to 2ed input " - "data_layout %s.", - tensor1.data_layout(), - tensor2.data_layout()); - IR_ENFORCE(tensor1.lod() == tensor2.lod(), - "The 1st input lod %s should be equal to 2ed input lod %s.", - tensor1.lod(), - tensor2.lod()); - IR_ENFORCE( - tensor1.offset() == tensor2.offset(), - "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor2.dtype(), + phi::errors::InvalidArgument( + "The 1st input dtype %s should be equal to 2ed input dtype %s.", + tensor1.dtype(), + tensor2.dtype())); + PADDLE_ENFORCE_EQ( + tensor1.data_layout(), + tensor2.data_layout(), + phi::errors::InvalidArgument( + "The 1st input data_layout %s should be equal to 2ed input " + "data_layout %s.", + tensor1.data_layout(), + tensor2.data_layout())); + PADDLE_ENFORCE_EQ( + tensor1.lod(), + tensor2.lod(), + phi::errors::InvalidArgument( + "The 1st input lod %s should be equal to 2ed input lod %s.", + tensor1.lod(), + tensor2.lod())); + PADDLE_ENFORCE_EQ( tensor1.offset(), - tensor2.offset()); + tensor2.offset(), + 
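
Editor's note: select_input/select_output verification must confirm that the two candidate values agree on every piece of tensor metadata — dtype, data layout, lod, offset, and additionally place for allocated types — hence the field-by-field comparisons in the hunks above. A compact stand-in using std::tie keeps such a check exhaustive and hard to desynchronize:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
#include <tuple>

// Illustrative stand-in for DenseTensorType metadata; the real type also
// carries lod and, for allocated tensors, a place.
struct TensorMeta {
  std::string dtype;
  std::string layout;  // e.g. "NCHW"
  int64_t offset = 0;

  bool SameAs(const TensorMeta& other) const {
    return std::tie(dtype, layout, offset) ==
           std::tie(other.dtype, other.layout, other.offset);
  }
};

int main() {
  TensorMeta a{"float32", "NCHW", 0};
  TensorMeta b{"float32", "NCHW", 0};
  assert(a.SameAs(b));
  std::cout << "branch inputs are type-compatible\n";
  return 0;
}
```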
phi::errors::InvalidArgument( + "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor1.offset(), + tensor2.offset())); } else if (out1.isa() && out2.isa()) { auto tensor1 = out1.dyn_cast(); auto tensor2 = out2.dyn_cast(); - IR_ENFORCE( - tensor1.dtype() == tensor2.dtype(), - "The 1st input dtype %s should be equal to 2ed input dtype %s.", + PADDLE_ENFORCE_EQ( tensor1.dtype(), - tensor2.dtype()); - IR_ENFORCE(tensor1.data_layout() == tensor2.data_layout(), - "The 1st input data_layout %s should be equal to 2ed input " - "data_layout %s.", - tensor1.data_layout(), - tensor2.data_layout()); - IR_ENFORCE(tensor1.lod() == tensor2.lod(), - "The 1st input lod %s should be equal to 2ed input lod %s.", - tensor1.lod(), - tensor2.lod()); - IR_ENFORCE( - tensor1.offset() == tensor2.offset(), - "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor2.dtype(), + phi::errors::InvalidArgument( + "The 1st input dtype %s should be equal to 2ed input dtype %s.", + tensor1.dtype(), + tensor2.dtype())); + PADDLE_ENFORCE_EQ( + tensor1.data_layout(), + tensor2.data_layout(), + phi::errors::InvalidArgument( + "The 1st input data_layout %s should be equal to 2ed input " + "data_layout %s.", + tensor1.data_layout(), + tensor2.data_layout())); + PADDLE_ENFORCE_EQ( + tensor1.lod(), + tensor2.lod(), + phi::errors::InvalidArgument( + "The 1st input lod %s should be equal to 2ed input lod %s.", + tensor1.lod(), + tensor2.lod())); + PADDLE_ENFORCE_EQ( tensor1.offset(), - tensor2.offset()); - IR_ENFORCE( - tensor1.place() == tensor2.place(), - "The 1st input place %s should be equal to 2ed input place %s.", + tensor2.offset(), + phi::errors::InvalidArgument( + "The 1st input offset %s should be equal to 2ed input offset %s.", + tensor1.offset(), + tensor2.offset())); + PADDLE_ENFORCE_EQ( tensor1.place(), - tensor2.place()); + tensor2.place(), + phi::errors::InvalidArgument( + "The 1st input place %s should be equal to 2ed input place %s.", + tensor1.place(), + tensor2.place())); } else { - IR_ENFORCE(out1 == out2, - "The 1st input type %s should be equal to 2ed input type %s.", - out1, - out2); + PADDLE_ENFORCE_EQ( + out1, + out2, + phi::errors::InvalidArgument( + "The 1st input type %s should be equal to 2ed input type %s.", + out1, + out2)); } } VLOG(4) << "End Verifying for: AssignArray_Op."; diff --git a/paddle/fluid/pir/dialect/operator/ir/control_flow_op.h b/paddle/fluid/pir/dialect/operator/ir/control_flow_op.h index 8b5af449d4820..9b9bcd97b78fe 100644 --- a/paddle/fluid/pir/dialect/operator/ir/control_flow_op.h +++ b/paddle/fluid/pir/dialect/operator/ir/control_flow_op.h @@ -173,7 +173,8 @@ class HasElementsOp : public pir::Op { /// print(summarize number of elements in data) /// } /// -class AssertOp : public pir::Op { +class AssertOp + : public pir::Op { public: using Op::Op; static const char *name() { return "pd_op.assert"; } diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.cc b/paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.cc index 4e4b7f46b382c..17d9a1dadc903 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_onednn_op.cc @@ -124,16 +124,21 @@ void ExpandOp::Build(pir::Builder& builder, pir::AttributeMap attributes) { VLOG(4) << "Start build ExpandOp"; - IR_ENFORCE(attributes.find("shape") != attributes.end(), - "'shape' Attribute is expected for ExpandOp. 
"); + PADDLE_ENFORCE_NE(attributes.find("shape"), + attributes.end(), + phi::errors::InvalidArgument( + "'shape' Attribute is expected for ExpandOp. ")); std::vector shape = attributes.at("shape") .dyn_cast() .data() .GetData(); - IR_ENFORCE(attributes.find("mkldnn_data_type") != attributes.end(), - "'mkldnn_data_type' Attribute is expected for ExpandOp. "); + PADDLE_ENFORCE_NE( + attributes.find("mkldnn_data_type"), + attributes.end(), + phi::errors::InvalidArgument( + "'mkldnn_data_type' Attribute is expected for ExpandOp. ")); std::string mkldnn_data_type = attributes.at("mkldnn_data_type") .dyn_cast() .AsString(); @@ -190,48 +195,66 @@ void ExpandOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); if (auto vec_type = (*this)->operand_source(1).type().dyn_cast()) { for (size_t i = 0; i < vec_type.size(); ++i) { - IR_ENFORCE(vec_type[i].isa(), - "Type validation failed for the 1th input, got %s.", - (*this)->operand_source(1).type()); + PADDLE_ENFORCE_EQ( + vec_type[i].isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input, got %s.", + (*this)->operand_source(1).type())); } } else { - IR_ENFORCE((*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1th input, got %s.", - (*this)->operand_source(1).type()); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input, got %s.", + (*this)->operand_source(1).type())); } } VLOG(4) << "Verifying attributes:"; { auto& attributes = this->attributes(); - IR_ENFORCE(attributes.count("mkldnn_data_type") > 0, - "mkldnn_data_type does not exist."); - IR_ENFORCE(attributes.at("mkldnn_data_type").isa(), - "Type of attribute: mkldnn_data_type is not pir::StrAttribute."); + PADDLE_ENFORCE_GT( + attributes.count("mkldnn_data_type"), + 0, + phi::errors::InvalidArgument("mkldnn_data_type does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("mkldnn_data_type").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: mkldnn_data_type is not pir::StrAttribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: ExpandOp."; } @@ -248,9 +271,11 @@ std::vector ExpandOp::InferMeta( p_attributes, common::errors::Fatal( "AttrtibueMap pointer in InferMeta function is nullptr.")); - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + 
PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value shape_ = input_values[1]; diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc index c5dc4457b737e..640cfc6456f1d 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc @@ -148,9 +148,11 @@ std::vector AddNOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AddNOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value inputs_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -294,9 +296,11 @@ std::vector AddN_Op::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AddN_Op"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value inputs_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -444,9 +448,11 @@ std::vector AddNArrayOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AddNArrayOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value inputs_ = input_values[0]; VLOG(4) << "Builder construction outputs"; pir::VectorType inputs = inputs_.type().dyn_cast(); @@ -670,9 +676,11 @@ std::vector FusedGemmEpilogueOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta FusedGemmEpilogueOp"; - IR_ENFORCE(input_values.size() == 3, - "Num of inputs is expected to be 3 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 3, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 3 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value y_ = input_values[1]; pir::Value bias_ = input_values[2]; @@ -919,9 +927,11 @@ std::vector FusedGemmEpilogueGradOp::InferMeta( common::errors::Fatal( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; - IR_ENFORCE(input_values.size() == 4, - "Num of inputs is expected to be 4 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 4, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 4 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value y_ = input_values[1]; @@ -1218,9 +1228,11 @@ std::vector SplitGradOp::InferMeta( pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta SplitGradOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + 
input_values.size())); pir::Value out_grad_ = input_values[0]; pir::Value axis_ = input_values[1]; @@ -1479,9 +1491,11 @@ std::vector CreateArrayLikeOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta CreateArrayLikeOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value input_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -1600,9 +1614,11 @@ std::vector ArrayLengthOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ArrayLengthOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; paddle::dialect::DenseTensorArrayType x_type; @@ -1756,9 +1772,11 @@ std::vector ArrayReadOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ArrayLengthOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value array_ = input_values[0]; pir::Value i_ = input_values[1]; @@ -1924,9 +1942,11 @@ std::vector ArrayWrite_Op::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ArrayWrite_Op"; - IR_ENFORCE(input_values.size() == 3, - "Num of inputs is expected to be 3 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 3, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 3 but got %d.", + input_values.size())); pir::Value array_ = input_values[0]; pir::Value x_ = input_values[1]; @@ -2121,17 +2141,23 @@ std::vector ArrayToTensorOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta ArrayToTensorOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; - IR_ENFORCE(attributes.find("axis") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("axis"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); int32_t axis = attributes.at("axis").dyn_cast().data(); - IR_ENFORCE(attributes.find("use_stack") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("use_stack"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. 
")); bool use_stack = attributes.at("use_stack").dyn_cast().data(); @@ -2315,21 +2341,27 @@ std::vector TensorToArrayOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta TensorToArrayOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value out_grad_ = input_values[1]; VLOG(4) << "Builder construction attributes"; pir::AttributeMap argument_attributes = {}; - IR_ENFORCE(attributes.find("axis") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("axis"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); int32_t axis = attributes.at("axis").dyn_cast().data(); - IR_ENFORCE(attributes.find("use_stack") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("use_stack"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); bool use_stack = attributes.at("use_stack").dyn_cast().data(); @@ -2430,39 +2462,53 @@ void SliceArrayOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 3u, - "The size %d of inputs must be equal to 3.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); - IR_ENFORCE((*this)->operand_source(1).type().isa() || - (*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1st input, got %s.", - (*this)->operand_source(1).type()); - IR_ENFORCE((*this)->operand_source(2).type().isa() || - (*this) - ->operand_source(2) - .type() - .isa(), - "Type validation failed for the 1st input, got %s.", - (*this)->operand_source(2).type()); + PADDLE_ENFORCE_EQ( + input_size, + 3u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 3.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); + PADDLE_ENFORCE_EQ( + (*this)->operand_source(1).type().isa() || + (*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1st input, got %s.", + (*this)->operand_source(1).type())); + PADDLE_ENFORCE_EQ( + (*this)->operand_source(2).type().isa() || + (*this) + ->operand_source(2) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1st input, got %s.", + (*this)->operand_source(2).type())); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: SliceArrayOp."; } @@ -2527,9 
+2573,11 @@ std::vector SliceArrayOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta SliceArrayOp"; - IR_ENFORCE(input_values.size() == 3, - "Num of inputs is expected to be 3 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 3, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 3 but got %d.", + input_values.size())); pir::Value input = input_values[0]; pir::Value starts = input_values[1]; pir::Value ends = input_values[2]; @@ -2622,32 +2670,43 @@ void SliceArrayDenseOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); - IR_ENFORCE((*this)->operand_source(1).type().isa() || - (*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1st input, got %s.", - (*this)->operand_source(1).type()); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); + PADDLE_ENFORCE_EQ( + (*this)->operand_source(1).type().isa() || + (*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1st input, got %s.", + (*this)->operand_source(1).type())); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: SliceArrayOp."; } @@ -2678,9 +2737,11 @@ std::vector SliceArrayDenseOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta SliceArrayDenseOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value input = input_values[0]; pir::Value starts = input_values[1]; @@ -2772,15 +2833,19 @@ void AssignArrayOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { @@ -2789,12 
+2854,16 @@ void AssignArrayOp::VerifySig() { VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: AssignArrayOp."; } @@ -2817,9 +2886,11 @@ std::vector AssignArrayOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AssignArrayOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -2885,26 +2956,35 @@ void AssignArray_Op::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, but got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ( + (*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, but got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: AssignArray_Op."; } @@ -2918,9 +2998,11 @@ std::vector AssignArray_Op::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta AssignArray_Op"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -3022,8 +3104,10 @@ void ExpandOp::Build(pir::Builder &builder, pir::AttributeMap attributes) { VLOG(4) << "Start build ExpandOp"; - IR_ENFORCE(attributes.find("shape") != attributes.end(), - "'shape' Attribute is expected for ExpandOp. "); + PADDLE_ENFORCE_NE(attributes.find("shape"), + attributes.end(), + phi::errors::InvalidArgument( + "'shape' Attribute is expected for ExpandOp. 
")); std::vector shape = attributes.at("shape") .dyn_cast() @@ -3114,10 +3198,13 @@ bool ExpandOp::InferSymbolicShape( if (expand_shape[i] == -1) { // copy the dim from x // the shape is right aligned int index = i - (expand_shape.size() - x_dims.size()); - IR_ENFORCE(index >= 0, - "in ExpandOpInferSymbolicShape, the dim to copy must >= 0, " - "but got %d", - index); + PADDLE_ENFORCE_GE( + index, + 0, + phi::errors::InvalidArgument( + "in ExpandOpInferSymbolicShape, the dim to copy must >= 0, " + "but got %d", + index)); out_shape[i] = x_dims[index]; } @@ -3136,26 +3223,34 @@ void ExpandOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input."); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input.")); if (auto vec_type = (*this)->operand_source(1).type().dyn_cast()) { for (size_t i = 0; i < vec_type.size(); ++i) { - IR_ENFORCE(vec_type[i].isa(), - "Type validation failed for the 1th input."); + PADDLE_ENFORCE_EQ(vec_type[i].isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input.")); } } else { - IR_ENFORCE((*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1th input."); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input.")); } } VLOG(4) << "Verifying attributes:"; @@ -3165,12 +3260,16 @@ void ExpandOp::VerifySig() { VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: ExpandOp."; } @@ -3184,9 +3283,11 @@ std::vector ExpandOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ExpandOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value shape_ = input_values[1]; @@ -3341,8 +3442,10 @@ void IncrementOp::Build(pir::Builder &builder, pir::AttributeMap attributes) { VLOG(4) << "Start build IncrementOp"; - IR_ENFORCE(attributes.find("value") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("value"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. 
")); float value = attributes.at("value").dyn_cast().data(); VLOG(4) << "Builder construction inputs"; @@ -3367,32 +3470,45 @@ void IncrementOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("value") > 0, "value does not exist."); - IR_ENFORCE(attributes.at("value").isa(), - "Type of attribute: value is not pir::FloatAttribute."); + PADDLE_ENFORCE_GT(attributes.count("value"), + 0, + phi::errors::InvalidArgument("value does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("value").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: value is not pir::FloatAttribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: IncrementOp."; } @@ -3411,13 +3527,17 @@ std::vector IncrementOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta IncrementOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; - IR_ENFORCE(attributes.find("value") != attributes.end(), - "'value' Attribute is expected for IncrementOp. "); + PADDLE_ENFORCE_NE(attributes.find("value"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for IncrementOp. ")); float value = attributes.at("value").dyn_cast().data(); VLOG(4) << "Builder construction outputs"; @@ -3526,8 +3646,10 @@ void Increment_Op::Build(pir::Builder &builder, pir::AttributeMap attributes) { VLOG(4) << "Start build Increment_Op"; - IR_ENFORCE(attributes.find("value") != attributes.end(), - "'value' Attribute is expected for Increment_Op. "); + PADDLE_ENFORCE_NE(attributes.find("value"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for Increment_Op. 
")); float value = attributes.at("value").dyn_cast().data(); VLOG(4) << "Builder construction inputs"; @@ -3553,32 +3675,45 @@ void Increment_Op::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("value") > 0, "value does not exist."); - IR_ENFORCE(attributes.at("value").isa(), - "Type of attribute: value is not pir::FloatAttribute."); + PADDLE_ENFORCE_GT(attributes.count("value"), + 0, + phi::errors::InvalidArgument("value does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("value").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: value is not pir::FloatAttribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: Increment_Op."; } @@ -3597,13 +3732,17 @@ std::vector Increment_Op::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta Increment_Op"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; - IR_ENFORCE(attributes.find("value") != attributes.end(), - "'value' Attribute is expected for Increment_Op. "); + PADDLE_ENFORCE_NE(attributes.find("value"), + attributes.end(), + phi::errors::InvalidArgument( + "'value' Attribute is expected for Increment_Op. 
")); float value = attributes.at("value").dyn_cast().data(); VLOG(4) << "Builder construction outputs"; @@ -3715,21 +3854,27 @@ void AssignOut_Op::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 2u, - "The size %d of inputs must be equal to 2.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); - IR_ENFORCE((*this) - ->operand_source(1) - .type() - .isa(), - "Type validation failed for the 1th input, got %s.", - (*this)->operand_source(1).type()); + PADDLE_ENFORCE_EQ( + input_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 2.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(1) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1th input, got %s.", + (*this)->operand_source(1).type())); } VLOG(4) << "Verifying attributes:"; { @@ -3738,12 +3883,16 @@ void AssignOut_Op::VerifySig() { VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: AssignOut_Op."; } @@ -3756,9 +3905,11 @@ void AssignOut_Op::InferMeta(phi::InferMetaContext *infer_meta) { std::vector AssignOut_Op::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -3808,6 +3959,29 @@ phi::DataType AssignOut_Op::GetKernelTypeForVar( return expected_kernel_dtype; } +OpInfoTuple ShapeBroadcastOp::GetOpInfo() { + std::vector inputs = { + paddle::dialect::OpInputInfo( + "x", "paddle::dialect::DenseTensorType", false, false, false, true), + paddle::dialect::OpInputInfo( + "y", "paddle::dialect::DenseTensorType", false, false, false, true)}; + std::vector attributes = {}; + std::vector outputs = { + paddle::dialect::OpOutputInfo( + "out", "paddle::dialect::DenseTensorType", false, false)}; + paddle::dialect::OpRunTimeInfo run_time_info = + paddle::dialect::OpRunTimeInfo("ElementwiseInferMeta", + {"x", "y"}, + "shape_broadcast", + {"x", "y"}, + {}, + {}, + {}, + {}); + return std::make_tuple( + inputs, attributes, outputs, run_time_info, "shape_broadcast"); +} + void ShapeBroadcastOp::Build(pir::Builder &builder, pir::OperationArgument &argument, pir::Value x_, @@ -3836,9 +4010,11 @@ std::vector ShapeBroadcastOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { VLOG(4) << "Start infermeta ShapeBroadcastOp"; - IR_ENFORCE(input_values.size() == 2, - "Num of inputs is expected to be 2 but got %d.", - 
input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 2, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 2 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; pir::Value y_ = input_values[1]; @@ -3921,11 +4097,14 @@ symbol::DimExpr GetBroadcastDimExpr(const symbol::DimExpr &lhs, std::vector ComputeBroadcastShape( const std::vector &large_shape, const std::vector &small_shape) { - IR_ENFORCE(large_shape.size() >= small_shape.size(), - "Size of large_shape is expected to be greater or equal size of " - "small_shape, but got [%d] >= [%d].", - large_shape.size(), - small_shape.size()); + PADDLE_ENFORCE_GE( + large_shape.size(), + small_shape.size(), + phi::errors::InvalidArgument( + "Size of large_shape is expected to be greater or equal size of " + "small_shape, but got [%d] >= [%d].", + large_shape.size(), + small_shape.size())); std::vector output_data; output_data.reserve(large_shape.size()); auto rank_gap = large_shape.size() - small_shape.size(); @@ -3944,16 +4123,22 @@ bool ShapeBroadcastOp::InferSymbolicShape( pir::Value x = operand_source(0); pir::Value y = operand_source(1); - IR_ENFORCE(shape_analysis->HasShapeOrDataForValue(x) > 0, - "Value x does not exist."); - IR_ENFORCE(shape_analysis->HasShapeOrDataForValue(y) > 0, - "Value y does not exist."); + PADDLE_ENFORCE_GT(shape_analysis->HasShapeOrDataForValue(x), + 0, + phi::errors::InvalidArgument("Value x does not exist.")); + PADDLE_ENFORCE_GT(shape_analysis->HasShapeOrDataForValue(y), + 0, + phi::errors::InvalidArgument("Value y does not exist.")); const auto &x_data_shape = shape_analysis->GetShapeOrDataForValue(x); const auto &y_data_shape = shape_analysis->GetShapeOrDataForValue(y); - IR_ENFORCE(x_data_shape.data().has_value(), - "Value x comes from ShapeOp, it must have data"); - IR_ENFORCE(y_data_shape.data().has_value(), - "Value y comes from ShapeOp, it must have data"); + PADDLE_ENFORCE_EQ(x_data_shape.data().has_value(), + true, + phi::errors::InvalidArgument( + "Value x comes from ShapeOp, it must have data")); + PADDLE_ENFORCE_EQ(y_data_shape.data().has_value(), + true, + phi::errors::InvalidArgument( + "Value y comes from ShapeOp, it must have data")); const auto &x_data = x_data_shape.data().value(); const auto &y_data = y_data_shape.data().value(); @@ -4005,34 +4190,48 @@ void MemcpyD2hMultiIoOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("dst_place_type") > 0, - "dst_place_type does not exist."); - IR_ENFORCE(attributes.at("dst_place_type").isa(), - "Type of attribute: dst_place_type is not pir::Int32Attribute."); + PADDLE_ENFORCE_GT( + attributes.count("dst_place_type"), + 0, + phi::errors::InvalidArgument("dst_place_type does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("dst_place_type").isa(), + true, + phi::errors::InvalidArgument( + 
"Type of attribute: dst_place_type is not pir::Int32Attribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 1u, - "The size %d of outputs must be equal to 1.", - output_size); + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 1.", output_size)); auto output_0_type = (*this)->result(0).type(); - IR_ENFORCE(output_0_type.isa(), - "Type validation failed for the 0th output."); + PADDLE_ENFORCE_EQ( + output_0_type.isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); } VLOG(4) << "End Verifying for: MemcpyD2hMultiIoOp."; } @@ -4045,9 +4244,11 @@ void MemcpyD2hMultiIoOp::InferMeta(phi::InferMetaContext *infer_meta) { std::vector MemcpyD2hMultiIoOp::InferMeta( const std::vector &input_values, pir::AttributeMap *p_attributes) { - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value x_ = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -4130,35 +4331,50 @@ void ArrayPopOp::VerifySig() { VLOG(4) << "Verifying inputs:"; { auto input_size = num_operands(); - IR_ENFORCE(input_size == 1u, - "The size %d of inputs must be equal to 1.", - input_size); - IR_ENFORCE((*this) - ->operand_source(0) - .type() - .isa(), - "Type validation failed for the 0th input, got %s.", - (*this)->operand_source(0).type()); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::InvalidArgument( + "The size %d of inputs must be equal to 1.", input_size)); + PADDLE_ENFORCE_EQ((*this) + ->operand_source(0) + .type() + .isa(), + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th input, got %s.", + (*this)->operand_source(0).type())); } VLOG(4) << "Verifying attributes:"; { auto &attributes = this->attributes(); - IR_ENFORCE(attributes.count("index") > 0, "index does not exist."); - IR_ENFORCE(attributes.at("index").isa(), - "Type of attribute: index is not pir::Int32Attribute."); + PADDLE_ENFORCE_GT(attributes.count("index"), + 0, + phi::errors::InvalidArgument("index does not exist.")); + PADDLE_ENFORCE_EQ( + attributes.at("index").isa(), + true, + phi::errors::InvalidArgument( + "Type of attribute: index is not pir::Int32Attribute.")); } VLOG(4) << "Verifying outputs:"; { auto output_size = num_results(); - IR_ENFORCE(output_size == 2u, - "The size %d of outputs must be equal to 2.", - output_size); - IR_ENFORCE( + PADDLE_ENFORCE_EQ( + output_size, + 2u, + phi::errors::InvalidArgument( + "The size %d of outputs must be equal to 2.", output_size)); + PADDLE_ENFORCE_EQ( (*this)->result(0).type().isa(), - "Type validation failed for the 0th output."); - IR_ENFORCE( + true, + phi::errors::InvalidArgument( + "Type validation failed for the 0th output.")); + PADDLE_ENFORCE_EQ( (*this)->result(1).type().isa(), - "Type validation failed for the 1st output."); + true, + phi::errors::InvalidArgument( + "Type validation failed for the 1st output.")); } VLOG(4) << "End Verifying for: ArrayPopOp."; } @@ -4198,9 +4414,11 @@ std::vector ArrayPopOp::InferMeta( "AttrtibueMap pointer in InferMeta function is nullptr.")); auto &attributes = *p_attributes; VLOG(4) << "Start infermeta ArrayPopOp"; - IR_ENFORCE(input_values.size() == 1, - "Num of inputs is expected to be 1 but got %d.", - input_values.size()); + 
PADDLE_ENFORCE_EQ(input_values.size(), + 1, + phi::errors::InvalidArgument( + "Num of inputs is expected to be 1 but got %d.", + input_values.size())); pir::Value input = input_values[0]; VLOG(4) << "Builder construction outputs"; @@ -4213,8 +4431,10 @@ std::vector ArrayPopOp::InferMeta( "paddle::dialect::AllocatedDenseTensorArrayType")); } - IR_ENFORCE(attributes.find("index") != attributes.end(), - "'index' Attribute is expected for ArrayPopOp. "); + PADDLE_ENFORCE_NE(attributes.find("index"), + attributes.end(), + phi::errors::InvalidArgument( + "'index' Attribute is expected for ArrayPopOp. ")); int index = attributes.at("index").dyn_cast().data(); paddle::dialect::IrTensor dense_input( diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op.h b/paddle/fluid/pir/dialect/operator/ir/manual_op.h index 8d13c11d06a59..7f472ef1fecab 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op.h +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op.h @@ -734,6 +734,7 @@ class MemcpyD2hMultiIoOp class IR_API ShapeBroadcastOp : public pir::Op { public: @@ -741,6 +742,7 @@ class IR_API ShapeBroadcastOp static const char *name() { return "pd_op.shape_broadcast"; } static constexpr const char **attributes_name = nullptr; static constexpr uint32_t attributes_num = 0; + static OpInfoTuple GetOpInfo(); static void Build(pir::Builder &builder, // NOLINT pir::OperationArgument &argument, // NOLINT pir::Value x_, diff --git a/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc b/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc index 2f4c9a2b7e504..8a843a8881734 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc +++ b/paddle/fluid/pir/dialect/operator/ir/op_attribute.cc @@ -30,7 +30,7 @@ phi::DataLayout DataLayoutAttribute::data() const { return storage()->GetAsKey(); } -phi::Scalar ScalarAttribute::data() { +phi::Scalar ScalarAttribute::data() const { if (isa()) { return phi::Scalar(dyn_cast().data()); } else if (isa()) { diff --git a/paddle/fluid/pir/dialect/operator/ir/op_attribute.h b/paddle/fluid/pir/dialect/operator/ir/op_attribute.h index 153414c7ad0d0..b7a54d6ca58d2 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_attribute.h +++ b/paddle/fluid/pir/dialect/operator/ir/op_attribute.h @@ -37,6 +37,8 @@ class IntArrayAttribute : public pir::Attribute { static IntArrayAttribute Parse(pir::IrParser &parser); // NOLINT const phi::IntArray &data() const; + + static std::string name() { return "a_intarray"; } }; class ScalarAttribute : public pir::Attribute { @@ -59,7 +61,9 @@ class ScalarAttribute : public pir::Attribute { return TransToIrAttribute(scalar, ctx); } - phi::Scalar data(); + phi::Scalar data() const; + + static std::string name() { return "a_scalar"; } }; class DataTypeAttribute : public pir::Attribute { @@ -76,6 +80,8 @@ class DataTypeAttribute : public pir::Attribute { static DataTypeAttribute Parse(pir::IrParser &parser); // NOLINT phi::DataType data() const; + + static std::string name() { return "a_dtype"; } }; class PlaceAttribute : public pir::Attribute { @@ -91,6 +97,7 @@ class PlaceAttribute : public pir::Attribute { static PlaceAttribute Parse(pir::IrParser &parser); // NOLINT phi::Place data() const; + static std::string name() { return "a_place"; } }; class DataLayoutAttribute : public pir::Attribute { diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc index 1beaf8369bdc7..fa9fccaba2701 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc +++ 
b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc @@ -46,9 +46,11 @@ struct CombineOpInferSymbolicShapeInterfaceModel const auto shape_data_list = [&] { symbol::TensorListShapeOrDataDimExprs shape_data_list; for (size_t i = 0; i < op->num_operands(); ++i) { - IR_ENFORCE(op->operand(i).type().dyn_cast(), - "Currently InferSymbolicShape of CombineOp only support " - "DenseTensorType."); + PADDLE_ENFORCE_NOT_NULL( + op->operand(i).type().dyn_cast(), + phi::errors::InvalidArgument( + "Currently InferSymbolicShape of CombineOp only support " + "DenseTensorType.")); shape_data_list.emplace_back( shape_analysis->GetShapeOrDataForValue(op->operand_source(i)) @@ -70,9 +72,11 @@ struct ConstantOpInferSymbolicShapeInterfaceModel : public InferSymbolicShapeInterface::Concept { static inline bool InferSymbolicShape( pir::Operation* op, pir::ShapeConstraintIRAnalysis* shape_analysis) { - IR_ENFORCE(op->result(0).type().dyn_cast(), - "Currently InferSymbolicShape of ConstantOp only support " - "DenseTensorType result."); + PADDLE_ENFORCE_NOT_NULL( + op->result(0).type().dyn_cast(), + phi::errors::InvalidArgument( + "Currently InferSymbolicShape of ConstantOp only support " + "DenseTensorType result.")); const std::vector out_dims = [op] { std::vector dims; @@ -243,9 +247,9 @@ OperatorDialect::OperatorDialect(pir::IrContext* ctx) ShadowOutputOpInferSymbolicShapeInterfaceModel>()); info = ctx->GetRegisteredOpInfo(pir::SplitOp::name()); - info.AttachInterface(std::move( + info.AttachInterface( pir::InterfaceValue::Get())); + SplitOpInferSymbolicShapeInterfaceModel>()); info = ctx->GetRegisteredOpInfo(pir::YieldOp::name()); info.AttachInterface( diff --git a/paddle/fluid/pir/dialect/operator/ir/ops.yaml b/paddle/fluid/pir/dialect/operator/ir/ops.yaml index 4da4f54c3ac90..11ff0e8f47c90 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ops.yaml +++ b/paddle/fluid/pir/dialect/operator/ir/ops.yaml @@ -1,7 +1,7 @@ # The operators included in this file are: # 1) Operators defined only in PIR, dynamic graphs do not exist; # 2) The definitions of static graphs and dynamic graphs are inconsistent, but the final definition plan has not yet been clarified. 
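One further variant appears in the op_dialect.cc hunks above: a pir dyn_cast yields a handle that evaluates as null when the cast fails, so failed casts are guarded with PADDLE_ENFORCE_NOT_NULL rather than with a comparison macro. Sketch (the dialect's DenseTensorType is assumed as the elided template argument):

    PADDLE_ENFORCE_NOT_NULL(
        op->result(0).type().dyn_cast<paddle::dialect::DenseTensorType>(),
        phi::errors::InvalidArgument(
            "Currently InferSymbolicShape of ConstantOp only support "
            "DenseTensorType result."));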
-# After the definition is clearly defined, migrate to paddle /fluid/pir/dialect/operator/ir/update_ops.yaml or paddle/phi/api/yaml/ops.yaml +# After the definition is clearly defined, migrate to paddle/fluid/pir/dialect/operator/ir/update_ops.yaml or paddle/phi/api/yaml/ops.yaml - op : adadelta_ args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update, Tensor learning_rate, Tensor master_param, float rho, float epsilon, bool multi_precision) @@ -22,6 +22,7 @@ spmd_rule : ElementwiseBinaryInferSpmd kernel : func : add + data_type: x inplace : (x -> out) backward : add_grad data_transform : @@ -107,6 +108,14 @@ inplace : (output -> out) backward : assign_out__grad +- op : assign_pos + args : (Tensor x, Tensor cum_count, Tensor eff_num_len) + output : Tensor(out) + infer_meta : + func : AssignPosInferMeta + kernel : + func : assign_pos + - op : assign_value args : (int[] shape, DataType dtype, Scalar[] values, Place place = {}) output : Tensor(out) @@ -470,6 +479,24 @@ optional : in_accum, in_state, out_scale, out_accum, out_state inplace : (scale -> out_scale, in_accum -> out_accum, in_state -> out_state) +- op : dgc + args : (Tensor u, Tensor v, Tensor grad, Tensor param, Tensor current_step, Tensor nranks, float[] sparsity, float m=0.9, bool use_nesterov=true, float rampup_begin_step=0.0, float rampup_step=0.0, float regular_coeff=0.0, int regular_type=0) + output : Tensor(u_out), Tensor(v_out), Tensor(encode_grad), Tensor(grad_out), Tensor(k), Tensor(gather_buff) + kernel : + func : dgc + param : [u, v, grad, param, current_step, nranks, m, use_nesterov, sparsity, rampup_begin_step, rampup_step, regular_coeff, regular_type] + optional: param + +- op : dgc_momentum + args : (Tensor param, Tensor grad, Tensor velocity, Tensor learning_rate, Tensor master_param, Tensor current_step_tensor, Tensor nranks_tensor, float mu, bool use_nesterov=false, str regularization_method="", float regularization_coeff=0.0f, bool multi_precision=false, float rescale_grad=1.0f, float rampup_begin_step=-1.0f) + output : Tensor(param_out), Tensor(velocity_out), Tensor(master_param_out), Tensor(grad_out) + infer_meta : + func : DGCMomentumInferMeta + kernel : + func : dgc_momentum + data_type : param + optional : master_param, master_param_out + - op : disable_check_model_nan_inf args: (Tensor x, int flag = 0) output: Tensor(out) @@ -491,6 +518,16 @@ data_type : fpn_rois optional : rois_num, multi_level_rois_num +- op : distributed_fused_lamb + args : (Tensor[] param, Tensor[] grad, Tensor fp32_fused_param, Tensor fp32_fused_grad, Tensor fp16_fused_param, Tensor fp16_fused_grad, Tensor moment1, Tensor moment2, Tensor beta1pow, Tensor beta2pow, Tensor fused_param_offsets, Tensor fp32_shard_fused_param_offsets, Tensor fp16_shard_fused_param_offsets, Tensor param_info, Tensor param_order, Tensor learning_rate, Tensor global_scale, float beta1, float beta2, float epsilon, float max_global_grad_norm, float weight_decay, bool clip_after_allreduce, int[] ring_ids= {}, int acc_steps = 1, bool use_master_param_norm = true, bool use_master_acc_grad = true, bool is_grad_scaled_by_nranks = true, int64_t nranks = 1, bool use_hierarchical_allreduce = false) + output : Tensor(fp32_fused_param_out), Tensor(fp16_fused_param_out), Tensor(fp32_acc_fused_grad), Tensor(fp16_acc_fused_grad), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1pow_out), Tensor(beta2pow_out), Tensor[](param_out){param.size()}, Tensor(found_inf), Tensor(acc_step), Tensor(stop_update), Tensor(step) + kernel : + func : 
distributed_fused_lamb + data_type : DataType::FLOAT32 + param : [param, grad, fp32_fused_param, fp32_fused_grad, fp16_fused_param, fp16fused_grad, moment1, moment2, beta1pow, beta2pow, fused_param_offsets, fp32_shard_fused_param_offsets, fp16_shard_fused_param_offsets, param_info, param_order, learning_rate, global_scale, acc_steps, beta1, beta2, epsilon, max_global_grad_norm, weight_decay, clip_after_allreduce, use_master_param_norm, use_master_acc_grad, is_grad_scaled_by_nranks, use_hierarchical_allreduce, nranks, ring_ids] + optional : fp32_fused_param, fp32_fused_grad, fp16_fused_param, fp16_fused_grad, fp32_fused_param_out, fp16_fused_param_out, fp32_acc_fused_grad, fp16_acc_fused_grad, acc_step, stop_update + inplace : (fp32_fused_param -> fp32_fused_param_out), (fp16_fused_param -> fp16_fused_param_out), (moment1 -> moment1_out), (moment2 -> moment2_out), (beta1pow -> beta1pow_out), (beta2pow -> beta2pow_out), (param -> param_out) + - op : distributed_fused_lamb_init args : (Tensor[] param, Tensor[] grad, float beta1, float beta2, int[] apply_weight_decay, int alignment, int rank, int nranks) output : Tensor(fp32_fused_param), Tensor(fp32_fused_grad), Tensor(fp16_fused_param), Tensor(fp16_fused_grad), Tensor(moment1), Tensor(moment2), Tensor(beta1_pow), Tensor(beta2_pow), Tensor(fused_param_offsets), Tensor(fp32_shard_fused_param_offsets), Tensor(fp16_shard_fused_param_offsets), Tensor(param_info), Tensor(param_order), Tensor[](param_out){param.size()}, Tensor[](master_param_out){param.size()}, Tensor[](grad_out){grad.size()}, Tensor(global_scale), Tensor(step) @@ -661,6 +698,7 @@ args : (str name, int col) output : Tensor(out) interfaces : paddle::dialect::InferSymbolicShapeInterface + traits: pir::ImmutableLayoutTrait - op : fetch args : (Tensor x, str name, int col) @@ -671,7 +709,7 @@ kernel : func : fetch param : [x] - traits : pir::SideEffectTrait + traits : pir::SideEffectTrait, pir::ImmutableLayoutTrait interfaces : paddle::dialect::InferSymbolicShapeInterface - op : floor_divide @@ -748,7 +786,7 @@ skip_transform : x - op : full_with_tensor - args : (Tensor shape, Tensor value, DataType dtype=DataType::FLOAT32) + args : (Tensor value, IntArray shape, DataType dtype=DataType::FLOAT32) output: Tensor(out) infer_meta : func : FullWithTensorInferMeta diff --git a/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml b/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml index 2f3d370e4ccff..452b845a43a1a 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml +++ b/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml @@ -27,6 +27,7 @@ spmd_rule : ElementwiseBinaryGradInferSpmd kernel : func : add_grad + data_type: out_grad no_need_buffer : x, y composite : add_grad(x, y, out_grad, axis, x_grad, y_grad) backward : add_double_grad @@ -201,15 +202,15 @@ - backward_op : divide_double_grad forward : divide_grad (Tensor x, Tensor y, Tensor out, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y) - args : (Tensor y, Tensor out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1) + args : (Tensor y, Tensor out, Tensor grad_out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1) output : Tensor(y_grad), Tensor(out_grad), Tensor(grad_out_grad) infer_meta : func : GeneralTernaryGradInferMeta - param : [y, grad_x, grad_x] + param : [y, out, out] kernel : func : divide_double_grad data_type : out - optional : grad_x_grad, grad_y_grad + optional : grad_x, grad_x_grad, grad_y_grad inplace : (grad_x_grad -> grad_out_grad) - 
backward_op : divide_grad diff --git a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h index 86370dd0cc6c1..e8719d4adb73e 100644 --- a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h +++ b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h @@ -98,8 +98,9 @@ struct OpRunTimeInfo { std::vector skip_transform_inputs; pir::AttributeMap extra_args_default_value; std::vector data_format_tensors; - bool is_onednn_only; - bool dynamic_fallback; + bool is_onednn_only = false; + bool dynamic_fallback = false; + OpRunTimeInfo() = default; OpRunTimeInfo(const std::string& infer_meta_func, const std::vector& infer_meta_param, diff --git a/paddle/fluid/pir/drr/CMakeLists.txt b/paddle/fluid/pir/drr/CMakeLists.txt index b23774a431795..ded64839dc97f 100644 --- a/paddle/fluid/pir/drr/CMakeLists.txt +++ b/paddle/fluid/pir/drr/CMakeLists.txt @@ -87,7 +87,7 @@ if(WITH_CINN) set(DRR_SRCS ${DRR_SRCS} ${CINN_SOURCE_FILE}) endif() -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(onednn_dialect_name onednn_op) set(pir_op_onednn_yaml ${PADDLE_BINARY_DIR}/paddle/fluid/pir/dialect/operator/ir/generated/onednn.parsed.yaml diff --git a/paddle/fluid/pir/drr/include/drr_pattern_context.h b/paddle/fluid/pir/drr/include/drr_pattern_context.h index b7755f659e85d..17090fb3e210a 100644 --- a/paddle/fluid/pir/drr/include/drr_pattern_context.h +++ b/paddle/fluid/pir/drr/include/drr_pattern_context.h @@ -129,10 +129,11 @@ class TEST_API DrrPatternContext { const Op& ResultOpPattern( const std::string& op_type, - const std::unordered_map& attributes = {}); + const std::unordered_map& attributes = {}, + const std::unordered_map& runtime_attributes = + {}); drr::Tensor& ResultTensorPattern(const std::string& name); - // void RequireEqual(const Attribute& first, const Attribute& second); void RequireEqual(const TensorShape& first, const TensorShape& second); void RequireEqual(const TensorDataType& first, const TensorDataType& second); void RequireNativeCall(const ConstraintFunction& custom_fn); @@ -157,34 +158,28 @@ class Op { const Tensor& arg2) const; TEST_API void operator()(const std::vector& args, const std::vector& outputs) const; - // const Tensor& operator()(const Tensor& arg0, const Tensor& arg1, const - // Tensor& arg2) const; const Tensor& operator()(const Tensor& arg0, const - // Tensor& arg1, const Tensor& arg2, const Tensor& arg3) const; const Tensor& - // operator()(const Tensor& arg0, const Tensor& arg1, const Tensor& arg2, - // const Tensor& arg3, const Tensor& arg4) const; static const char* prefix; private: Op(const std::string& op_type_name, + PatternGraph* pattern_graph, const std::unordered_map& attributes, - PatternGraph* pattern_graph) + const std::unordered_map& runtime_attributes = {}) : op_type_name_(op_type_name), + pattern_graph_(pattern_graph), attributes_(attributes), - pattern_graph_(pattern_graph) {} - - const std::unordered_map& attributes() const { - return attributes_; - } - - friend class DrrPatternContext; - friend class OpCall; + runtime_attributes_(runtime_attributes) {} std::string op_type_name_; - std::unordered_map attributes_; PatternGraph* pattern_graph_{nullptr}; + std::unordered_map attributes_; + std::unordered_map runtime_attributes_; thread_local static int64_t count; + + friend class DrrPatternContext; + friend class OpCall; }; class TEST_API Tensor { @@ -244,7 +239,8 @@ class TEST_API OpCall { : op_name_(op->op_type_name_), inputs_(inputs), outputs_(outputs), - attributes_(op->attributes_) {} + 
attributes_(op->attributes_), + runtime_attributes_(op->runtime_attributes_) {} const std::string& name() const { return op_name_; } @@ -256,18 +252,24 @@ class TEST_API OpCall { return attributes_; } + const std::unordered_map& runtime_attributes() const { + return runtime_attributes_; + } + private: std::string op_name_; std::vector inputs_; std::vector outputs_; std::unordered_map attributes_; + std::unordered_map runtime_attributes_; }; class TEST_API ResultPattern { public: - const drr::Op& Op( - const std::string& op_type, - const std::unordered_map& attributes = {}); + const drr::Op& + Op(const std::string& op_type, + const std::unordered_map& attributes = {}, + const std::unordered_map& runtime_attributes = {}); drr::Tensor& Tensor(const std::string& name); @@ -304,10 +306,44 @@ class TEST_API ResultPattern { Attribute VectorFloatAttr(const std::vector& value) const; + // {"bool", phi::DataType::BOOL}, + // {"uint8", phi::DataType::UINT8}, + // {"int8", phi::DataType::INT8}, + // {"uint16", phi::DataType::UINT16}, + // {"int16", phi::DataType::INT16}, + // {"uint32", phi::DataType::UINT32}, + // {"int32", phi::DataType::INT32}, + // {"uint64", phi::DataType::UINT64}, + // {"int64", phi::DataType::INT64}, + // {"float32", phi::DataType::FLOAT32}, + // {"complex64", phi::DataType::COMPLEX64}, + // {"complex128", phi::DataType::COMPLEX128}, + // {"Undefined", phi::DataType::UNDEFINED}, + // {"psting", phi::DataType::PSTRING}, + // {"float16", phi::DataType::FLOAT16}, + // {"bfloat16", phi::DataType::BFLOAT16}, + // {"float64", phi::DataType::FLOAT64}}; Attribute DataTypeAttr(const std::string& value) const; + // {"cpu", phi::CPUPlace{}}, + // {"gpu", phi::GPUPlace{}}, + // {"gpu_pinned", phi::GPUPinnedPlace{}}, + // {"xpu", phi::XPUPlace{}}, + // {"ipu", phi::IPUPlace{}}, + // {":", phi::CustomPlace{}}, + // {"undefined", phi::Place{}}}; Attribute PlaceAttr(const std::string& value) const; + // {"NHWC", phi::DataLayout::kNHWC}, + // {"NCHW", phi::DataLayout::kNCHW}, + // {"Undefined", phi::DataLayout::kAnyLayout}, + // {"ONEDNN", phi::DataLayout::ONEDNN}, + // {"SPARSE_COO", phi::DataLayout::SPARSE_COO}, + // {"SPARSE_CSR", phi::DataLayout::SPARSE_CSR}, + // {"NDHWC", phi::DataLayout::kNDHWC}, + // {"NCDHW", phi::DataLayout::kNCDHW}, + // {"PSTRING_UNION", phi::DataLayout::PSTRING_UNION}, + // {"STRIDED", phi::DataLayout::STRIDED}}; Attribute DataLayoutAttr(const std::string& value) const; Attribute ComputeAttr(const AttrComputeFunc& attr_compute_func) const; diff --git a/paddle/fluid/pir/drr/src/ir_operation_factory.cc b/paddle/fluid/pir/drr/src/ir_operation_factory.cc index e625db38d1b8f..20a281dd12d36 100644 --- a/paddle/fluid/pir/drr/src/ir_operation_factory.cc +++ b/paddle/fluid/pir/drr/src/ir_operation_factory.cc @@ -15,19 +15,22 @@ #include #include "paddle/common/layout.h" + #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h" +#ifdef PADDLE_WITH_DNNL +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#endif #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" #include "paddle/fluid/pir/drr/include/drr_pattern_context.h" #include "paddle/fluid/pir/drr/src/attr_type_uilts.h" #include "paddle/fluid/pir/drr/src/ir_operation_factory.h" -#include "paddle/phi/core/enforce.h" + #include "paddle/pir/include/core/builtin_attribute.h" #include "paddle/pir/include/core/builtin_op.h" #include "paddle/pir/include/core/operation.h" #include "paddle/pir/include/core/value.h" -#ifdef PADDLE_WITH_DNNL -#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" -#endif + 
+#include "paddle/phi/core/enforce.h" namespace paddle { namespace drr { @@ -317,10 +320,11 @@ pir::Attribute CreateIrAttribute(const std::any& obj) { } } -pir::AttributeMap CreateAttributeMap(const OpCall& op_call, - const MatchContextImpl& src_match_ctx) { +pir::AttributeMap CreateAttributeMap( + const std::unordered_map& attrs, + const MatchContextImpl& src_match_ctx) { pir::AttributeMap attr_map; - for (const auto& kv : op_call.attributes()) { + for (const auto& kv : attrs) { std::visit( [&](auto&& arg) { if constexpr (std::is_same_v, @@ -339,12 +343,12 @@ pir::AttributeMap CreateAttributeMap(const OpCall& op_call, return attr_map; } -pir::Value GetIrValueByDrrTensor(const Tensor& tensor, +pir::Value GetIrValueByDrrTensor(const Tensor* tensor, const MatchContextImpl& res_match_ctx) { - if (tensor.is_none()) { + if (tensor->is_none()) { return pir::Value{}; } - return res_match_ctx.GetIrValue(tensor.name()); + return res_match_ctx.GetIrValue(tensor->name()); } std::vector GetIrValuesByDrrTensors( @@ -353,16 +357,21 @@ std::vector GetIrValuesByDrrTensors( std::vector ir_values; ir_values.reserve(tensors.size()); for (const auto* tensor : tensors) { - ir_values.push_back(GetIrValueByDrrTensor(*tensor, res_match_ctx)); + ir_values.push_back(GetIrValueByDrrTensor(tensor, res_match_ctx)); } return ir_values; } -void BindIrOutputs(const OpCall& op_call, - pir::Operation* op, - MatchContextImpl* match_ctx) { - for (size_t i = 0; i < op_call.outputs().size(); ++i) { - match_ctx->BindIrValue(op_call.outputs()[i]->name(), op->result(i)); +void BindIrOutputsWithDrrOutputs(const std::vector& tensors, + pir::Operation* op, + MatchContextImpl* match_ctx) { + PADDLE_ENFORCE_LE( + tensors.size(), + op->num_results(), + phi::errors::InvalidArgument( + "The size of drr outputs should less equal the size of pir outputs")); + for (size_t i = 0; i < tensors.size(); ++i) { + match_ctx->BindIrValue(tensors[i]->name(), op->result(i)); } } @@ -371,15 +380,17 @@ pir::Operation* CreateOperation(const OpCall& op_call, pir::PatternRewriter& rewriter, // NOLINT MatchContextImpl* res_match_ctx) { VLOG(6) << "Drr create [" << op_call.name() << "] op..."; - const auto& inputs = op_call.inputs(); - std::vector ir_values = - GetIrValuesByDrrTensors(inputs, *res_match_ctx); pir::Operation* op = OperationFactory::Instance().CreateOperation( op_call.name(), - ir_values, - CreateAttributeMap(op_call, src_match_ctx), + GetIrValuesByDrrTensors(op_call.inputs(), *res_match_ctx), + CreateAttributeMap(op_call.attributes(), src_match_ctx), rewriter); - BindIrOutputs(op_call, op, res_match_ctx); + auto runtime_attr_map = + CreateAttributeMap(op_call.runtime_attributes(), src_match_ctx); + for (const auto& kv : runtime_attr_map) { + op->set_attribute(kv.first, kv.second); + } + BindIrOutputsWithDrrOutputs(op_call.outputs(), op, res_match_ctx); VLOG(6) << "Drr create [" << op_call.name() << " @" << op << "] op done."; return op; } diff --git a/paddle/fluid/pir/drr/src/ir_operation_factory.h b/paddle/fluid/pir/drr/src/ir_operation_factory.h index 23095bf9a73e0..eaf3f866ec60c 100644 --- a/paddle/fluid/pir/drr/src/ir_operation_factory.h +++ b/paddle/fluid/pir/drr/src/ir_operation_factory.h @@ -30,13 +30,13 @@ class OperationFactory { return operation_factory; } - using operation_create_fn = + using OperationCreateFunction = std::function&, const pir::AttributeMap&, pir::PatternRewriter&)>; void RegisterOperationCreator(const std::string& op_name, - const operation_create_fn& create_fn) { + const OperationCreateFunction& create_fn) { 
op_creator_map[op_name] = create_fn; } @@ -76,7 +76,7 @@ class OperationFactory { #ifdef PADDLE_WITH_DNNL void RegisterOnednnOpGeneratedOpCreator(); #endif - std::unordered_map op_creator_map; + std::unordered_map op_creator_map; }; pir::Operation* CreateOperation(const OpCall& op_call, diff --git a/paddle/fluid/pir/drr/src/match_context_impl.h b/paddle/fluid/pir/drr/src/match_context_impl.h index 12a0dc7a65ab5..a9acb5f6ed8df 100644 --- a/paddle/fluid/pir/drr/src/match_context_impl.h +++ b/paddle/fluid/pir/drr/src/match_context_impl.h @@ -37,10 +37,9 @@ class MatchContextImpl final { PADDLE_ENFORCE_NE( tensor_map_.count(tensor_name), 0, - phi::errors::NotFound( - "Not found tensor." - "The Drr tensor [%s] must exist in pattern graph to be obtained.", - tensor_name)); + phi::errors::NotFound("Not found tensor. The drr tensor [%s] must " + "exist in pattern graph to be obtained.", + tensor_name)); return tensor_map_.at(tensor_name); } @@ -48,10 +47,10 @@ class MatchContextImpl final { PADDLE_ENFORCE_NE( operation_map_.count(op_call), 0, - phi::errors::NotFound("Not found operation." - "The Drr operation [%s] must exist in the " - "pattern graph to be obtained.", - op_call->name())); + phi::errors::NotFound( + "Not found operation. The drr operation [%s] must exist in the " + "pattern graph to be obtained.", + op_call->name())); return operation_map_.at(op_call); } @@ -65,10 +64,10 @@ class MatchContextImpl final { PADDLE_ENFORCE_NE( iter, tensor_map_.end(), - phi::errors::NotFound("Not found tensor." - "The Drr tensor [%s] is not found in the map, " - "unable to obtain the corresponding IrValue.", - tensor_name)); + phi::errors::NotFound( + "Not found tensor. The drr tensor [%s] is not found in the map, " + "unable to obtain the corresponding IrValue.", + tensor_name)); return iter->second; } @@ -77,10 +76,10 @@ class MatchContextImpl final { PADDLE_ENFORCE_NE( iter, attr_map_.end(), - phi::errors::NotFound("Not found attr." - "The Drr attr [%s] is not found in the map, " - "unable to obtain the corresponding Attribute.", - attr_name)); + phi::errors::NotFound( + "Not found attr. 
The drr attr [%s] is not found in the map, unable " + "to obtain the corresponding Attribute.", + attr_name)); return iter->second; } diff --git a/paddle/fluid/pir/drr/src/pattern_context.cc b/paddle/fluid/pir/drr/src/pattern_context.cc index 7bdee5d5dcafe..fe72170bc9eea 100644 --- a/paddle/fluid/pir/drr/src/pattern_context.cc +++ b/paddle/fluid/pir/drr/src/pattern_context.cc @@ -39,7 +39,7 @@ const Op& DrrPatternContext::SourceOpPattern( const std::string& op_type, const std::unordered_map& attributes) { owned_ops_.push_back(std::shared_ptr( - new drr::Op(op_type, attributes, source_pattern_graph_.get()))); + new drr::Op(op_type, source_pattern_graph_.get(), attributes))); return *owned_ops_.back(); } @@ -50,9 +50,10 @@ drr::Tensor& DrrPatternContext::SourceTensorPattern(const std::string& name) { const Op& DrrPatternContext::ResultOpPattern( const std::string& op_type, - const std::unordered_map& attributes) { - owned_ops_.push_back(std::shared_ptr( - new drr::Op(op_type, attributes, result_pattern_graph_.get()))); + const std::unordered_map& attributes, + const std::unordered_map& runtime_attributes) { + owned_ops_.push_back(std::shared_ptr(new drr::Op( + op_type, result_pattern_graph_.get(), attributes, runtime_attributes))); return *owned_ops_.back(); } @@ -174,8 +175,9 @@ void Tensor::operator=(const Tensor& other) const { // NOLINT const drr::Op& ResultPattern::Op( const std::string& op_type, - const std::unordered_map& attributes) { - return ctx_->ResultOpPattern(op_type, attributes); + const std::unordered_map& attributes, + const std::unordered_map& runtime_attributes) { + return ctx_->ResultOpPattern(op_type, attributes, runtime_attributes); } drr::Tensor& ResultPattern::Tensor(const std::string& name) { diff --git a/paddle/fluid/pir/drr/src/rewrite_pattern.cc b/paddle/fluid/pir/drr/src/rewrite_pattern.cc index 2bd2fdc36b717..6b2c7cab2ba13 100644 --- a/paddle/fluid/pir/drr/src/rewrite_pattern.cc +++ b/paddle/fluid/pir/drr/src/rewrite_pattern.cc @@ -15,6 +15,7 @@ #include #include +#include "glog/vlog_is_on.h" #include "paddle/fluid/pir/drr/include/drr_pattern_base.h" #include "paddle/fluid/pir/drr/include/drr_rewrite_pattern.h" #include "paddle/fluid/pir/drr/src/ir_operation_factory.h" @@ -43,12 +44,17 @@ DrrRewritePattern::DrrRewritePattern( constraints_(drr_context.constraints()), result_pattern_graph_(drr_context.result_pattern_graph()), drr_pattern_owner_(drr_pattern_owner) { - PADDLE_ENFORCE_NE( - source_pattern_graph_->owned_op_call().empty(), - true, - phi::errors::InvalidArgument("Source pattern graph is empty." - "Suggested fix: Please check the DRR " - "source pattern definition code.")); + PADDLE_ENFORCE_NE(source_pattern_graph_->owned_op_call().empty(), + true, + phi::errors::InvalidArgument( + "Source pattern graph is empty. Suggested fix: please " + "check the drr source pattern definition code.")); + if (VLOG_IS_ON(4)) { + std::cout << "\nThe source pattern graph in [" << pattern_name << "]:\n" + << *source_pattern_graph_ << std::endl; + std::cout << "\nThe result pattern graph in [" << pattern_name << "]:\n" + << *result_pattern_graph_ << std::endl; + } } bool DrrRewritePattern::MatchAndRewrite( @@ -324,7 +330,11 @@ bool DrrRewritePattern::MatchFromOutputToInput( } return false; }; - + // Check whether Drr Tensor and IR Value is None. + const auto& IsNoneTensorAndValue = [](const Tensor* drr_input_tensor, + pir::Value ir_value) { + return drr_input_tensor->is_none() && ir_value == nullptr; + }; // Step 1: Initialize DRR matched queue. 
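+  // The walk below pops one drr op at a time and compares it with its pir
+  // counterpart: a drr input tensor marked none must correspond to a null
+  // pir value, an operand that was already visited aborts the match, and
+  // at the end the number of matched steps must equal the OpCall count of
+  // the source pattern graph.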
bool matched = true; size_t step = 0; @@ -348,7 +358,15 @@ bool DrrRewritePattern::MatchFromOutputToInput( auto ir_input_values = ir_node->operands_source(); for (size_t i = 0; i < drr_input_tensors.size(); ++i) { if (drr_input_tensors[i]->is_none()) { - continue; + if (IsNoneTensorAndValue(drr_input_tensors[i], ir_input_values[i])) { + continue; + } else { + VLOG(8) << drr_node->name() << " Match failed: drr_input[" << i + << "] != pir_input[" << i << "], drr_input_tensor[" << i + << "] is None."; + matched = false; + break; + } } if (HasVisitedOperands(drr_input_tensors[i], ir_input_values[i])) { matched = false; @@ -403,9 +421,8 @@ bool DrrRewritePattern::MatchFromOutputToInput( step, source_pattern_graph.CountOfOpCalls(), phi::errors::PreconditionNotMet( - "Pattern matching failed." - "The number of successful matches and the number of OpCalls in the " - "source pattern graph are not equal.")); + "Pattern matching failed. The number of successful matches and the " + "number of OpCalls in the source pattern graph are not equal.")); } else { return matched; } @@ -453,25 +470,25 @@ MatchContextImpl DrrRewritePattern::CreateOperations( PADDLE_ENFORCE_NE( result_pattern_graph.id2owned_tensor().count(in_tensor), 0, - phi::errors::NotFound("Not found the input tensor." - "Drr input tensor [%s] must exist in the result " - "pattern graph to be obtained.", - in_tensor)); + phi::errors::NotFound( + "Not found the input tensor. Drr input tensor [%s] must exist in " + "the result pattern graph to be obtained.", + in_tensor)); if (!result_pattern_graph.id2owned_tensor().at(in_tensor)->is_none()) { res_match_ctx.BindIrValue(in_tensor, src_match_ctx.GetIrValue(in_tensor)); } } - std::vector> temp_program; - std::unordered_map op_2_temp_program_index; - for (auto& op : *rewriter.block()) { - op_2_temp_program_index[&op] = temp_program.size(); - temp_program.push_back({&op}); - } - // topo order visit result_pattern_graph GraphTopo graph_topo_visit(&result_pattern_graph); graph_topo_visit.WalkGraphNodesTopoOrder([&](const OpCall& op_call) { + std::vector> temp_program; + std::unordered_map op_2_temp_program_index; + for (auto& op : *rewriter.block()) { + op_2_temp_program_index[&op] = temp_program.size(); + temp_program.push_back({&op}); + } + // set insert point size_t max_input_op_index = 0UL; pir::Operation* max_index_op = nullptr; @@ -518,11 +535,13 @@ MatchContextImpl DrrRewritePattern::CreateOperations( pir::Operation* new_op = CreateOperation(op_call, src_match_ctx, rewriter, &res_match_ctx); - op_2_temp_program_index[new_op] = max_input_op_index + 1; - if (max_input_op_index + 1 >= temp_program.size()) { + + size_t new_max_input_op_index = max_input_op_index + 1; + op_2_temp_program_index[new_op] = new_max_input_op_index; + if (new_max_input_op_index >= temp_program.size()) { temp_program.push_back({}); } - temp_program[max_input_op_index + 1].push_back(new_op); + temp_program[new_max_input_op_index].push_back(new_op); }); return res_match_ctx; diff --git a/paddle/fluid/pir/serialize_deserialize/CMakeLists.txt b/paddle/fluid/pir/serialize_deserialize/CMakeLists.txt new file mode 100644 index 0000000000000..4ab79bd350dc0 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/CMakeLists.txt @@ -0,0 +1,9 @@ +file(GLOB_RECURSE SERIALIZE_DESERIALIZE_CPP_SOURCES "*.cc") + +include_directories(pir_save_load PRIVATE + ${PADDLE_SOURCE_DIR}/third_party/nlohmann_json/include/) + +cc_library( + pir_save_load + SRCS ${SERIALIZE_DESERIALIZE_CPP_SOURCES} + DEPS op_dialect phi json) diff --git
a/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h b/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h new file mode 100644 index 0000000000000..d4aaefe81c983 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h @@ -0,0 +1,316 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include +#include +#include + +#include "glog/logging.h" +#include "paddle/common/layout.h" +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" +#include "paddle/fluid/pir/serialize_deserialize/include/schema.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/builtin_attribute.h" +#include "paddle/pir/include/core/builtin_type.h" + +namespace pir { + +template +T deserializeTypeFromJson(Json* type_json, pir::IrContext* ctx) { + return T::get(ctx); +} + +template +T deserializeAttrFromJson(Json* attr_json, pir::IrContext* ctx) { + CPP_T data = attr_json->at(DATA).template get(); + return T::get(ctx, data); +} + +template <> +pir::Complex64Attribute deserializeAttrFromJson( + Json* attr_json, pir::IrContext* ctx) { + Json data_json = attr_json->at(DATA); + phi::dtype::complex data = + phi::dtype::complex(data_json.at(0).template get(), + data_json.at(1).template get()); + return pir::Complex64Attribute::get(ctx, data); +} + +template <> +pir::Complex128Attribute +deserializeAttrFromJson(Json* attr_json, + pir::IrContext* ctx) { + Json data_json = attr_json->at(DATA); + phi::dtype::complex data = + phi::dtype::complex(data_json.at(0).template get(), + data_json.at(1).template get()); + return pir::Complex128Attribute::get(ctx, data); +} + +template <> +paddle::dialect::IntArrayAttribute +deserializeAttrFromJson>(Json* attr_json, + pir::IrContext* ctx) { + std::vector data = attr_json->at(DATA).get>(); + phi::IntArray int_array = phi::IntArray(data); + return paddle::dialect::IntArrayAttribute::get(ctx, int_array); +} + +pir::Attribute deserializeAttrFromJson_scalarAttr(Json* attr_json, + pir::IrContext* ctx) { + Json content = attr_json->at(DATA); + phi::DataType dtype_ = + phi::StringToDataType(content.at(0).template get()); + phi::Scalar scalar; + + if (dtype_ == phi::DataType::FLOAT32) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::INT32) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::FLOAT64) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::INT8) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::FLOAT16 || + dtype_ == phi::DataType::UINT16 || + dtype_ == phi::DataType::BFLOAT16) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::INT16) { + scalar = 
phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::INT64) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::UINT8) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::UINT32) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::UINT64) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::BOOL) { + scalar = phi::Scalar(content.at(1).template get()); + } else if (dtype_ == phi::DataType::COMPLEX64) { + float scalar_real = content.at(1).template get(); + float scalar_imag = content.at(2).template get(); + phi::dtype::complex data = + phi::dtype::complex(scalar_real, scalar_imag); + scalar = phi::Scalar(data); + } else if (dtype_ == phi::DataType::COMPLEX128) { + double scalar_real = content.at(1).template get(); + double scalar_imag = content.at(2).template get(); + phi::dtype::complex data = + phi::dtype::complex(scalar_real, scalar_imag); + scalar = phi::Scalar(data); + } else { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Invalid tensor data type `", dtype_, "`.")); + } + + return paddle::dialect::ScalarAttribute::get(ctx, scalar); +} + +template <> +paddle::dialect::DataTypeAttribute +deserializeAttrFromJson( + Json* attr_json, pir::IrContext* ctx) { + std::string data = attr_json->at(DATA).template get(); + phi::DataType data_type = phi::StringToDataType(data); + return paddle::dialect::DataTypeAttribute::get(ctx, data_type); +} + +template <> +paddle::dialect::PlaceAttribute +deserializeAttrFromJson( + Json* attr_json, pir::IrContext* ctx) { + Json data_json = attr_json->at(DATA); + int8_t type_id = data_json.at(0).template get(); + phi::AllocationType type = static_cast(type_id); + int8_t id = data_json.at(1).template get(); // int8_t + std::string dev_type = data_json.at(2).template get(); // string + phi::Place place = phi::Place(type, id, dev_type); + return paddle::dialect::PlaceAttribute::get(ctx, place); +} + +pir::Type parseType(Json* type_json) { + auto type_name = type_json->at(ID).template get(); + pir::IrContext* ctx = pir::IrContext::Instance(); + + if (type_name == pir::BoolType::name()) { + VLOG(8) << "Parse BoolType ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::BFloat16Type::name()) { + VLOG(8) << "Parse BFloat16Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float16Type::name()) { + VLOG(8) << "Parse Float16Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float32Type::name()) { + VLOG(8) << "Parse Float32Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Float64Type::name()) { + VLOG(8) << "Parse Float64Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int8Type::name()) { + VLOG(8) << "Parse Int8Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::UInt8Type::name()) { + VLOG(8) << "Parse UInt8Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int16Type::name()) { + VLOG(8) << "Parse Int16Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int32Type::name()) { + VLOG(8) << "Parse Int32Type ...
"; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Int64Type::name()) { + VLOG(8) << "Parse Int64Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::IndexType::name()) { + VLOG(8) << "Parse IndexType ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Complex64Type::name()) { + VLOG(8) << "Parse Complex64Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::Complex128Type::name()) { + VLOG(8) << "Parse Complex128Type ... "; + return pir::deserializeTypeFromJson(type_json, ctx); + } else if (type_name == pir::VectorType::name()) { + VLOG(8) << "Parse VectorType ... "; + std::vector content; + for (auto& type_x : type_json->at(DATA)) { + content.push_back(parseType(&type_x)); + } + return pir::VectorType::get(ctx, content); + } else if (type_name == pir::DenseTensorType::name()) { + VLOG(8) << "Parse DenseTensorType ... "; + Json data_json = type_json->at(DATA); + pir::Type dtype = parseType(&(data_json.at(0))); + + std::vector dims = + data_json.at(1).template get>(); + phi::DDim ddim = phi::make_ddim(dims); + pir::DataLayout data_layout = + common::StringToDataLayout(data_json.at(2).template get()); + + std::vector> lod = + data_json.at(3).template get>>(); + + size_t offset = data_json.at(4).get(); + return pir::DenseTensorType::get( + ctx, dtype, ddim, data_layout, lod, offset); + } else if (type_name == NULL_TYPE) { + return pir::Type(); + } else { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Unknown Type %s for parse type", type_name)); + } + VLOG(8) << "Finish Parse Type ... "; + + return pir::Type(); +} + +template <> +pir::TypeAttribute deserializeAttrFromJson( + Json* attr_json, pir::IrContext* ctx) { + pir::Type type = parseType(&(attr_json->at(DATA))); + return pir::TypeAttribute::get(ctx, type); +} + +pir::Attribute parseAttr(Json* attr_json) { + std::string attr_name = attr_json->at(ID).template get(); + pir::IrContext* ctx = pir::IrContext::Instance(); + + if (attr_name == pir::BoolAttribute::name()) { + VLOG(8) << "Parse BoolAttribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::FloatAttribute::name()) { + VLOG(8) << "Parse FloatAttribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::DoubleAttribute::name()) { + VLOG(8) << "Parse DoubleAttribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::Int32Attribute::name()) { + VLOG(8) << "Parse Int32Attribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::Int64Attribute::name()) { + VLOG(8) << "Parse Int64Attribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::IndexAttribute::name()) { + VLOG(8) << "Parse IndexAttribute ."; + return pir::deserializeAttrFromJson(attr_json, + ctx); + } else if (attr_name == pir::ArrayAttribute::name()) { + VLOG(8) << "Parse ArrayAttribute ."; + std::vector val; + for (auto& attr_ : attr_json->at(DATA)) { + val.push_back(parseAttr(&(attr_))); + } + return pir::ArrayAttribute::get(ctx, val); + } else if (attr_name == pir::TypeAttribute::name()) { + VLOG(8) << "Parse TypeAttribute ."; + return pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == pir::TensorNameAttribute::name()) { + VLOG(8) << "Parse TensorNameAttribute ."; + return pir::deserializeAttrFromJson( + attr_json, 
ctx); + } else if (attr_name == pir::Complex64Attribute::name()) { + VLOG(8) << "Parse Complex64Attribute ."; + return pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == pir::Complex128Attribute::name()) { + VLOG(8) << "Parse Complex128Attribute ."; + return pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == pir::StrAttribute::name()) { + VLOG(8) << "Parse StrAttribute ."; + return pir::deserializeAttrFromJson( + attr_json, ctx); + } else if (attr_name == paddle::dialect::IntArrayAttribute::name()) { + VLOG(8) << "Parse IntArrayAttribute ."; + return pir::deserializeAttrFromJson>(attr_json, ctx); + } else if (attr_name == paddle::dialect::ScalarAttribute::name()) { + VLOG(8) << "Parse ScalarAttribute ."; + // this func's return type is pir::Attribute which is different + // from paddle::dialect::ScalarAttribute + return pir::deserializeAttrFromJson_scalarAttr(attr_json, ctx); + } else if (attr_name == paddle::dialect::DataTypeAttribute::name()) { + VLOG(8) << "Parse DataTypeAttribute ."; + return pir::deserializeAttrFromJson(attr_json, ctx); + } else if (attr_name == paddle::dialect::PlaceAttribute::name()) { + VLOG(8) << "Parse PlaceAttribute ."; + return pir::deserializeAttrFromJson(attr_json, ctx); + } else { + PADDLE_ENFORCE(false, + phi::errors::InvalidArgument( + "Unknown Attr %s for parse attr", attr_name)); + } + VLOG(8) << "Finish Parse Attr ... "; + + return pir::Attribute(); +} + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/interface.h b/paddle/fluid/pir/serialize_deserialize/include/interface.h new file mode 100644 index 0000000000000..3302dc1b90bb7 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/interface.h @@ -0,0 +1,63 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "paddle/pir/include/core/program.h" +namespace pir { +/** + * @brief Write the given PIR program into a file at the specified file path. + * + * @param[in] program The PIR program to be written. + * @param[in] file_path The path to the file to be written. + * @param[in] pir_version The version number of PIR, used to identify or verify + * the written program version. + * @param[in] overwrite If the file already exists, this flag determines + * whether to overwrite the existing file. + * @param[in] readable (Optional parameter, defaults to false) If true, the + * generated file is written with an indented structure. + * @param[in] trainable (Optional parameter, defaults to true) If true, each + * operation keeps the opresult attrs needed for training, such as + * stop_gradient and persistable; otherwise, it may only have opinfo attrs. + * + * @return Void. + * + * @note The readable and trainable parameters may affect the content and + * format of the generated file, depending on the implementation.
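 *
 * A minimal usage sketch; the file path and version number below are
 * illustrative values, not ones prescribed by this API:
 *
 *   pir::Program program(pir::IrContext::Instance());
 *   // ... build the program ...
 *   pir::WriteModule(program, "/tmp/demo.json", /*pir_version=*/1,
 *                    /*overwrite=*/true);
 *   pir::Program recovered(pir::IrContext::Instance());
 *   pir::ReadModule("/tmp/demo.json", &recovered, /*pir_version=*/1);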
+ */ +void WriteModule(const pir::Program& program, + const std::string& file_path, + const uint64_t& pir_version, + bool overwrite, + bool readable = false, + bool trainable = true); + +/** + * @brief Gets a PIR program from the specified file path. + * + * @param[in] file_path The path to the file from which the PIR program + * should be read. + * @param[out] program A pointer to the PIR program object where the + * deserialized program will be stored. + * @param[in] pir_version The current version of the PIR program format. + * + * @return Void. The function modifies the 'program' object to contain the data + * read from the file. + * + * @note If 'pir_version' is larger than the version of the file, the + * version compatibility modification rules will be triggered. + */ +void ReadModule(const std::string& file_path, + pir::Program* program, + const uint64_t& pir_version); +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h b/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h new file mode 100644 index 0000000000000..2ae9f22d21a9c --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h @@ -0,0 +1,52 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include "paddle/common/enforce.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/pir/include/core/operation.h" +#include "paddle/pir/include/core/program.h" + +namespace pir { + +class ProgramReader { + public: + explicit ProgramReader(const uint64_t version) : current_version(version) {} + + ProgramReader(ProgramReader&&) = delete; + ProgramReader(const ProgramReader& ProgramReader) = delete; + ProgramReader& operator=(const ProgramReader&) = delete; + ProgramReader& operator=(ProgramReader&&); + + // static void staticInit() + + void RecoverProgram(Json* program_json, pir::Program* recover_program); + ~ProgramReader() = default; + + private: + uint64_t current_version; + std::map id_value_map; + + void ReadProgram(Json* program_json, pir::Program* program); + void ReadRegion(Json* region_json, pir::Region* region); + void ReadBlock(Json* block_json, pir::Block* block); + pir::Operation* ReadOp(Json* op_json); + pir::AttributeMap ReadAttributesMap(Json* attrs_json, + Json* opresult_attrs_json); + pir::Attribute ReadAttribute(Json* attr_json); + pir::Type ReadType(Json* type_json); +}; + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h b/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h new file mode 100644 index 0000000000000..de8c7be16c5d6 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h @@ -0,0 +1,81 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/pir/include/core/program.h" + +namespace pir { +/** + * ProgramWriter is used to serialize a pir program to a json object. + * + */ + +class ProgramWriter { + public: + explicit ProgramWriter(const uint64_t version) : version_(version) {} + explicit ProgramWriter(const uint64_t version, const bool trainable) + : version_(version), trainable_(trainable) {} + + ProgramWriter(ProgramWriter&&) = delete; + ProgramWriter(const ProgramWriter& ProgramWriter) = delete; + ProgramWriter& operator=(const ProgramWriter&) = delete; + ProgramWriter& operator=(ProgramWriter&&); + + /** GetProgramJson is used by the WriteModule api. */ + Json GetProgramJson(const pir::Program* program); + ~ProgramWriter() = default; + + private: + /** version_ is the version of PaddlePaddle, which is used to + * conduct version compatibility judgment and modification. */ + uint64_t version_; + + /** program_json is the json object of the pir program. */ + Json program_json; + + /** value_id_map is used to record the serialized id of each pir::Value, + * which is used to serialize an op's operands. */ + std::map value_id_map; + + /** The xxx_id_ counters record the current id of each IR structure + * to be serialized. */ + + int64_t region_id_ = 0; + int64_t block_id_ = 0; + int64_t value_id_ = 1; + int64_t blockarg_id_ = -1; + + bool trainable_ = true; + + Json WriteProgram(const pir::Program* program); + Json WriteRegion(const pir::Region* region, const std::string& region_name); + Json WriteBlock(const pir::Block* block, const std::string& block_name); + Json WriteOp(const pir::Operation& op); + Json WriteBlockArg(const pir::Value& value); + Json WriteValue(const pir::Value& value); + Json WriteOpOperand(const pir::OpOperand& op_operand); + Json WriteAttributesMapOpinfo(pir::Operation* op, + const AttributeMap& attr_map); + Json WriteAttributesMapOther(const AttributeMap& attr_map); + /** WriteAttribute is used to write an attribute of an op. + * It calls writeAttr to get the derived class's json object; + * WriteType works the same way. + */ + + Json WriteAttribute(const std::string& op_attr_name, + const pir::Attribute& attr); + Json WriteType(const pir::Type& type); +}; + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h b/paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h new file mode 100644 index 0000000000000..5ebbafb1eb4f7 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include + +#include "paddle/phi/core/dense_tensor.h" + +namespace pir { + +void SaveFunction(const phi::DenseTensor& x, + const std::string& name, + const std::string& file_path, + bool overwrite, + bool save_as_fp16); + +void SaveCombineFunction(const std::vector& x, + const std::vector& names, + const std::string& file_path, + bool overwrite, + bool save_as_fp16, + bool save_to_memory); + +void LoadFunction(const std::string& file_path, + int64_t seek, + const std::vector& shape, + bool load_as_fp16, + phi::DenseTensor* out); + +void LoadCombineFunction(const std::string& file_path, + const std::vector& names, + std::vector* out, + bool load_as_fp16); +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/schema.h b/paddle/fluid/pir/serialize_deserialize/include/schema.h new file mode 100644 index 0000000000000..d444bee469596 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/schema.h @@ -0,0 +1,67 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +namespace pir { +/** + * IMPORTANT!!! + * None of these defining strings can be changed, otherwise deserialization + * will fail. Define all keys of the serialized files here to ensure accuracy + * for deserialization, and make sure all the keys are mutually exclusive. + */ + +// The identifier of every IR structure (region, block, op, attr, type, +// value, etc.), which can be a string, an int64_t, etc. +#define ID "id" + +// program's key: +#define REGIONS "regions" + +// region's key: +// which is json array with block json object(ID and BLOCKARGS and BLOCKOPS) +#define BLOCKS "blocks" + +// block's key: +// which is json array with value json object +#define BLOCKARGS "args" +// which is json array with operation json object +#define BLOCKOPS "ops" + +// operation's key: +// which is json array with opoperand json object(ID) +#define OPOPERANDS "I" + +// which is json array with value json object(ID and TYPE_TYPE) +#define OPRESULTS "O" + +// which is json array with json object(NAME and ATTR_TYPE) +#define ATTRS "A" +#define OPRESULTS_ATTRS "OA" + +// value's key: +// value's type which should be pir::Type's json object(ID or ID and DATA). +#define TYPE_TYPE "TT" + +// attr's name which is operation's feature. +#define NAME "N" + +// attr's value which is pir::Attribute's json object(ID and DATA). +#define ATTR_TYPE "AT" + +// type/attr's contents which is json::array. +#define DATA "D" + +// NULL_TYPE +#define NULL_TYPE "NULL" +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h b/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h new file mode 100644 index 0000000000000..a6cae97f135d9 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h @@ -0,0 +1,346 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#include "glog/logging.h" + +#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" +#include "paddle/fluid/pir/serialize_deserialize/include/schema.h" +#include "paddle/fluid/pir/serialize_deserialize/include/third_part.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/pir/include/core/builtin_attribute.h" +#include "paddle/pir/include/core/builtin_type.h" + +namespace pir { +/** serializeTypeToJson is a template function to serialize + * a pir type to a json object. A pir type may or may not carry a value: + * value-free types only have ID, while value-based types have + * DATA in addition to ID. + * + * If a new pir type that needs to be serialized is added, + * it must have a name() method, returning a string which + * should be different from other types' names. + * (The name template is t_dialectname_typename). + * Note: the prefix t stands for 'type'. + * + * If the pir type has a value, it should have a data() method, + * which returns the value of the type. The data() method should + * preferably return a type supported by json, such as std::vector, + * std::string, int, float and so on; if not, serializeTypeToJson + * needs to be specialized. + */ + +template +Json serializeTypeToJson(const T& type) { + Json json_obj; + json_obj[ID] = type.name(); + return json_obj; +} + +/** serializeAttrToJson is a template function to serialize + * a pir attribute to a json object. A pir attribute usually carries a + * value, so its json object has DATA in addition to ID. + * + * If a new pir attr that needs to be serialized is added, + * it must have a name() method, returning a string which + * should be different from other attrs' names. + * (The name template is a_dialectname_typename). + * Note: the prefix a stands for 'attribute'. + * + * It also needs a data() method, which returns the value of the + * attribute. The data() method should preferably return a type + * supported by json, such as std::vector, std::string, int, + * float and so on; if not, serializeAttrToJson + * needs to be specialized.
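 *
 * As an illustration, a hand-written specialization keeps the same
 * ID/DATA shape (MyAttr and ToVector() are hypothetical names, not part
 * of this patch):
 *
 *   template <>
 *   Json serializeAttrToJson<MyAttr>(const MyAttr& attr) {
 *     Json json_obj;
 *     json_obj[ID] = attr.name();              // e.g. "a_mydialect_myattr"
 *     json_obj[DATA] = attr.data().ToVector(); // a json-friendly value
 *     return json_obj;
 *   }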
+ */ + +template +Json serializeAttrToJson(const T& attr) { + Json json_obj; + json_obj[ID] = attr.name(); + json_obj[DATA] = attr.data(); + return json_obj; +} + +#define SERIALIZE_ATTR_TO_JSON(type, data) \ + template <> \ + Json serializeAttrToJson(const type& attr) { \ + Json json_obj; \ + json_obj[ID] = attr.name(); \ + json_obj[DATA] = data; \ + return json_obj; \ + } + +SERIALIZE_ATTR_TO_JSON(pir::StrAttribute, attr.AsString()); + +SERIALIZE_ATTR_TO_JSON(pir::Complex64Attribute, + std::vector({attr.data().real, attr.data().imag})); +SERIALIZE_ATTR_TO_JSON(pir::Complex128Attribute, + std::vector({attr.data().real, attr.data().imag})); +SERIALIZE_ATTR_TO_JSON(paddle::dialect::IntArrayAttribute, + attr.data().GetData()); +SERIALIZE_ATTR_TO_JSON(paddle::dialect::DataTypeAttribute, + phi::DataTypeToString(attr.data())); + +template <> +Json serializeAttrToJson( + const paddle::dialect::ScalarAttribute& attr) { + Json json_obj; + json_obj[ID] = attr.name(); + + Json content = Json::array(); + auto scalar = attr.data(); + auto dtype_ = scalar.dtype(); + content.push_back(DataTypeToString(dtype_)); + + if (dtype_ == phi::DataType::FLOAT32) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::INT32) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::FLOAT64) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::INT8) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::FLOAT16 || + dtype_ == phi::DataType::UINT16 || + dtype_ == phi::DataType::BFLOAT16) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::INT16) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::INT64) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::UINT8) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::UINT32) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::UINT64) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::BOOL) { + content.push_back(scalar.to()); + } else if (dtype_ == phi::DataType::COMPLEX64) { + content.push_back(scalar.to>().real); + content.push_back(scalar.to>().imag); + } else if (dtype_ == phi::DataType::COMPLEX128) { + content.push_back(scalar.to>().real); + content.push_back(scalar.to>().imag); + } else { + PADDLE_THROW(common::errors::InvalidArgument( + "Invalid tensor data type `", dtype_, "`.")); + } + json_obj[DATA] = content; + return json_obj; +} + +template <> +Json serializeAttrToJson( + const paddle::dialect::PlaceAttribute& attr) { + Json json_obj; + json_obj[ID] = attr.name(); + Json content = Json::array(); + auto place = attr.data(); + content.push_back(static_cast(place.GetType())); + content.push_back(place.GetDeviceId()); // int8_t + content.push_back(place.GetDeviceType()); // string + json_obj[DATA] = content; + return json_obj; +} + +Json writeType(const pir::Type& type) { + Json type_json = Json::object(); + if (type.isa()) { + VLOG(8) << "Write BoolType ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write BFloat16Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Float16Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Float32Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Float64Type ... 
"; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Int8Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write UInt8Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Int16Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Int32Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Int64Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write IndexType ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Complex64Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + } else if (type.isa()) { + VLOG(8) << "Write Complex128Type ... "; + return pir::serializeTypeToJson( + type.dyn_cast()); + // NOTE(Ruting) those Types need call writeType which make build error + // when use template func serializeTypeToJson + } else if (type.isa()) { + VLOG(8) << "Write VectorType ... "; + auto type_ = type.dyn_cast(); + type_json[ID] = type_.name(); + Json content = Json::array(); + for (auto type_x : type_.data()) { + content.push_back(writeType(type_x)); + } + type_json[DATA] = content; + return type_json; + } else if (type.isa()) { + VLOG(8) << "Write DenseTensorType ... "; + auto type_ = type.dyn_cast(); + + type_json[ID] = type_.name(); + Json content = Json::array(); + content.push_back(writeType(type_.dtype())); + + std::vector dims_; + for (auto i = 0; i < type_.dims().size(); i++) { + dims_.push_back(type_.dims().at(i)); + } + content.push_back(dims_); + + content.push_back(DataLayoutToString(type_.data_layout())); + + content.push_back(type_.lod()); + + content.push_back(type_.offset()); + type_json[DATA] = content; + return type_json; + } else if (!type) { + type_json[ID] = NULL_TYPE; + return type_json; + } else { + PADDLE_ENFORCE( + false, phi::errors::InvalidArgument("Unknown Type when write type")); + } + VLOG(8) << "Finish write Type ... 
"; + + return type_json; +} + +SERIALIZE_ATTR_TO_JSON(pir::TypeAttribute, writeType(attr.data())); + +Json writeAttr(const pir::Attribute& attr) { + Json attr_json = Json::object(); + if (attr.isa()) { + VLOG(8) << "write BoolAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write FloatAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write DoubleAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Int32Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Int64Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write IndexAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write ArrayAttribute ."; + auto attr_ = attr.dyn_cast(); + Json val = Json::array(); + for (size_t i = 0; i < attr_.size(); i++) { + val.push_back(writeAttr(attr_.at(i))); + } + attr_json[ID] = attr_.name(); + attr_json[DATA] = val; + return attr_json; + } else if (attr.isa()) { + VLOG(8) << "write TypeAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write TensorNameAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Complex64Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write Complex128Attribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write StrAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write IntArrayAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write ScalarAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write DataTypeAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else if (attr.isa()) { + VLOG(8) << "write PlaceAttribute ."; + return pir::serializeAttrToJson( + attr.dyn_cast()); + } else { + PADDLE_ENFORCE( + false, phi::errors::InvalidArgument("Unknown Attr %s when write attr")); + } + VLOG(8) << "Finish write& attr ... "; + + return attr_json; +} + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/include/third_part.h b/paddle/fluid/pir/serialize_deserialize/include/third_part.h new file mode 100644 index 0000000000000..bfa5146336902 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/include/third_part.h @@ -0,0 +1,17 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "nlohmann/json.hpp" +using Json = nlohmann::json; diff --git a/paddle/fluid/pir/serialize_deserialize/src/interface.cc b/paddle/fluid/pir/serialize_deserialize/src/interface.cc new file mode 100644 index 0000000000000..7a55c478c8b1b --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/src/interface.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/serialize_deserialize/include/interface.h" +#include "paddle/common/enforce.h" +#include "paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h" +#include "paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h" +#include "paddle/phi/common/port.h" + +namespace pir { +#define PROGRAM "program" +#define BASE_CODE "base_code" +#define MAGIC "magic" +#define PIRVERSION "version" +#define PIR "pir" +void WriteModule(const pir::Program& program, + const std::string& file_path, + const uint64_t& pir_version, + bool overwrite, + bool readable, + bool trainable) { + PADDLE_ENFORCE_EQ( + FileExists(file_path) && !overwrite, + false, + common::errors::PreconditionNotMet( + "%s exists!, cannot save to it when overwrite is set to false.", + file_path, + overwrite)); + + // write base code + Json total; + + total[BASE_CODE] = {{MAGIC, PIR}, {PIRVERSION, pir_version}}; + + ProgramWriter writer(pir_version, trainable); + // write program + total[PROGRAM] = writer.GetProgramJson(&program); + std::string total_str; + if (readable) { + total_str = total.dump(4); + } else { + total_str = total.dump(); + } + + MkDirRecursively(DirName(file_path).c_str()); + std::ofstream fout(file_path, std::ios::binary); + PADDLE_ENFORCE_EQ(static_cast(fout), + true, + common::errors::Unavailable( + "Cannot open %s to save variables.", file_path)); + fout << total_str; + fout.close(); +} + +void ReadModule(const std::string& file_path, + pir::Program* program, + const uint64_t& pir_version) { + std::ifstream f(file_path); + Json data = Json::parse(f); + + if (data.contains(BASE_CODE) && data[BASE_CODE].contains(MAGIC) && + data[BASE_CODE][MAGIC] == PIR) { + uint64_t file_version = + data.at(BASE_CODE).at(PIRVERSION).template get(); + if (file_version != pir_version) { + PADDLE_THROW( + common::errors::InvalidArgument("Invalid model version file.")); + } + } else { + PADDLE_THROW(common::errors::InvalidArgument("Invalid model file.")); + } + + ProgramReader reader(pir_version); + reader.RecoverProgram(&(data[PROGRAM]), program); +} + +} // namespace pir diff --git a/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc b/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc new file mode 100644 index 0000000000000..88ee2ba168476 --- /dev/null +++ b/paddle/fluid/pir/serialize_deserialize/src/ir_deserialize.cc @@ -0,0 +1,160 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/serialize_deserialize/include/ir_deserialize.h" +#include "paddle/fluid/pir/serialize_deserialize/include/deserialize_utils.h" namespace pir { +void ProgramReader::RecoverProgram(Json* program_json, + pir::Program* recover_program) { + id_value_map[0] = pir::Value(); + ReadProgram(program_json, recover_program); + VLOG(6) << "Finish json to program."; + return; +} +void ProgramReader::ReadProgram(Json* program_json, pir::Program* program) { + auto top_level_op = program->module_op(); + PADDLE_ENFORCE_EQ( + program_json->at(REGIONS).size(), + 1, + common::errors::InvalidArgument( + "The regions size of program module should be 1 but got %d.", + program_json->at(REGIONS).size())); + auto& region_json = program_json->at(REGIONS).at(0); + auto& block_json = region_json.at(BLOCKS).at(0); + auto& block = top_level_op.block(); + ReadBlock(&block_json, &block); + + VLOG(6) << "Finish Read program."; + return; +} + +void ProgramReader::ReadRegion(Json* region_json, pir::Region* region) { + auto region_name = region_json->at(ID).template get(); + for (auto& block_json : region_json->at(BLOCKS)) { + auto& block = region->emplace_back(); + ReadBlock(&block_json, &block); + } + VLOG(6) << "Finish Read " << region_name; + return; +} + +void ProgramReader::ReadBlock(Json* block_json, pir::Block* block) { + auto block_name = block_json->at(ID).template get(); + + Json& args_json = block_json->at(BLOCKARGS); + if (!args_json.empty()) { + for (auto& arg_json : args_json) { + int64_t arg_id_ = arg_json.at(ID).template get(); + auto value = block->AddArg(ReadType(&(arg_json.at(TYPE_TYPE)))); + id_value_map[arg_id_] = value; + VLOG(6) << "Finish Read blockargument " << arg_id_; + } + } + + Json& ops_json = block_json->at(BLOCKOPS); + if (!ops_json.empty()) { + for (auto& op_json : ops_json) { + block->push_back(ReadOp(&op_json)); + } + } + + VLOG(6) << "Finish Read " << block_name; + return; +} + +pir::Operation* ProgramReader::ReadOp(Json* op_json) { + auto op_name = op_json->at(ID).template get(); + + // deserialize opoperands (find value) + Json& operands_json = op_json->at(OPOPERANDS); + std::vector inputs; + for (auto& operand_json : operands_json) { + int64_t id = operand_json.at(ID).template get(); + inputs.push_back(id_value_map[id]); + } + + // deserialize opresults (find type) + Json& opresults_json = op_json->at(OPRESULTS); + std::vector output_types; + std::vector output_ids; + for (auto& opresult_json : opresults_json) { + int64_t value_id_ = opresult_json.at(ID).template get(); + output_ids.push_back(value_id_); + output_types.push_back(ReadType(&(opresult_json.at(TYPE_TYPE)))); + VLOG(6) << "Finish Read value " << value_id_; + } + + // deserialize necessary attributes + Json& attrs_json = op_json->at(ATTRS); + + pir::AttributeMap attributes; + if (op_json->contains(OPRESULTS_ATTRS)) { + Json& opresults_attrs_json = op_json->at(OPRESULTS_ATTRS); + attributes = ReadAttributesMap(&attrs_json,
+                                   &opresults_attrs_json);
+  } else {
+    Json empty_json = Json::array();
+    attributes = ReadAttributesMap(&attrs_json, &empty_json);
+  }
+
+  pir::IrContext* ctx_ = pir::IrContext::Instance();
+  // prepare opinfo
+  pir::OpInfo op_info = ctx_->GetRegisteredOpInfo(op_name);
+
+  // deserialize op
+  pir::Operation* op =
+      Operation::Create(inputs, attributes, output_types, op_info);
+
+  PADDLE_ENFORCE_EQ(
+      output_ids.size(),
+      static_cast<size_t>(op->num_results()),
+      common::errors::InvalidArgument(
+          "deserialized op has %d results, but the original op has %d results.",
+          op->num_results(),
+          output_ids.size()));
+
+  for (uint32_t i = 0; i < op->num_results(); i++) {
+    id_value_map[output_ids[i]] = op->result(i);
+  }
+
+  VLOG(6) << "Finish Read Operation " << op->name();
+  return op;
+}
+
+pir::AttributeMap ProgramReader::ReadAttributesMap(Json* attrs_json,
+                                                   Json* opresult_attrs_json) {
+  pir::AttributeMap attributes;
+  for (auto& attr_json : *attrs_json) {
+    auto attr_name = attr_json.at(NAME).template get<std::string>();
+    attributes.insert({attr_name, ReadAttribute(&attr_json)});
+  }
+  VLOG(6) << "Finish Read pir::AttributeMap ";
+  for (auto& attr_json : *opresult_attrs_json) {
+    auto attr_name = attr_json.at(NAME).template get<std::string>();
+    attributes.insert({attr_name, ReadAttribute(&attr_json)});
+  }
+  VLOG(6) << "Finish Read Opresults_AttributeMap ";
+  return attributes;
+}
+
+pir::Attribute ProgramReader::ReadAttribute(Json* attr_json) {
+  VLOG(6) << "Begin Read Attribute. ";
+  return pir::parseAttr(&attr_json->at(ATTR_TYPE));
+}
+
+pir::Type ProgramReader::ReadType(Json* type_json) {
+  VLOG(6) << "Begin Read Type. ";
+  return pir::parseType(type_json);
+}
+
+} // namespace pir
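Note: the read side and the write side have to agree on the per-op JSON layout. For reference, the shape ProgramReader::ReadOp expects can be mocked up with nlohmann::json as below; the lowercase key strings are stand-ins for the ID/OPOPERANDS/OPRESULTS/ATTRS/TYPE_TYPE macros whose real values live in the serialize_deserialize headers, and the ids are hypothetical:

    #include "nlohmann/json.hpp"

    nlohmann::json ExampleOpJson() {
      nlohmann::json op;
      op["id"] = "pd_op.relu";         // ID: op name registered in the IrContext
      op["operands"] = {{{"id", 1}}};  // OPOPERANDS: ids resolved via id_value_map
      op["results"] = {{{"id", 2}, {"type", nlohmann::json::object()}}};
      op["attrs"] = nlohmann::json::array();  // ATTRS: [{name, type}, ...]
      // OPRESULTS_ATTRS is only present when the module was saved as trainable.
      return op;
    }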
diff --git a/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc b/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc
new file mode 100644
index 0000000000000..21067aa83906d
--- /dev/null
+++ b/paddle/fluid/pir/serialize_deserialize/src/ir_serialize.cc
@@ -0,0 +1,216 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/pir/serialize_deserialize/include/ir_serialize.h"
+#include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h"
+#include "paddle/fluid/pir/serialize_deserialize/include/serialize_utils.h"
+#include "paddle/pir/include/core/dialect.h"
+#include "paddle/pir/include/core/operation.h"
+
+namespace pir {
+
+Json ProgramWriter::GetProgramJson(const pir::Program* program) {
+  program_json = WriteProgram(program);
+  VLOG(6) << "Finish program to json.";
+  return program_json;
+}
+
+Json ProgramWriter::WriteProgram(const pir::Program* program) {
+  Json program_json;
+  program_json[REGIONS] = Json::array();
+  auto top_level_op = program->module_op();
+
+  for (size_t i = 0; i < top_level_op->num_regions(); ++i) {
+    std::string region_name = "region_" + std::to_string(region_id_++);
+    auto& region = top_level_op->region(i);
+    auto region_json = WriteRegion(&region, region_name);
+    program_json[REGIONS].emplace_back(region_json);
+  }
+  VLOG(6) << "Finish write program.";
+  return program_json;
+}
+
+Json ProgramWriter::WriteRegion(const pir::Region* region,
+                                const std::string& region_name) {
+  Json region_json;
+  region_json[ID] = region_name;
+  region_json[BLOCKS] = Json::array();
+  for (auto block : region->blocks()) {
+    std::string block_name = "block_" + std::to_string(block_id_++);
+    auto block_json = WriteBlock(block, block_name);
+    region_json[BLOCKS].emplace_back(block_json);
+  }
+  VLOG(6) << "Finish write " << region_name;
+  return region_json;
+}
+
+Json ProgramWriter::WriteBlock(const pir::Block* block,
+                               const std::string& block_name) {
+  Json block_json;
+  block_json[ID] = block_name;
+
+  Json args_json = Json::array();
+  for (auto arg : block->args()) {
+    auto arg_json = WriteBlockArg(arg);
+    args_json.emplace_back(arg_json);
+  }
+  block_json[BLOCKARGS] = args_json;
+
+  Json ops_json = Json::array();
+  for (auto op : block->ops()) {
+    auto op_json = WriteOp(*op);
+    ops_json.emplace_back(op_json);
+  }
+  block_json[BLOCKOPS] = ops_json;
+
+  VLOG(6) << "Finish write " << block_name;
+  return block_json;
+}
+
+Json ProgramWriter::WriteBlockArg(const pir::Value& value) {
+  Json arg_json;
+  Json var = WriteType(value.type());
+  value_id_map[value] = blockarg_id_;
+  arg_json[ID] = blockarg_id_;
+  arg_json[TYPE_TYPE] = var;
+
+  VLOG(6) << "Finish write blockargument " << blockarg_id_;
+  blockarg_id_--;
+
+  return arg_json;
+}
+
+Json ProgramWriter::WriteValue(const pir::Value& value) {
+  Json var_json;
+  if (value) {
+    value_id_map[value] = value_id_;
+    var_json[ID] = value_id_;
+    VLOG(6) << "Finish write value " << value_id_;
+    value_id_++;
+  } else {
+    var_json[ID] = 0;  // NULL_TYPE
+    VLOG(6) << "Finish write NULL_TYPE value.";
+  }
+
+  Json var = WriteType(value.type());
+  var_json[TYPE_TYPE] = var;
+
+  return var_json;
+}
+
+Json ProgramWriter::WriteOp(const pir::Operation& op) {
+  Json op_json = Json::object();
+  op_json[ID] = op.name();
+  // serialize opoperands
+  Json operands_json = Json::array();
+  for (auto operand : op.operands()) {
+    auto operand_json = WriteOpOperand(operand);
+    operands_json.emplace_back(operand_json);
+  }
+  op_json[OPOPERANDS] = operands_json;
+
+  // serialize opresults
+  Json opresults_json = Json::array();
+  for (auto& opresult : op.results()) {
+    auto opresult_json = WriteValue(opresult);
+    opresults_json.emplace_back(opresult_json);
+  }
+  op_json[OPRESULTS] = opresults_json;
+
+  // serialize attributes
+  op_json[ATTRS] = WriteAttributesMapOpinfo(const_cast<pir::Operation*>(&op),
+                                            op.attributes());
+  if (trainable_) {
+    op_json[OPRESULTS_ATTRS] =
+        WriteAttributesMapOther(op.attributes());
+  }
+
+  VLOG(6) << "Finish write Operation " << op.name();
+  return op_json;
+}
+
+Json ProgramWriter::WriteOpOperand(const pir::OpOperand& op_operand) {
+  Json operand_json = Json::object();
+  if (op_operand.source()) {
+    int64_t id = value_id_map[op_operand.source()];
+    operand_json[ID] = id;
+    VLOG(6) << "Finish write OpOperand " << id;
+  } else {
+    operand_json[ID] = 0;  // NULL_VALUE
+    VLOG(6) << "Finish write NULL_VALUE OpOperand.";
+  }
+
+  return operand_json;
+}
+
+Json ProgramWriter::WriteAttributesMapOpinfo(pir::Operation* op,
+                                             const AttributeMap& attr_map) {
+  Json attrs_json = Json::array();
+
+  if (op->dialect()->name() == "pd_op") {
+    if (op->dyn_cast<paddle::dialect::OpYamlInfoInterface>() != nullptr) {
+      auto [_1, attr_info, _3, _4, _5] =
+          op->dyn_cast<paddle::dialect::OpYamlInfoInterface>().GetOpInfo();
+      if (attr_info.size() != 0) {
+        for (auto it = attr_info.begin(); it != attr_info.end(); it++) {
+          if (attr_map.find(it->name) != attr_map.end()) {
+            attrs_json.emplace_back(
+                WriteAttribute(it->name, attr_map.at(it->name)));
+          }
+        }
+      }
+    } else {
+      PADDLE_THROW(common::errors::InvalidArgument(
+          "The op %s does not have OpYamlInfoInterface.", op->name()));
+    }
+  } else {
+    for (auto& attr : attr_map) {
+      if (attr.first != "stop_gradient" && attr.first != "persistable" &&
+          attr.first != "op_callstack") {
+        attrs_json.emplace_back(WriteAttribute(attr.first, attr.second));
+      }
+    }
+  }
+
+  VLOG(6) << "Finish write Opinfo AttributeMap ";
+  return attrs_json;
+}
+
+Json ProgramWriter::WriteAttributesMapOther(const AttributeMap& attr_map) {
+  Json operesult_attrs_json = Json::array();
+  for (auto& attr : attr_map) {
+    if (attr.first == "stop_gradient" || attr.first == "persistable") {
+      operesult_attrs_json.emplace_back(
+          WriteAttribute(attr.first, attr.second));
+    }
+  }
+
+  VLOG(6) << "Finish write Other AttributeMap ";
+  return operesult_attrs_json;
+}
+
+Json ProgramWriter::WriteAttribute(const std::string& op_attr_name,
+                                   const pir::Attribute& attr) {
+  Json attr_json;
+  attr_json[NAME] = op_attr_name;
+  attr_json[ATTR_TYPE] = pir::writeAttr(attr);
+
+  VLOG(6) << "Finish write Attribute. ";
+  return attr_json;
+}
+
+Json ProgramWriter::WriteType(const pir::Type& type) {
+  VLOG(6) << "Finish write Type. ";
+  return pir::writeType(type);
+}
+} // namespace pir
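Note: one detail worth calling out in the writer above is the value-id scheme. Op results get positive ids from value_id_, block arguments get negative ids from blockarg_id_, and id 0 is reserved for null values (WriteValue and WriteOpOperand emit 0, and ProgramReader::RecoverProgram seeds id_value_map[0] with a default pir::Value). A compact restatement, with the starting counters assumed to be 1 and -1 since their initializers are not part of this diff:

    #include <cstdint>

    struct ValueIdScheme {
      int64_t value_id_ = 1;      // op results: 1, 2, 3, ...
      int64_t blockarg_id_ = -1;  // block arguments: -1, -2, -3, ...
      // 0 is reserved for "no value"; both reader and writer treat it specially.
      int64_t NextResultId() { return value_id_++; }
      int64_t NextBlockArgId() { return blockarg_id_--; }
    };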
diff --git a/paddle/fluid/pir/serialize_deserialize/src/save_load_parameters.cc b/paddle/fluid/pir/serialize_deserialize/src/save_load_parameters.cc
new file mode 100644
index 0000000000000..d3c047f78b960
--- /dev/null
+++ b/paddle/fluid/pir/serialize_deserialize/src/save_load_parameters.cc
@@ -0,0 +1,209 @@
+/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h"
+
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#include "glog/logging.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/phi/common/port.h"
+#include "paddle/phi/kernels/funcs/data_type_transform.h"
+
+namespace pir {
+
+const phi::DeviceContext* GetDeviceContext(const phi::DenseTensor& x) {
+  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+  const phi::DeviceContext* dev_ctx = nullptr;
+  auto place = x.place();
+  dev_ctx = pool.Get(place);
+  return dev_ctx;
+}
+
+const phi::DenseTensor CastTensorType(const phi::DeviceContext* dev_ctx,
+                                      const phi::DenseTensor& x,
+                                      phi::DataType out_dtype) {
+  auto place = x.place();
+  if (paddle::platform::is_cpu_place(place)) {
+    auto out = phi::funcs::TransDataType(
+        reinterpret_cast<const phi::CPUContext&>(*dev_ctx), x, out_dtype);
+    return out;
+  } else if (paddle::platform::is_gpu_place(place)) {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    return phi::funcs::TransDataType(
+        reinterpret_cast<const phi::GPUContext&>(*dev_ctx), x, out_dtype);
+#endif
+  }
+  return x;
+}
+
+void SaveFunction(const phi::DenseTensor& x,
+                  const std::string& name,
+                  const std::string& file_path,
+                  bool overwrite,
+                  bool save_as_fp16) {
+  PADDLE_ENFORCE_EQ(
+      FileExists(file_path) && !overwrite,
+      false,
+      phi::errors::PreconditionNotMet(
+          "%s exists! Cannot save to it when overwrite is set to false.",
+          file_path,
+          overwrite));
+
+  MkDirRecursively(DirName(file_path).c_str());
+  VLOG(6) << "save func save path: " << file_path;
+  std::ofstream fout(file_path, std::ios::binary);
+  PADDLE_ENFORCE_EQ(
+      static_cast<bool>(fout),
+      true,
+      phi::errors::Unavailable("Cannot open %s to save variables.", file_path));
+
+  auto in_dtype = x.dtype();
+  auto out_dtype = save_as_fp16 ? phi::DataType::FLOAT16 : in_dtype;
+
+  const phi::DeviceContext* dev_ctx = GetDeviceContext(x);
+  if (in_dtype != out_dtype) {
+    auto out = CastTensorType(dev_ctx, x, out_dtype);
+    paddle::framework::SerializeToStream(fout, out, *dev_ctx);
+  } else {
+    paddle::framework::SerializeToStream(fout, x, *dev_ctx);
+  }
+  fout.close();
+  VLOG(6) << "save func done ";
+}
+
+void SaveCombineFunction(const std::vector<const phi::DenseTensor*>& x,
+                         const std::vector<std::string>& names,
+                         const std::string& file_path,
+                         bool overwrite,
+                         bool save_as_fp16,
+                         bool save_to_memory) {
+  PADDLE_ENFORCE_EQ(
+      FileExists(file_path) && !overwrite,
+      false,
+      phi::errors::PreconditionNotMet(
+          "%s exists! Cannot save to it when overwrite is set to false.",
+          file_path,
+          overwrite));
+
+  MkDirRecursively(DirName(file_path).c_str());
+  VLOG(6) << "save func save path: " << file_path;
+  std::ostringstream ss;
+  PADDLE_ENFORCE_GT(x.size(),
+                    0UL,
+                    phi::errors::InvalidArgument(
+                        "The number of variables to be saved is %d, expect "
+                        "it to be greater than 0.",
+                        x.size()));
+  const phi::DeviceContext* dev_ctx = GetDeviceContext(*(x[0]));
+  for (size_t i = 0; i < x.size(); i++) {
+    auto& tensor = *(x[i]);
+    PADDLE_ENFORCE_EQ(
+        tensor.IsInitialized(),
+        true,
+        phi::errors::InvalidArgument(
+            "The Tensor with Index (%d) to be saved is not initialized.", i));
+    auto in_dtype = tensor.dtype();
+    auto out_dtype = save_as_fp16 ? phi::DataType::FLOAT16 : in_dtype;
+    if (in_dtype != out_dtype) {
+      auto out = CastTensorType(dev_ctx, tensor, out_dtype);
+      paddle::framework::SerializeToStream(ss, out, *dev_ctx);
+    } else {
+      paddle::framework::SerializeToStream(ss, tensor, *dev_ctx);
+    }
+  }
+  MkDirRecursively(DirName(file_path).c_str());
+  std::ofstream fout(file_path, std::ios::binary);
+  PADDLE_ENFORCE_EQ(
+      static_cast<bool>(fout),
+      true,
+      phi::errors::Unavailable("Cannot open %s to save variables.", file_path));
+  fout << ss.str();
+  fout.close();
+  VLOG(6) << "save combine done ";
+}
+
+void LoadFunction(const std::string& file_path,
+                  int64_t seek,
+                  const std::vector<int64_t>& shape,
+                  bool load_as_fp16,
+                  phi::DenseTensor* out) {
+  std::ifstream fin(file_path, std::ios::binary);
+  PADDLE_ENFORCE_EQ(static_cast<bool>(fin),
+                    true,
+                    phi::errors::Unavailable(
+                        "Load operator fail to open file %s, please check "
+                        "whether the model file is complete or damaged.",
+                        file_path));
+  PADDLE_ENFORCE_NOT_NULL(out,
+                          phi::errors::InvalidArgument(
+                              "The variable to be loaded cannot be found."));
+  const phi::DeviceContext* dev_ctx = GetDeviceContext(*out);
+
+  if (seek != -1) {
+    PADDLE_ENFORCE_GE(seek,
+                      0,
+                      phi::errors::InvalidArgument(
+                          "seek with tensor must be greater than or equal "
+                          "to 0"));
+    paddle::framework::DeserializeFromStream(fin, out, *dev_ctx, seek, shape);
+  } else {
+    paddle::framework::DeserializeFromStream(fin, out);
+  }
+
+  auto in_dtype = out->dtype();
+  auto out_dtype = load_as_fp16 ? phi::DataType::FLOAT16 : in_dtype;
+  if (in_dtype != out_dtype) {
+    auto cast_in = *out;
+    *out = CastTensorType(dev_ctx, cast_in, out_dtype);
+  }
+}
+
+void LoadCombineFunction(const std::string& file_path,
+                         const std::vector<std::string>& names,
+                         std::vector<phi::DenseTensor*>* out,
+                         bool load_as_fp16) {
+  std::ifstream fin(file_path, std::ios::binary);
+  PADDLE_ENFORCE_EQ(static_cast<bool>(fin),
+                    true,
+                    phi::errors::Unavailable(
+                        "Load operator fail to open file %s, please check "
+                        "whether the model file is complete or damaged.",
+                        file_path));
+
+  PADDLE_ENFORCE_GT(out->size(),
+                    0UL,
+                    phi::errors::InvalidArgument(
+                        "The number of variables to be loaded is %d, expect "
+                        "it to be greater than 0.",
+                        out->size()));
+  const phi::DeviceContext* dev_ctx = GetDeviceContext(*(out->at(0)));
+  for (size_t i = 0; i < names.size(); i++) {
+    auto tensor = out->at(i);
+    paddle::framework::DeserializeFromStream(fin, tensor);
+
+    auto in_dtype = tensor->dtype();
+    auto out_dtype = load_as_fp16 ? phi::DataType::FLOAT16 : in_dtype;
+    if (in_dtype != out_dtype) {
+      auto cast_in = *tensor;
+      *tensor = CastTensorType(dev_ctx, cast_in, out_dtype);
+    }
+  }
+  fin.peek();
+  PADDLE_ENFORCE_EQ(
+      fin.eof(),
+      true,
+      phi::errors::Unavailable("Not allowed to load partial data via "
+                               "load_combine_op, please use load_op instead."));
+}
+
+} // namespace pir
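Note: taken together, the four functions above give a file-based parameter round trip. A hypothetical two-tensor example (path and names invented; the out-tensors must be constructed on the target place before loading):

    #include <string>
    #include <vector>

    #include "paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h"
    #include "paddle/phi/core/dense_tensor.h"

    void ParamsRoundTrip(const phi::DenseTensor& w, const phi::DenseTensor& b,
                         phi::DenseTensor* w_out, phi::DenseTensor* b_out) {
      std::vector<const phi::DenseTensor*> params = {&w, &b};
      std::vector<std::string> names = {"w", "b"};
      pir::SaveCombineFunction(params, names, "/tmp/params.pdiparams",
                               /*overwrite=*/true, /*save_as_fp16=*/false,
                               /*save_to_memory=*/false);

      std::vector<phi::DenseTensor*> loaded = {w_out, b_out};
      pir::LoadCombineFunction("/tmp/params.pdiparams", names, &loaded,
                               /*load_as_fp16=*/false);
    }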
diff --git a/paddle/fluid/pir/transforms/CMakeLists.txt b/paddle/fluid/pir/transforms/CMakeLists.txt
index 627fcb78d8563..3a06aa2da7d77 100644
--- a/paddle/fluid/pir/transforms/CMakeLists.txt
+++ b/paddle/fluid/pir/transforms/CMakeLists.txt
@@ -6,7 +6,7 @@ if(NOT WITH_CINN)
        ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_detector.cc)
 endif()
 
-if(NOT WITH_MKLDNN)
+if(NOT WITH_ONEDNN)
   file(GLOB_RECURSE onednn_srcs "onednn/*.cc")
   list(REMOVE_ITEM transforms_srcs ${onednn_srcs})
 endif()
@@ -26,7 +26,7 @@ set(transforms_deps
     device_event_base)
 
 if(WITH_CINN)
-  set(transforms_deps ${transforms_deps} cinn_op_dialect cinnapi)
+  set(transforms_deps ${transforms_deps} cinnapi)
 endif()
 
 cc_library(
diff --git a/paddle/fluid/pir/transforms/general/constant_folding_pass.cc b/paddle/fluid/pir/transforms/general/constant_folding_pass.cc
index bf1bc26850c56..e70039be7d375 100644
--- a/paddle/fluid/pir/transforms/general/constant_folding_pass.cc
+++ b/paddle/fluid/pir/transforms/general/constant_folding_pass.cc
@@ -238,7 +238,11 @@ class ConstantFoldingPattern : public pir::RewritePattern {
       const std::vector<std::pair<pir::Operation*, int32_t>>& use_ops) const {
     for (auto [use_op, idx] : use_ops) {
       if (use_op->isa<pir::CombineOp>()) {
-        if (!ReplaceResultByParameterOp(use_op)) return false;
+        if (!ReplaceResultByParameterOp(use_op)) {
+          return false;
+        }
+      } else if (use_op->isa<pir::ShadowOutputOp>()) {
+        return false;
       } else if (use_op->HasInterface<paddle::dialect::OpYamlInfoInterface>()) {
         auto [input_infos, _1, _2, _3, _4] =
             use_op->dyn_cast<paddle::dialect::OpYamlInfoInterface>()
@@ -255,6 +259,9 @@ class ConstantFoldingPattern : public pir::RewritePattern {
   }
 
   bool ReplaceResultByParameterOp(pir::Operation* op) const {
+    if (op->isa<pir::ShadowOutputOp>()) {
+      return false;
+    }
     for (uint32_t i = 0; i < op->num_results(); i++) {
       auto use_ops = pir::GetUseOpsForOutput(op, i);
       if (!CheckUseOps(use_ops)) return false;
diff --git a/paddle/fluid/pir/transforms/general/identity_op_clean_pass.cc b/paddle/fluid/pir/transforms/general/identity_op_clean_pass.cc
index fe2369e71a551..ed7dc13da540c 100644
--- a/paddle/fluid/pir/transforms/general/identity_op_clean_pass.cc
+++ b/paddle/fluid/pir/transforms/general/identity_op_clean_pass.cc
@@ -245,39 +245,6 @@ class ReplaceDropoutWithScalePattern : public paddle::drr::DrrPatternBase {
   }
 };
 
-class RemoveRedundantTransposePattern : public paddle::drr::DrrPatternBase {
- public:
-  std::string name() const override {
-    return "RemoveRedundantTransposePattern";
-  }
-
-  void operator()(paddle::drr::DrrPatternContext *ctx) const override {
-    paddle::drr::SourcePattern pat = ctx->SourcePattern();
-    const auto &transpose1 =
-        pat.Op("pd_op.transpose", {{"perm", pat.Attr("perm_1")}});
-    const auto &transpose2 =
-        pat.Op("pd_op.transpose", {{"perm", pat.Attr("perm_2")}});
-
-    pat.Tensor("ret") = transpose2(transpose1(pat.Tensor("arg_transpose")));
-
-    paddle::drr::ResultPattern res = pat.ResultPattern();
-    const auto &new_perm_attr = res.ComputeAttr(
-        [](const paddle::drr::MatchContext &match_ctx) -> std::vector<int> {
-          const auto &perm1 = match_ctx.Attr<std::vector<int>>("perm_1");
-          const auto &perm2 = match_ctx.Attr<std::vector<int>>("perm_2");
-          std::vector<int> new_perm;
-          for (int v : perm2) {
-            new_perm.emplace_back(perm1[v]);
-          }
-          return new_perm;
-        });
-    const auto &transpose_continuous =
-        res.Op("pd_op.transpose", {{"perm", new_perm_attr}});
-
-    res.Tensor("ret") = transpose_continuous(res.Tensor("arg_transpose"));
-  }
-};
-
 class IdentityOpCleanPass : public pir::PatternRewritePass {
  public:
   IdentityOpCleanPass()
@@ -292,7 +259,6 @@ class IdentityOpCleanPass : public pir::PatternRewritePass {
     ps.Add(paddle::drr::Create(context));
     ps.Add(paddle::drr::Create(context));
     ps.Add(paddle::drr::Create(context));
-    ps.Add(paddle::drr::Create<RemoveRedundantTransposePattern>(context));
     return ps;
   }
 };
diff --git a/paddle/fluid/pir/transforms/general/remove_redundant_transpose_pass.cc b/paddle/fluid/pir/transforms/general/remove_redundant_transpose_pass.cc
new file mode 100644
index 0000000000000..d6a1efb0e16ab
--- /dev/null
+++ b/paddle/fluid/pir/transforms/general/remove_redundant_transpose_pass.cc
@@ -0,0 +1,79 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/pir/transforms/general/remove_redundant_transpose_pass.h"
+
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+#include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
+
+#include "paddle/pir/include/pass/pass.h"
+#include "paddle/pir/include/pass/pass_registry.h"
+
+namespace {
+
+class RemoveRedundantTransposePattern : public paddle::drr::DrrPatternBase {
+ public:
+  std::string name() const override {
+    return "RemoveRedundantTransposePattern";
+  }
+
+  void operator()(paddle::drr::DrrPatternContext *ctx) const override {
+    paddle::drr::SourcePattern pat = ctx->SourcePattern();
+    const auto &transpose1 =
+        pat.Op("pd_op.transpose", {{"perm", pat.Attr("perm_1")}});
+    const auto &transpose2 =
+        pat.Op("pd_op.transpose", {{"perm", pat.Attr("perm_2")}});
+
+    pat.Tensor("ret") = transpose2(transpose1(pat.Tensor("arg_transpose")));
+
+    paddle::drr::ResultPattern res = pat.ResultPattern();
+    const auto &new_perm_attr = res.ComputeAttr(
+        [](const paddle::drr::MatchContext &match_ctx) -> std::vector<int> {
+          const auto &perm1 = match_ctx.Attr<std::vector<int>>("perm_1");
+          const auto &perm2 = match_ctx.Attr<std::vector<int>>("perm_2");
+          std::vector<int> new_perm;
+          for (int v : perm2) {
+            new_perm.emplace_back(perm1[v]);
+          }
+          return new_perm;
+        });
+    const auto &transpose_continuous =
+        res.Op("pd_op.transpose", {{"perm", new_perm_attr}});
+
+    res.Tensor("ret") = transpose_continuous(res.Tensor("arg_transpose"));
+  }
+};
+
+class RemoveRedundantTransposePass : public pir::PatternRewritePass {
+ public:
+  RemoveRedundantTransposePass()
+      : pir::PatternRewritePass("remove_redundant_transpose_pass", 2) {}
+
+  pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override {
+    pir::RewritePatternSet ps(context);
+    ps.Add(paddle::drr::Create<RemoveRedundantTransposePattern>(context));
+    return ps;
+  }
+};
+
+} // namespace
+
+namespace pir {
+
+std::unique_ptr<Pass> CreateRemoveRedundantTransposePass() {
+  return std::make_unique<RemoveRedundantTransposePass>();
+}
+} // namespace pir
+
+REGISTER_IR_PASS(remove_redundant_transpose_pass, RemoveRedundantTransposePass);
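Note: the fused permutation above composes as new_perm[i] = perm1[perm2[i]]. Dimension i of the final output comes from dimension perm2[i] of the first transpose's output, which in turn is dimension perm1[perm2[i]] of the original input. A quick stand-alone check with invented perms:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> perm1 = {0, 2, 1, 3};  // first pd_op.transpose
      std::vector<int> perm2 = {0, 1, 3, 2};  // second pd_op.transpose
      std::vector<int> new_perm;
      for (int v : perm2) new_perm.emplace_back(perm1[v]);
      // A single transpose with {0, 2, 3, 1} reproduces the pair.
      assert((new_perm == std::vector<int>{0, 2, 3, 1}));
      return 0;
    }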
diff --git a/paddle/fluid/pir/transforms/general/remove_redundant_transpose_pass.h b/paddle/fluid/pir/transforms/general/remove_redundant_transpose_pass.h
new file mode 100644
index 0000000000000..338dcd26f6564
--- /dev/null
+++ b/paddle/fluid/pir/transforms/general/remove_redundant_transpose_pass.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include "paddle/pir/include/core/dll_decl.h"
+
+namespace pir {
+
+class Pass;
+
+IR_API std::unique_ptr<Pass> CreateRemoveRedundantTransposePass();
+
+} // namespace pir
diff --git a/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.cc b/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.cc
new file mode 100644
index 0000000000000..bcf88170bdd54
--- /dev/null
+++ b/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.cc
@@ -0,0 +1,199 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.h"
+
+#include "paddle/common/enforce.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/pir/dialect/kernel/ir/kernel_op.h"
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+#include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/pir/include/core/block.h"
+#include "paddle/pir/include/core/builtin_op.h"
+#include "paddle/pir/include/core/ir_context.h"
+#include "paddle/pir/include/pass/pass.h"
+#include "paddle/pir/include/pass/pass_registry.h"
+#include "paddle/pir/include/pattern_rewrite/pattern_match.h"
+
+namespace {
+
+std::unique_ptr<paddle::dialect::OpYamlInfoParser> GetParser(
+    pir::Operation *op) {
+  std::unique_ptr<paddle::dialect::OpYamlInfoParser> op_info_parser(nullptr);
+  std::string op_name = op->dyn_cast<paddle::dialect::PhiKernelOp>().op_name();
+  auto op_info = pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
+  if (op_info.HasInterface<paddle::dialect::OpYamlInfoInterface>()) {
+    auto impl =
+        op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
+    auto op_info_tuple = impl->get_op_info_(op_name);
+    op_info_parser = std::make_unique<paddle::dialect::OpYamlInfoParser>(
+        op_info_tuple, paddle::dialect::IsLegacyOp(op_name));
+  }
+  return op_info_parser;
+}
+
+template <typename T>
+phi::Place GetVarPlace(const paddle::framework::Variable *var,
+                       const phi::Place &exe_place) {
+  phi::Place place;
+  auto &tensor = var->Get<T>();
+  if (tensor.initialized()) {
+    place = tensor.place();
+  } else {
+    place = exe_place;
+  }
+  return place;
+}
+
+class RemoveShadowFeedPattern
+    : public pir::OpRewritePattern<paddle::dialect::PhiKernelOp> {
+ public:
+  explicit RemoveShadowFeedPattern(pir::IrContext *context,
+                                   const pir::Block *block,
+                                   const phi::Place &place,
+                                   const paddle::framework::Scope *scope)
+      : pir::OpRewritePattern<paddle::dialect::PhiKernelOp>::OpRewritePattern(
+            context),
+        place_(place),
+        scope_(scope) {
+    for (auto &[name, value] : block->kwargs()) {
+      kwargs_map_[value] = name;
+    }
+  }
+
+  bool IsSamePlaceShadowFeed(paddle::dialect::PhiKernelOp op) const {
+    if (op.op_name() == "pd_op.shadow_feed") {
+      auto in = op.operand_source(0);
+      if (!kwargs_map_.count(in)) {
+        return false;
+      }
+      auto in_name = kwargs_map_.at(in);
+      auto *var = scope_->FindVar(in_name);
+      phi::Place var_place;
+      if (var->IsType<phi::DenseTensor>()) {
+        var_place = GetVarPlace<phi::DenseTensor>(var, place_);
+      } else if (var->IsType<phi::SelectedRows>()) {
+        var_place = GetVarPlace<phi::SelectedRows>(var, place_);
+      } else if (var->IsType<paddle::framework::VariableRefArray>()) {
+        var_place =
+            GetVarPlace<paddle::framework::VariableRefArray>(var, place_);
+      } else {
+        PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+            "RemoveShadowFeedPattern only support output "
+            "variable of type DenseTensor, SelectedRows or VariableRefArray"));
+      }
+      return var_place == place_;
+    }
+    return false;
+  }
+
+  bool IsTensorAttrShadowFeed(paddle::dialect::PhiKernelOp op) const {
+    if (op.op_name() == "pd_op.shadow_feed") {
+      auto in = op.operand_source(0);
+      if (!kwargs_map_.count(in)) {
+        return false;
+      }
+      auto out = op.result(0);
+      if (out.use_count() == 1) {
+        auto use_op = out.first_use().owner();
+        if (!use_op->isa<paddle::dialect::PhiKernelOp>()) {
+          return false;
+        }
+        auto op_info_parser = GetParser(use_op);
+        for (size_t i = 0; i < use_op->num_operands(); ++i) {
+          if (out == use_op->operand_source(i) &&
+              op_info_parser->IsTensorAttribute(i)) {
+            return true;
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+  bool Match(paddle::dialect::PhiKernelOp op) const override {
+    return IsSamePlaceShadowFeed(op) || IsTensorAttrShadowFeed(op);
+  }
+
+  void Rewrite(paddle::dialect::PhiKernelOp op,
+               pir::PatternRewriter &rewriter) const override {  // NOLINT
+    auto in = op.operand_source(0);
+    auto out = op.result(0);
+    in.set_type(out.type());
+    rewriter.ReplaceAllUsesWith(out, in);
+    rewriter.EraseOp(op);
+  }
+
+ private:
+  const phi::Place place_;
+  const paddle::framework::Scope *scope_;
+  std::unordered_map<::pir::Value, std::string> kwargs_map_;
+};
+
+class RemoveShadowFeedPass : public pir::PatternRewritePass {
+ public:
+  RemoveShadowFeedPass()
+      : pir::PatternRewritePass("remove_shadow_feed_pass", 0) {}
+
+  pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override {
+    PADDLE_ENFORCE_EQ(
+        Has("top_block"),
+        true,
+        phi::errors::InvalidArgument(
+            "Pass initialize failed. "
+            "When using RemoveShadowFeedPass, the block attribute is "
+            "required! Use Set method to set the block attribute."));
+    PADDLE_ENFORCE_EQ(
+        Has(pir::Pass::kPlaceAttr),
+        true,
+        phi::errors::InvalidArgument(
+            "Pass initialize failed. "
+            "When using RemoveShadowFeedPass, the place attribute is "
+            "required! Use Set method to set the place attribute."));
+    PADDLE_ENFORCE_EQ(
+        Has(pir::Pass::kParamScopeAttr),
+        true,
+        phi::errors::InvalidArgument(
+            "Pass initialize failed. "
+            "When using RemoveShadowFeedPass, the scope attribute is "
+            "required! Use Set method to set the scope attribute."));
+    auto block = &Get<const pir::Block>("top_block");
+    auto &place = Get<const phi::Place>(pir::Pass::kPlaceAttr);
+    auto scope =
+        &Get<const paddle::framework::Scope>(pir::Pass::kParamScopeAttr);
+    PADDLE_ENFORCE_NOT_NULL(
+        block, phi::errors::InvalidArgument("block can not be nullptr"));
+    PADDLE_ENFORCE_NOT_NULL(
+        scope, phi::errors::InvalidArgument("scope can not be nullptr"));
+
+    pir::RewritePatternSet ps(context);
+    ps.Add<RemoveShadowFeedPattern>(context, block, place, scope);
+    return ps;
+  }
+};
+
+} // namespace
+
+namespace pir {
+
+std::unique_ptr<Pass> CreateRemoveShadowFeedPass() {
+  return std::make_unique<RemoveShadowFeedPass>();
+}
+
+} // namespace pir
+
+// REGISTER_IR_PASS(remove_shadow_feed_pass,
+//                  RemoveShadowFeedPass);
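Note: because the pass pulls its block, place and scope from pass attributes rather than constructor arguments, callers have to set all three before running it. A wiring sketch, assuming the Set/SetNotOwned attribute API that pir::Pass exposes elsewhere in Paddle (treat the exact plumbing as illustrative, not authoritative):

    #include <memory>
    #include <utility>

    #include "paddle/fluid/framework/scope.h"
    #include "paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.h"
    #include "paddle/phi/common/place.h"
    #include "paddle/pir/include/pass/pass_manager.h"

    void AddRemoveShadowFeed(pir::PassManager* pm,
                             const pir::Block* top_block,
                             const phi::Place& place,
                             paddle::framework::Scope* scope) {
      auto pass = pir::CreateRemoveShadowFeedPass();
      // The three attributes checked in InitializePatterns above.
      pass->SetNotOwned("top_block", const_cast<pir::Block*>(top_block));
      pass->Set(pir::Pass::kPlaceAttr, new phi::Place(place));
      pass->SetNotOwned(pir::Pass::kParamScopeAttr, scope);
      pm->AddPass(std::move(pass));
    }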
diff --git a/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.h b/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.h
new file mode 100644
index 0000000000000..0096eb452a585
--- /dev/null
+++ b/paddle/fluid/pir/transforms/general/remove_shadow_feed_pass.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include "paddle/pir/include/core/dll_decl.h"
+
+namespace pir {
+
+class Pass;
+
+IR_API std::unique_ptr<Pass> CreateRemoveShadowFeedPass();
+
+} // namespace pir
diff --git a/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc
index fc58eb2db607c..bf0c758ef3530 100644
--- a/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc
+++ b/paddle/fluid/pir/transforms/gpu/add_norm_fuse_pass.cc
@@ -37,7 +37,7 @@ class RmsNormFusePattern : public paddle::drr::DrrPatternBase {
 
   std::string name() const override { return "RmsNormFusePattern"; }
 
-  uint32_t benefit() const override { return 2; }
+  uint32_t benefit() const override { return 3; }
 
   void operator()(paddle::drr::DrrPatternContext *ctx) const override {
     paddle::drr::SourcePattern pat = ctx->SourcePattern();
@@ -139,7 +139,14 @@ class RmsNormFusePattern : public paddle::drr::DrrPatternBase {
 };
 
 class AddRmsNormFusePattern : public paddle::drr::DrrPatternBase {
+ private:
+  const bool extra_add_;
+
  public:
+  explicit AddRmsNormFusePattern(bool extra_add) : extra_add_(extra_add) {}
+
+  uint32_t benefit() const override { return extra_add_ ? 2 : 1; }
+
   std::string name() const override { return "AddRmsNormFusePattern"; }
 
   void operator()(paddle::drr::DrrPatternContext *ctx) const override {
@@ -157,16 +164,21 @@ class AddRmsNormFusePattern : public paddle::drr::DrrPatternBase {
         });
     pat.Tensor("add_out") = add(pat.Tensor("x"), pat.Tensor("residual"));
     pat_rms_norm({&pat.Tensor("add_out"),
-                  &pat.InputNoneTensor(),
+                  &pat.Tensor("bias"),
                   &pat.InputNoneTensor(),
                   &pat.Tensor("w"),
                   &pat.InputNoneTensor()},
                  {&pat.Tensor("rms_norm_out"),
                   &pat.Tensor("residual_out_0"),
                   &pat.Tensor("inv_var_0")});
-
+    // TODO(bukejiyu): remove the following once DRR supports
+    // matching a placeholder op.
+    if (extra_add_) {
+      const auto &add1 = pat.Op(paddle::dialect::AddOp::name());
+      pat.Tensor("add_out1") =
+          add1(pat.Tensor("add_out"), pat.Tensor("any_tensor"));
+    }
     paddle::drr::ResultPattern res = pat.ResultPattern();
-
     const auto &res_rms_norm =
         res.Op(paddle::dialect::RmsNormOp::name(),
                {
@@ -181,19 +193,25 @@ class AddRmsNormFusePattern : public paddle::drr::DrrPatternBase {
     res_rms_norm(
         {
             &res.Tensor("x"),
-            &res.InputNoneTensor(),
+            &res.Tensor("bias"),
            &res.Tensor("residual"),
             &res.Tensor("w"),
             &res.InputNoneTensor(),
        },
        {&res.Tensor("rms_norm_out"),
-        &res.Tensor("residual_out"),
+        &res.Tensor("add_out"),
         &res.Tensor("inv_var")});
   }
 };
 
 class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase {
+ private:
+  const bool extra_add_;
+
  public:
+  explicit AddLayerNormFusePattern(bool extra_add) : extra_add_(extra_add) {}
+
+  uint32_t benefit() const override { return extra_add_ ? 2 : 1; }
 
   std::string name() const override { return "AddLayerNormFusePattern"; }
 
   void operator()(paddle::drr::DrrPatternContext *ctx) const override {
@@ -204,13 +222,31 @@ class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase {
                    {{"epsilon", pat.Attr("epsilon")},
                     {"begin_norm_axis", pat.Attr("begin_norm_axis")}});
     pat.Tensor("add_out") = add(pat.Tensor("x"), pat.Tensor("residual"));
-    layer_norm(
-        {&pat.Tensor("add_out"), &pat.Tensor("w"), &pat.InputNoneTensor()},
-        {&pat.Tensor("layer_norm_out"),
-         &pat.Tensor("mean_out_0"),
-         &pat.Tensor("variance_out_0")});
+    layer_norm({&pat.Tensor("add_out"), &pat.Tensor("w"), &pat.Tensor("bias")},
+               {&pat.Tensor("layer_norm_out"),
+                &pat.Tensor("mean_out_0"),
+                &pat.Tensor("variance_out_0")});
+    // TODO(bukejiyu): remove the following once DRR supports
+    // matching a placeholder op.
+    if (extra_add_) {
+      const auto &add1 = pat.Op(paddle::dialect::AddOp::name());
+      pat.Tensor("add_out1") =
+          add1(pat.Tensor("add_out"), pat.Tensor("any_tensor"));
+    }
 
     paddle::drr::ResultPattern res = pat.ResultPattern();
+    const auto &cast_op_dtype = res.ComputeAttr(
+        [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType {
+          auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x"));
+          return paddle::dialect::TransToPhiDataType(x_dtype);
+        });
+    const auto &cast_op_1 =
+        res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
+    res.Tensor("casted_bias") = cast_op_1(res.Tensor("bias"));
+    const auto &cast_op_2 =
+        res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
+    res.Tensor("casted_w") = cast_op_2(res.Tensor("w"));
+
     const auto &fuse_layer_norm =
         res.Op(paddle::dialect::FusedBiasResidualLayernormOp::name(),
                {{"epsilon", pat.Attr("epsilon")},
@@ -224,13 +260,13 @@ class AddLayerNormFusePattern : public paddle::drr::DrrPatternBase {
     fuse_layer_norm(
         {
             &res.Tensor("x"),
-            &res.InputNoneTensor(),
+            &res.Tensor("casted_bias"),
             &res.Tensor("residual"),
-            &res.Tensor("w"),
+            &res.Tensor("casted_w"),
             &res.InputNoneTensor(),
         },
         {&res.Tensor("layer_norm_out"),
-         &res.Tensor("residual_out"),
+         &res.Tensor("add_out"),
          &res.Tensor("mean_out"),
          &res.Tensor("variance_out")});
   }
@@ -248,16 +284,19 @@ class AddNormFusePass : public pir::PatternRewritePass {
     //                   mul --->rms_norm
     // w-----------------------------
     bool is_half_weight = true;
+    bool extra_add = true;
     ps.Add(paddle::drr::Create<RmsNormFusePattern>(context, !is_half_weight));
     ps.Add(paddle::drr::Create<RmsNormFusePattern>(context, is_half_weight));
     // x--------
     //           add-rms_norm ---> rms_norm
     // residual-
-    ps.Add(paddle::drr::Create<AddRmsNormFusePattern>(context));
+    ps.Add(paddle::drr::Create<AddRmsNormFusePattern>(context, !extra_add));
+    ps.Add(paddle::drr::Create<AddRmsNormFusePattern>(context, extra_add));
    // x--------
    //           add-layer_norm ----> fused_bias_residual_layernorm
    // residual-
-    ps.Add(paddle::drr::Create<AddLayerNormFusePattern>(context));
+    ps.Add(paddle::drr::Create<AddLayerNormFusePattern>(context, !extra_add));
+    ps.Add(paddle::drr::Create<AddLayerNormFusePattern>(context, extra_add));
     return ps;
   }
 };
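Note: registering each pattern twice with a benefit tied to the constructor flag is how the pass arbitrates between overlapping matches: the drr rewriter prefers higher-benefit patterns, so the extra_add variant wins whenever the fused add_out has an additional consumer, and the plain variant remains the fallback. The minimal shape of such a pattern, mirroring AddRmsNormFusePattern above (sketch only, pattern body elided):

    class MyAddNormFusePattern : public paddle::drr::DrrPatternBase {
     public:
      explicit MyAddNormFusePattern(bool extra_add) : extra_add_(extra_add) {}
      std::string name() const override { return "MyAddNormFusePattern"; }
      // Larger source pattern -> higher benefit -> tried first.
      uint32_t benefit() const override { return extra_add_ ? 2 : 1; }
      void operator()(paddle::drr::DrrPatternContext* ctx) const override {
        // source/result pattern construction as in the patterns above
      }
     private:
      const bool extra_add_;
    };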
diff --git a/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc
index dfd2b0ed588e2..09ecf2f170155 100644
--- a/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc
+++ b/paddle/fluid/pir/transforms/gpu/conv2d_add_fuse_pass.cc
@@ -20,8 +20,6 @@
 #include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
 #include "paddle/fluid/pir/utils/general_functions.h"
 
-#include "paddle/pir/include/core/builtin_op.h"
-#include "paddle/pir/include/core/value.h"
 #include "paddle/pir/include/pass/pass.h"
 #include "paddle/pir/include/pass/pass_registry.h"
 
@@ -89,7 +87,7 @@ class Conv2dAddFusePattern : public paddle::drr::DrrPatternBase {
                  &res.Tensor("filter"),
                  &res.Tensor("bias"),
                  &res.InputNoneTensor()},
-                {&res.Tensor("add_out")});
+                {&res.Tensor("add_out"), &res.OutputNoneTensor()});
   }
 };
 
diff --git a/paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.cc
index 58409b2fbcb15..97b560e503265 100644
--- a/paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.cc
+++ b/paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.cc
@@ -14,10 +14,12 @@
 
 #include "paddle/fluid/pir/transforms/gpu/embedding_eltwise_layernorm_fuse_pass.h"
 
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
 #include "paddle/fluid/pir/utils/general_functions.h"
 
-#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/pir/include/core/builtin_type.h"
 #include "paddle/pir/include/pass/pass.h"
 #include "paddle/pir/include/pass/pass_registry.h"
 
@@ -32,19 +34,12 @@ class Fused2EmbeddingEltwiseLayernormPattern
 
   void operator()(paddle::drr::DrrPatternContext *ctx) const override {
     paddle::drr::SourcePattern pat = ctx->SourcePattern();
-    const auto &embedding_1 = pat.Op(paddle::dialect::EmbeddingOp::name(),
-                                     {{{"padding_idx", pat.Attr("padding_idx")},
-                                       {"sparse", pat.Attr("sparse")}}});
-    const auto &embedding_2 = pat.Op(paddle::dialect::EmbeddingOp::name(),
-                                     {{{"padding_idx", pat.Attr("padding_idx")},
-                                       {"sparse", pat.Attr("sparse")}}});
-
+    const auto &embedding_1 = pat.Op(paddle::dialect::EmbeddingOp::name());
+    const auto &embedding_2 = pat.Op(paddle::dialect::EmbeddingOp::name());
     const auto &add = pat.Op(paddle::dialect::AddOp::name());
-    const auto &layernorm =
-        pat.Op(paddle::dialect::LayerNormOp::name(),
-               {{"epsilon", pat.Attr("epsilon")},
-                {"begin_norm_axis", pat.Attr("begin_norm_axis")}});
+    const auto &layernorm = pat.Op(paddle::dialect::LayerNormOp::name(),
+                                   {{"epsilon", pat.Attr("epsilon")}});
 
     embedding_1({&pat.Tensor("x1"), &pat.Tensor("w1")},
                 {&pat.Tensor("embedding_1_out")});
@@ -57,14 +52,20 @@ class Fused2EmbeddingEltwiseLayernormPattern
                {&pat.Tensor("layernorm_out"),
                 &pat.Tensor("layernorm_mean"),
                 &pat.Tensor("layernorm_variance")});
-    pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) {
+
+    pat.RequireNativeCall([](const paddle::drr::MatchContext &match_ctx) {
+      auto w1_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w1"));
+      auto w2_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w2"));
+      if (w1_dtype != w2_dtype || (!w1_dtype.isa<pir::Float16Type>() &&
+                                   !w1_dtype.isa<pir::Float32Type>())) {
+        return false;
+      }
+
       auto x1_shape = pir::GetShapeFromValue(match_ctx.Tensor("x1"));
       auto x2_shape = pir::GetShapeFromValue(match_ctx.Tensor("x2"));
-
       if (x1_shape.size() != x2_shape.size()) {
         return false;
       }
-
       for (size_t i = 0; i < x1_shape.size(); i++) {
         if (x1_shape.at(i) != x2_shape.at(i)) {
           return false;
@@ -76,13 +77,25 @@ class Fused2EmbeddingEltwiseLayernormPattern
 
     paddle::drr::ResultPattern res = pat.ResultPattern();
 
-    auto &combine_op_1 = res.Op(pir::CombineOp::name());
+    const auto &combine_op_1 = res.Op(pir::CombineOp::name());
     combine_op_1({&res.Tensor("x1"), &res.Tensor("x2")},
                  {&res.Tensor("combine1_out")});
-    auto &combine_op_2 = res.Op(pir::CombineOp::name());
+    const auto &combine_op_2 = res.Op(pir::CombineOp::name());
     combine_op_2({&res.Tensor("w1"), &res.Tensor("w2")},
                  {&res.Tensor("combine2_out")});
 
+    const auto &cast_op_dtype = res.ComputeAttr(
+        [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType {
+          auto w1_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w1"));
+          return paddle::dialect::TransToPhiDataType(w1_dtype);
+        });
+    const auto &cast_op_1 =
+        res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
+    res.Tensor("casted_bias") = cast_op_1(res.Tensor("bias"));
+    const auto &cast_op_2 =
+        res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
+    res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale"));
+
     const auto &fused_embedding_eltwise_layernorm_op =
         res.Op(paddle::dialect::FusedEmbeddingEltwiseLayernormOp::name(),
                {{
@@ -90,8 +103,8 @@ class Fused2EmbeddingEltwiseLayernormPattern
                }});
     fused_embedding_eltwise_layernorm_op({&res.Tensor("combine1_out"),
                                           &res.Tensor("combine2_out"),
-                                          &res.Tensor("bias"),
-                                          &res.Tensor("scale")},
+                                          &res.Tensor("casted_bias"),
+                                          &res.Tensor("casted_scale")},
                                          {&res.Tensor("layernorm_out")});
   }
 };
@@ -105,21 +118,13 @@ class Fused3EmbeddingEltwiseLayernormPattern
 
   void operator()(paddle::drr::DrrPatternContext *ctx) const override {
     paddle::drr::SourcePattern pat = ctx->SourcePattern();
-    const auto &embedding_1 = pat.Op(paddle::dialect::EmbeddingOp::name(),
-                                     {{{"padding_idx", pat.Attr("padding_idx")},
-                                       {"sparse", pat.Attr("sparse")}}});
-    const auto &embedding_2 = pat.Op(paddle::dialect::EmbeddingOp::name(),
-                                     {{{"padding_idx", pat.Attr("padding_idx")},
-                                       {"sparse", pat.Attr("sparse")}}});
-    const auto &embedding_3 = pat.Op(paddle::dialect::EmbeddingOp::name(),
-                                     {{{"padding_idx", pat.Attr("padding_idx")},
-                                       {"sparse", pat.Attr("sparse")}}});
+    const auto &embedding_1 = pat.Op(paddle::dialect::EmbeddingOp::name());
+    const auto &embedding_2 = pat.Op(paddle::dialect::EmbeddingOp::name());
+    const auto &embedding_3 = pat.Op(paddle::dialect::EmbeddingOp::name());
     const auto &add1 = pat.Op(paddle::dialect::AddOp::name());
     const auto &add2 = pat.Op(paddle::dialect::AddOp::name());
-    const auto &layernorm =
-        pat.Op(paddle::dialect::LayerNormOp::name(),
-               {{"epsilon", pat.Attr("epsilon")},
-                {"begin_norm_axis", pat.Attr("begin_norm_axis")}});
+    const auto &layernorm = pat.Op(paddle::dialect::LayerNormOp::name(),
+                                   {{"epsilon", pat.Attr("epsilon")}});
 
     embedding_1({&pat.Tensor("x1"), &pat.Tensor("w1")},
                 {&pat.Tensor("embedding_1_out")});
@@ -136,7 +141,17 @@ class Fused3EmbeddingEltwiseLayernormPattern
                {&pat.Tensor("layernorm_out"),
                 &pat.Tensor("layernorm_mean"),
                 &pat.Tensor("layernorm_variance")});
-    pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) {
+
+    pat.RequireNativeCall([](const paddle::drr::MatchContext &match_ctx) {
+      auto w1_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w1"));
+      auto w2_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w2"));
+      auto w3_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w3"));
+      if (w1_dtype != w2_dtype || w1_dtype != w3_dtype ||
+          (!w1_dtype.isa<pir::Float16Type>() &&
+           !w1_dtype.isa<pir::Float32Type>())) {
+        return false;
+      }
+
       auto x1_shape = pir::GetShapeFromValue(match_ctx.Tensor("x1"));
       auto x2_shape = pir::GetShapeFromValue(match_ctx.Tensor("x2"));
       auto x3_shape = pir::GetShapeFromValue(match_ctx.Tensor("x3"));
@@ -146,7 +161,7 @@ class Fused3EmbeddingEltwiseLayernormPattern
       }
       for (size_t i = 0; i < x1_shape.size(); i++) {
         if (x1_shape.at(i) != x2_shape.at(i) ||
-            x1_shape.at(i) != x2_shape.at(i)) {
+            x1_shape.at(i) != x3_shape.at(i)) {
           return false;
         }
       }
@@ -162,6 +177,18 @@ class Fused3EmbeddingEltwiseLayernormPattern
     combine_op_2({&res.Tensor("w1"), &res.Tensor("w2"), &res.Tensor("w3")},
&res.Tensor("w3")}, {&res.Tensor("combine2_out")}); + const auto &cast_op_dtype = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType { + auto w1_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w1")); + return paddle::dialect::TransToPhiDataType(w1_dtype); + }); + const auto &cast_op_1 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_bias") = cast_op_1(res.Tensor("bias")); + const auto &cast_op_2 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale")); + const auto &fused_embedding_eltwise_layernorm_op = res.Op(paddle::dialect::FusedEmbeddingEltwiseLayernormOp::name(), {{ @@ -169,8 +196,8 @@ class Fused3EmbeddingEltwiseLayernormPattern }}); fused_embedding_eltwise_layernorm_op({&res.Tensor("combine1_out"), &res.Tensor("combine2_out"), - &res.Tensor("bias"), - &res.Tensor("scale")}, + &res.Tensor("casted_bias"), + &res.Tensor("casted_scale")}, {&res.Tensor("layernorm_out")}); } }; @@ -193,7 +220,6 @@ class EmbeddingEltwiseLayernormFusePass : public pir::PatternRewritePass { } // namespace namespace pir { - std::unique_ptr CreateFusedEmbeddingEltwiseLayerNormPass() { return std::make_unique(); } diff --git a/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc index d3e4ed862e741..fa0436d3e5f78 100644 --- a/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc @@ -36,13 +36,13 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase { { {"in_num_col_dims", pat.Attr("in_num_col_dims")}, {"activation_type", pat.Attr("activation_type")}, - {"padding_weights", pat.Attr("padding_weights")}, }); const auto &add = pat.Op(paddle::dialect::AddOp::name()); const auto &layernorm = pat.Op(paddle::dialect::LayerNormOp::name(), {{"epsilon", pat.Attr("epsilon")}, {"begin_norm_axis", pat.Attr("begin_norm_axis")}}); + fc({&pat.Tensor("x"), &pat.Tensor("w"), &pat.Tensor("bias0")}, {&pat.Tensor("fc_out")}); add({&pat.Tensor("fc_out"), &pat.Tensor("y")}, {&pat.Tensor("add_out")}); @@ -51,8 +51,14 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase { {&pat.Tensor("layernorm_out"), &pat.Tensor("layernorm_mean"), &pat.Tensor("layernorm_variance")}); - // Constrains the activation is none + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x")); + if (!x_dtype.isa() && + !x_dtype.isa()) { + return false; + } + int64_t layer_norm_x = 1; auto fc_out_dims = pir::GetShapeFromValue(match_ctx.Tensor("fc_out")); auto w_dims = pir::GetShapeFromValue(match_ctx.Tensor("w")); @@ -68,6 +74,18 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase { }); paddle::drr::ResultPattern res = pat.ResultPattern(); + const auto &cast_op_dtype = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType { + auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x")); + return paddle::dialect::TransToPhiDataType(x_dtype); + }); + const auto &cast_op_1 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_bias1") = cast_op_1(res.Tensor("bias1")); + const auto &cast_op_2 = + res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}}); + res.Tensor("casted_scale") = 
diff --git a/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc b/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc
index d3e4ed862e741..fa0436d3e5f78 100644
--- a/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc
+++ b/paddle/fluid/pir/transforms/gpu/fc_elementwise_layernorm_fuse_pass.cc
@@ -36,13 +36,13 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase {
                {
                    {"in_num_col_dims", pat.Attr("in_num_col_dims")},
                    {"activation_type", pat.Attr("activation_type")},
-                   {"padding_weights", pat.Attr("padding_weights")},
                });
     const auto &add = pat.Op(paddle::dialect::AddOp::name());
     const auto &layernorm =
         pat.Op(paddle::dialect::LayerNormOp::name(),
                {{"epsilon", pat.Attr("epsilon")},
                 {"begin_norm_axis", pat.Attr("begin_norm_axis")}});
+
     fc({&pat.Tensor("x"), &pat.Tensor("w"), &pat.Tensor("bias0")},
        {&pat.Tensor("fc_out")});
     add({&pat.Tensor("fc_out"), &pat.Tensor("y")}, {&pat.Tensor("add_out")});
@@ -51,8 +51,14 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase {
               {&pat.Tensor("layernorm_out"),
                &pat.Tensor("layernorm_mean"),
                &pat.Tensor("layernorm_variance")});
-    // Constrains the activation is none
+
     pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) {
+      auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x"));
+      if (!x_dtype.isa<pir::Float16Type>() &&
+          !x_dtype.isa<pir::Float32Type>()) {
+        return false;
+      }
+
       int64_t layer_norm_x = 1;
       auto fc_out_dims = pir::GetShapeFromValue(match_ctx.Tensor("fc_out"));
       auto w_dims = pir::GetShapeFromValue(match_ctx.Tensor("w"));
@@ -68,6 +74,18 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase {
     });
 
     paddle::drr::ResultPattern res = pat.ResultPattern();
+    const auto &cast_op_dtype = res.ComputeAttr(
+        [](const paddle::drr::MatchContext &match_ctx) -> phi::DataType {
+          auto x_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("x"));
+          return paddle::dialect::TransToPhiDataType(x_dtype);
+        });
+    const auto &cast_op_1 =
+        res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
+    res.Tensor("casted_bias1") = cast_op_1(res.Tensor("bias1"));
+    const auto &cast_op_2 =
+        res.Op(paddle::dialect::CastOp::name(), {{"dtype", cast_op_dtype}});
+    res.Tensor("casted_scale") = cast_op_2(res.Tensor("scale"));
+
     const auto &fused_fc_elementwise_op =
         res.Op(paddle::dialect::FusedFcElementwiseLayernormOp::name(),
                {{
@@ -80,8 +98,8 @@ class FcElementwiseLayerNormFusePattern : public paddle::drr::DrrPatternBase {
                 &res.Tensor("w"),
                 &res.Tensor("y"),
                 &res.Tensor("bias0"),
-                &res.Tensor("scale"),
-                &res.Tensor("bias1")},
+                &res.Tensor("casted_scale"),
+                &res.Tensor("casted_bias1")},
                {&res.Tensor("layernorm_out"),
                 &res.Tensor("layernorm_mean"),
                 &res.Tensor("layernorm_variance")});
diff --git a/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.cc b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.cc
new file mode 100644
index 0000000000000..440aeee5f3ac5
--- /dev/null
+++ b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.cc
@@ -0,0 +1,489 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.h"
+
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+#include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
+#include "paddle/fluid/pir/utils/general_functions.h"
+
+#include "paddle/pir/include/pass/pass.h"
+#include "paddle/pir/include/pass/pass_registry.h"
+
+namespace {
+
+class FlashAttnPatternQscale : public paddle::drr::DrrPatternBase {
+ private:
+  bool softmax_with_cast_;
+
+ public:
+  explicit FlashAttnPatternQscale(bool softmax_with_cast)
+      : softmax_with_cast_(softmax_with_cast) {}
+
+  std::string name() const override { return "FlashAttnPatternQscale"; }
+
+  void operator()(paddle::drr::DrrPatternContext *ctx) const override {
+    paddle::drr::SourcePattern src = ctx->SourcePattern();
+    // check the transpose
+    // q[b, s, head, head_dim] -> transpose -> q[b, head, s, head_dim] -> scale
+    const auto &transpose_q = src.Op("pd_op.transpose");
+    src.Tensor("q_transpose_out") = transpose_q(src.Tensor("q"));
+    // scale before matmul
+    const auto &scale_q = src.Op("pd_op.scale");
+    const auto &full_scale =
+        src.Op("pd_op.full", {{"value", src.Attr("scale_q_value")}});
+    src.Tensor("q_scale_out") =
+        scale_q(src.Tensor("q_transpose_out"), full_scale());
+    // k[b, s, head, head_dim] -> transpose -> k[b, head, s, head_dim]
+    // k[b, head, s, head_dim] -> transpose -> k[b, head, head_dim, s]
+    const auto &transpose_k = src.Op("pd_op.transpose");
+    src.Tensor("k_transpose_out") = transpose_k(src.Tensor("k"));
+    const auto &transpose_k2 = src.Op("pd_op.transpose");
+    src.Tensor("k_transpose2_out") =
+        transpose_k2(src.Tensor("k_transpose_out"));
+    // v[b, s, head, head_dim] -> transpose -> v[b, head, s, head_dim]
+    const auto &transpose_v = src.Op("pd_op.transpose");
+    src.Tensor("v_transpose_out") = transpose_v(src.Tensor("v"));
+    // qk
+    const auto &qk_matmul =
+        src.Op("pd_op.matmul",
+               {{"transpose_x", src.Attr("matmul_qk_transpose_x")},
+                {"transpose_y", src.Attr("matmul_qk_transpose_y")}});
+    src.Tensor("qk_out") =
+        qk_matmul(src.Tensor("q_scale_out"), src.Tensor("k_transpose2_out"));
+
+    // mask
+    const auto &mask_add = src.Op("pd_op.add");
+    src.Tensor("mask_add_out") =
+        mask_add(src.Tensor("qk_out"), src.Tensor("mask"));
+
+    if (softmax_with_cast_) {
+      // cast + softmax + cast
+      const auto &softmax_cast1 = src.Op("pd_op.cast");
+      src.Tensor("softmax_cast1_out") =
+          softmax_cast1(src.Tensor("mask_add_out"));
+      const auto &softmax =
+          src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}});
+      src.Tensor("softmax_cast2_in") = softmax(src.Tensor("softmax_cast1_out"));
+      const auto &softmax_cast2 = src.Op("pd_op.cast");
+      src.Tensor("softmax_out") = softmax_cast2(src.Tensor("softmax_cast2_in"));
+    } else {
+      // softmax
+      const auto &softmax =
+          src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}});
+      src.Tensor("softmax_out") = softmax(src.Tensor("mask_add_out"));
+    }
+
+    // o
+    const auto &context_matmul =
+        src.Op("pd_op.matmul",
+               {{"transpose_x", src.Attr("context_matmul_transpose_x")},
+                {"transpose_y", src.Attr("context_matmul_transpose_y")}});
+    src.Tensor("context_matmul_out") = context_matmul(
+        src.Tensor("softmax_out"), src.Tensor("v_transpose_out"));
+    const auto &o_transpose = src.Op("pd_op.transpose");
+    src.Tensor("out") = o_transpose(src.Tensor("context_matmul_out"));
+
+    // Constraints
+    src.RequireNativeCall(
+        [](const paddle::drr::MatchContext &match_ctx) -> bool {
+          auto q_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("q"));
+          if (!q_dtype.isa<pir::Float16Type>() &&
+              !q_dtype.isa<pir::BFloat16Type>()) {
+            return false;
+          }
+          // softmax
+          const auto &softmax_axis = match_ctx.Attr<int>("softmax_axis");
+          if (softmax_axis != -1 && softmax_axis != 3) return false;
+          // matmul transpose
+          bool matmul_qk_transpose_x =
+              match_ctx.Attr<bool>("matmul_qk_transpose_x");
+          bool matmul_qk_transpose_y =
+              match_ctx.Attr<bool>("matmul_qk_transpose_y");
+          if (matmul_qk_transpose_x || matmul_qk_transpose_y) return false;
+
+          bool matmul_o_transpose_x =
+              match_ctx.Attr<bool>("context_matmul_transpose_x");
+          bool matmul_o_transpose_y =
+              match_ctx.Attr<bool>("context_matmul_transpose_y");
+          if (matmul_o_transpose_x || matmul_o_transpose_y) return false;
+          // tensor shape
+          auto q_transpose_out =
+              pir::GetShapeFromValue(match_ctx.Tensor("q_transpose_out"));
+          auto k_transpose_out =
+              pir::GetShapeFromValue(match_ctx.Tensor("k_transpose_out"));
+          auto v_transpose_out =
+              pir::GetShapeFromValue(match_ctx.Tensor("v_transpose_out"));
+          if (q_transpose_out.size() != 4 || k_transpose_out.size() != 4 ||
+              v_transpose_out.size() != 4 ||
+              !(q_transpose_out.at(0) == k_transpose_out.at(0) &&
+                k_transpose_out.at(0) == v_transpose_out.at(0)) ||
+              !(q_transpose_out.at(1) == k_transpose_out.at(1) &&
+                k_transpose_out.at(1) == v_transpose_out.at(1)) ||
+              !(q_transpose_out.at(3) == k_transpose_out.at(3) &&
+                k_transpose_out.at(3) == v_transpose_out.at(3))) {
+            return false;
+          }
+          // mask's shape [bs, 1, seq_len, seq_len]
+          auto mask_add = pir::GetShapeFromValue(match_ctx.Tensor("mask"));
+          if (mask_add.size() != 4 || mask_add.at(1) != 1) {
+            return false;
+          }
+
+          return true;
+        });
+
+    //
+    // Result Pattern.
+    //
+    paddle::drr::ResultPattern res = src.ResultPattern();
+    const auto &flash_attn = res.Op("pd_op.flash_attn",
+                                    {{{"dropout", res.Float32Attr(0.0)},
+                                      {"causal", res.BoolAttr(false)},
+                                      {"return_softmax", res.BoolAttr(false)},
+                                      {"is_test", res.BoolAttr(true)},
+                                      {"rng_name", res.StrAttr("")}}});
+    flash_attn({&res.Tensor("q"),
+                &res.Tensor("k"),
+                &res.Tensor("v"),
+                &res.InputNoneTensor(),
+                &res.Tensor("mask")},
+               {&res.Tensor("out"),
+                &res.Tensor("softmax"),
+                &res.Tensor("softmax_lse"),
+                &res.Tensor("seed_offset")});
+  }
+};
+
+// 1. scale after matmul
+// 2. cast before and after softmax
+class FlashAttnPatternOutscale : public paddle::drr::DrrPatternBase {
+ private:
+  bool softmax_with_cast_;
+
+ public:
+  explicit FlashAttnPatternOutscale(bool softmax_with_cast)
+      : softmax_with_cast_(softmax_with_cast) {}
+
+ public:
+  std::string name() const override { return "FlashAttnPatternOutscale"; }
+
+  void operator()(paddle::drr::DrrPatternContext *ctx) const override {
+    paddle::drr::SourcePattern src = ctx->SourcePattern();
+    // check the transpose,
+    // q[b, s, head, head_dim] -> transpose -> q[b, head, s, head_dim] -> scale
+    const auto &transpose_q = src.Op("pd_op.transpose");
+    src.Tensor("q_transpose_out") = transpose_q(src.Tensor("q"));
+    // k[b, s, head, head_dim] -> transpose -> k[b, head, s, head_dim]
+    // k[b, head, s, head_dim] -> transpose -> k[b, head, head_dim, s]
+    const auto &transpose_k = src.Op("pd_op.transpose");
+    src.Tensor("k_transpose_out") = transpose_k(src.Tensor("k"));
+    const auto &transpose_k2 = src.Op("pd_op.transpose");
+    src.Tensor("k_transpose2_out") =
+        transpose_k2(src.Tensor("k_transpose_out"));
+    // v[b, s, head, head_dim] -> transpose -> v[b, head, s, head_dim]
+    const auto &transpose_v = src.Op("pd_op.transpose");
+    src.Tensor("v_transpose_out") = transpose_v(src.Tensor("v"));
+    // qk
+    const auto &qk_matmul =
+        src.Op("pd_op.matmul",
+               {{"transpose_x", src.Attr("matmul_qk_transpose_x")},
+                {"transpose_y", src.Attr("matmul_qk_transpose_y")}});
+    src.Tensor("qk_out") = qk_matmul(src.Tensor("q_transpose_out"),
+                                     src.Tensor("k_transpose2_out"));
+    const auto &scale_out = src.Op("pd_op.scale");
+    const auto &full_scale =
+        src.Op("pd_op.full", {{"value", src.Attr("scale_out_value")}});
+    src.Tensor("qk_scale_out") = scale_out(src.Tensor("qk_out"), full_scale());
+
+    // mask
+    const auto &mask_add = src.Op("pd_op.add");
+    src.Tensor("mask_add_out") =
+        mask_add(src.Tensor("qk_scale_out"), src.Tensor("mask"));
+
+    if (softmax_with_cast_) {
+      // cast + softmax + cast
+      const auto &softmax_cast1 = src.Op("pd_op.cast");
+      src.Tensor("softmax_cast1_out") =
+          softmax_cast1(src.Tensor("mask_add_out"));
+      const auto &softmax =
+          src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}});
+      src.Tensor("softmax_cast2_in") = softmax(src.Tensor("softmax_cast1_out"));
+      const auto &softmax_cast2 = src.Op("pd_op.cast");
+      src.Tensor("softmax_out") = softmax_cast2(src.Tensor("softmax_cast2_in"));
+    } else {
+      // softmax
+      const auto &softmax =
+          src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}});
+      src.Tensor("softmax_out") = softmax(src.Tensor("mask_add_out"));
+    }
+
+    // o
+    const auto &context_matmul =
+        src.Op("pd_op.matmul",
+               {{"transpose_x", src.Attr("context_matmul_transpose_x")},
+                {"transpose_y", src.Attr("context_matmul_transpose_y")}});
+    src.Tensor("context_matmul_out") = context_matmul(
+        src.Tensor("softmax_out"), src.Tensor("v_transpose_out"));
+    const auto &o_transpose = src.Op("pd_op.transpose");
+    src.Tensor("out") = o_transpose(src.Tensor("context_matmul_out"));
+
+    // Constraints
+    src.RequireNativeCall(
+        [](const paddle::drr::MatchContext &match_ctx) -> bool {
+          auto q_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("q"));
+          if (!q_dtype.isa<pir::Float16Type>() &&
+              !q_dtype.isa<pir::BFloat16Type>()) {
+            return false;
+          }
+          // softmax
+          const auto &softmax_axis = match_ctx.Attr<int>("softmax_axis");
+          if (softmax_axis != -1 && softmax_axis != 3) return false;
+          // matmul transpose
+          bool matmul_qk_transpose_x =
+              match_ctx.Attr<bool>("matmul_qk_transpose_x");
+          bool matmul_qk_transpose_y =
+              match_ctx.Attr<bool>("matmul_qk_transpose_y");
match_ctx.Attr("matmul_qk_transpose_y"); + if (matmul_qk_transpose_x || matmul_qk_transpose_y) return false; + + bool matmul_o_transpose_x = + match_ctx.Attr("context_matmul_transpose_x"); + bool matmul_o_transpose_y = + match_ctx.Attr("context_matmul_transpose_y"); + if (matmul_o_transpose_x || matmul_o_transpose_y) return false; + // tensor shape + auto q_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("q_transpose_out")); + auto k_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("k_transpose_out")); + auto v_transpose_out = + pir::GetShapeFromValue(match_ctx.Tensor("v_transpose_out")); + if (q_transpose_out.size() != 4 || k_transpose_out.size() != 4 || + v_transpose_out.size() != 4 || + !(q_transpose_out.at(0) == k_transpose_out.at(0) && + k_transpose_out.at(0) == v_transpose_out.at(0)) || + !(q_transpose_out.at(1) == k_transpose_out.at(1) && + k_transpose_out.at(1) == v_transpose_out.at(1)) || + !(q_transpose_out.at(3) == k_transpose_out.at(3) && + k_transpose_out.at(3) == v_transpose_out.at(3))) { + return false; + } + // mask's shape [bs, 1, seq_len, seq_len] + auto mask_add = pir::GetShapeFromValue(match_ctx.Tensor("mask")); + if (mask_add.size() != 4 || mask_add.at(1) != 1) { + return false; + } + + return true; + }); + + // + // Result Pattern. + // + paddle::drr::ResultPattern res = src.ResultPattern(); + const auto &flash_attn = res.Op("pd_op.flash_attn", + {{{"dropout", res.Float32Attr(0.0)}, + {"causal", res.BoolAttr(false)}, + {"return_softmax", res.BoolAttr(false)}, + {"is_test", res.BoolAttr(true)}, + {"rng_name", res.StrAttr("")}}}); + flash_attn({&res.Tensor("q"), + &res.Tensor("k"), + &res.Tensor("v"), + &res.InputNoneTensor(), + &res.Tensor("mask")}, + {&res.Tensor("out"), + &res.Tensor("softmax"), + &res.Tensor("softmax_lse"), + &res.Tensor("seed_offset")}); + } +}; + +// slice qkv +class TransposeSliceFlashAttnPattern : public paddle::drr::DrrPatternBase { + public: + std::string name() const override { return "TransposeSliceFlashAttnPattern"; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern src = ctx->SourcePattern(); + // transpose + const auto &transpose_qkv = + src.Op("pd_op.transpose", {{"perm", src.Attr("perm")}}); + src.Tensor("qkv_transpose") = transpose_qkv(src.Tensor("qkv")); + // slice q -> [b, head, s, head_dim] + const auto &slice_q = + src.Op(paddle::dialect::SliceOp::name(), + {{"axes", src.Attr("axes_q")}, + {"infer_flags", src.Attr("infer_flags_q")}, + {"decrease_axis", src.Attr("decrease_axis_q")}}); + const auto &full_int_array_q1 = src.Op("pd_op.full_int_array"); + const auto &full_int_array_q2 = src.Op("pd_op.full_int_array"); + src.Tensor("q") = slice_q( + src.Tensor("qkv_transpose"), full_int_array_q1(), full_int_array_q2()); + // slice k -> [b, head, s, head_dim] + const auto &slice_k = + src.Op(paddle::dialect::SliceOp::name(), + {{"axes", src.Attr("axes_k")}, + {"infer_flags", src.Attr("infer_flags_k")}, + {"decrease_axis", src.Attr("decrease_axis_k")}}); + const auto &full_int_array_k1 = src.Op("pd_op.full_int_array"); + const auto &full_int_array_k2 = src.Op("pd_op.full_int_array"); + src.Tensor("k") = slice_k( + src.Tensor("qkv_transpose"), full_int_array_k1(), full_int_array_k2()); + // slice v -> [b, head, s, head_dim] + const auto &slice_v = + src.Op(paddle::dialect::SliceOp::name(), + {{"axes", src.Attr("axes_v")}, + {"infer_flags", src.Attr("infer_flags_v")}, + {"decrease_axis", src.Attr("decrease_axis_v")}}); + const auto &full_int_array_v1 = 
src.Op("pd_op.full_int_array"); + const auto &full_int_array_v2 = src.Op("pd_op.full_int_array"); + src.Tensor("v") = slice_v( + src.Tensor("qkv_transpose"), full_int_array_v1(), full_int_array_v2()); + + // k[b, head, s, head_dim] -> transpose -> k[b, head, head_dim, s] + const auto &transpose_k = src.Op("pd_op.transpose"); + src.Tensor("k_transpose_out") = transpose_k(src.Tensor("k")); + // qk + const auto &qk_matmul = + src.Op("pd_op.matmul", + {{"transpose_x", src.Attr("matmul_qk_transpose_x")}, + {"transpose_y", src.Attr("matmul_qk_transpose_y")}}); + src.Tensor("qk_out") = + qk_matmul(src.Tensor("q"), src.Tensor("k_transpose_out")); + // scale + const auto &scale_out = src.Op("pd_op.scale"); + const auto &full_scale = + src.Op("pd_op.full", {{"value", src.Attr("scale_out_value")}}); + src.Tensor("qk_scale_out") = scale_out(src.Tensor("qk_out"), full_scale()); + + // mask + const auto &mask_add = src.Op("pd_op.add"); + src.Tensor("mask_add_out") = + mask_add(src.Tensor("qk_scale_out"), src.Tensor("mask")); + + // softmax + const auto &softmax = + src.Op("pd_op.softmax", {{"axis", src.Attr("softmax_axis")}}); + src.Tensor("softmax_out") = softmax(src.Tensor("mask_add_out")); + // o + const auto &context_matmul = + src.Op("pd_op.matmul", + {{"transpose_x", src.Attr("context_matmul_transpose_x")}, + {"transpose_y", src.Attr("context_matmul_transpose_y")}}); + src.Tensor("context_matmul_out") = + context_matmul(src.Tensor("softmax_out"), src.Tensor("v")); + // [b, head, s, head_dim] -> [b, s, head, head_dim] + const auto &o_transpose = src.Op("pd_op.transpose"); + src.Tensor("out") = o_transpose(src.Tensor("context_matmul_out")); + + // Constraints + src.RequireNativeCall( + [](const paddle::drr::MatchContext &match_ctx) -> bool { + auto q_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("q")); + if (!q_dtype.isa() && + !q_dtype.isa()) { + return false; + } + // softmax + const auto &softmax_axis = match_ctx.Attr("softmax_axis"); + if (softmax_axis != -1 && softmax_axis != 3) return false; + // matmul transpose + bool matmul_qk_transpose_x = + match_ctx.Attr("matmul_qk_transpose_x"); + bool matmul_qk_transpose_y = + match_ctx.Attr("matmul_qk_transpose_y"); + if (matmul_qk_transpose_x || matmul_qk_transpose_y) return false; + + bool matmul_o_transpose_x = + match_ctx.Attr("context_matmul_transpose_x"); + bool matmul_o_transpose_y = + match_ctx.Attr("context_matmul_transpose_y"); + if (matmul_o_transpose_x || matmul_o_transpose_y) return false; + // tensor shape + auto q = pir::GetShapeFromValue(match_ctx.Tensor("q")); + auto k = pir::GetShapeFromValue(match_ctx.Tensor("k")); + auto v = pir::GetShapeFromValue(match_ctx.Tensor("v")); + if (q.size() != 4 || k.size() != 4 || v.size() != 4 || + !(q.at(0) == k.at(0) && k.at(0) == v.at(0)) || + !(q.at(1) == k.at(1) && k.at(1) == v.at(1)) || + !(q.at(3) == k.at(3) && k.at(3) == v.at(3))) { + return false; + } + // mask's shape [bs, 1, seq_len, seq_len] + auto mask_add = pir::GetShapeFromValue(match_ctx.Tensor("mask")); + if (mask_add.size() != 4 || mask_add.at(1) != 1) { + return false; + } + + return true; + }); + + // + // Result Pattern. 
+  //
+  paddle::drr::ResultPattern res = src.ResultPattern();
+  // [b, head, seq_len, head_dim] -> [b, seq_len, head, head_dim]
+  const auto &q_transpose = res.Op(
+      "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}});
+  res.Tensor("q_transpose") = q_transpose(res.Tensor("q"));
+  const auto &k_transpose = res.Op(
+      "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}});
+  res.Tensor("k_transpose") = k_transpose(res.Tensor("k"));
+  const auto &v_transpose = res.Op(
+      "pd_op.transpose", {{"perm", res.VectorInt32Attr({0, 2, 1, 3})}});
+  res.Tensor("v_transpose") = v_transpose(res.Tensor("v"));
+
+  const auto &flash_attn = res.Op("pd_op.flash_attn",
+                                  {{{"dropout", res.Float32Attr(0.0)},
+                                    {"causal", res.BoolAttr(false)},
+                                    {"return_softmax", res.BoolAttr(false)},
+                                    {"is_test", res.BoolAttr(true)},
+                                    {"rng_name", res.StrAttr("")}}});
+  flash_attn({&res.Tensor("q_transpose"),
+              &res.Tensor("k_transpose"),
+              &res.Tensor("v_transpose"),
+              &res.InputNoneTensor(),
+              &res.Tensor("mask")},
+             {&res.Tensor("out"),
+              &res.Tensor("softmax"),
+              &res.Tensor("softmax_lse"),
+              &res.Tensor("seed_offset")});
+  }
+};
+
+class FusedFlashAttnPass : public pir::PatternRewritePass {
+ public:
+  FusedFlashAttnPass() : pir::PatternRewritePass("fused_flash_attn_pass", 2) {}
+
+  pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override {
+    pir::RewritePatternSet ps(context);
+    ps.Add(paddle::drr::Create<FlashAttnPatternQscale>(context, true));
+    ps.Add(paddle::drr::Create<FlashAttnPatternQscale>(context, false));
+    ps.Add(paddle::drr::Create<FlashAttnPatternOutscale>(context, true));
+    ps.Add(paddle::drr::Create<FlashAttnPatternOutscale>(context, false));
+    ps.Add(paddle::drr::Create<TransposeSliceFlashAttnPattern>(context));
+    return ps;
+  }
+};
+
+}  // namespace
+
+namespace pir {
+std::unique_ptr<Pass> CreateFusedFlashAttnPass() {
+  return std::make_unique<FusedFlashAttnPass>();
+}
+}  // namespace pir
+
+REGISTER_IR_PASS(fused_flash_attn_pass, FusedFlashAttnPass);
diff --git a/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.h b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.h
new file mode 100644
index 0000000000000..14183174760bc
--- /dev/null
+++ b/paddle/fluid/pir/transforms/gpu/fused_flash_attn_pass.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include "paddle/pir/include/core/dll_decl.h"
+
+namespace pir {
+
+class Pass;
+
+IR_API std::unique_ptr<Pass> CreateFusedFlashAttnPass();
+
+}  // namespace pir
diff --git a/paddle/fluid/pir/transforms/gpu/fused_weight_only_linear_pass.cc b/paddle/fluid/pir/transforms/gpu/fused_weight_only_linear_pass.cc
index e9b522ce85189..db41a0d5cb78a 100644
--- a/paddle/fluid/pir/transforms/gpu/fused_weight_only_linear_pass.cc
+++ b/paddle/fluid/pir/transforms/gpu/fused_weight_only_linear_pass.cc
@@ -31,7 +31,7 @@ int getSMVersion() {
     sm_version = paddle::platform::GetGPUComputeCapability(
         paddle::platform::GetCurrentDeviceId());
 #else
-  PADDLE_THROW(paddle::platform::errors::Unavailable(
+  PADDLE_THROW(common::errors::Unavailable(
       "fused_weight_only_linear_pass needs paddle compiled with CUDA."));
 #endif
   return sm_version;
@@ -40,11 +40,15 @@ int getSMVersion() {
 class FusedWeightOnlyLinearWithBiasPattern
     : public paddle::drr::DrrPatternBase {
  private:
-  bool reverse_;
+  bool reverse_add_;
+  std::string algo_;
+  int sm_version_;
 
  public:
-  explicit FusedWeightOnlyLinearWithBiasPattern(bool reverse)
-      : reverse_(reverse) {}
+  FusedWeightOnlyLinearWithBiasPattern(bool reverse_add,
+                                       const std::string &algo,
+                                       int sm_version)
+      : reverse_add_(reverse_add), algo_(algo), sm_version_(sm_version) {}
 
   std::string name() const override {
     return "FusedWeightOnlyLinearWithBiasPattern";
@@ -65,8 +69,8 @@
     const auto &add = src.Op(paddle::dialect::AddOp::name());
 
     src.Tensor("add_out") =
-        reverse_ ? add(src.Tensor("matmul_out"), src.Tensor("bias"))
-                 : add(src.Tensor("bias"), src.Tensor("matmul_out"));
+        reverse_add_ ? add(src.Tensor("matmul_out"), src.Tensor("bias"))
+                     : add(src.Tensor("bias"), src.Tensor("matmul_out"));
 
     //
     // Constraints.
@@ -80,21 +84,21 @@
           bool matmul_trans_y = match_ctx.Attr<bool>("matmul_transpose_y");
           if (matmul_trans_x || matmul_trans_y) return false;
 
+          auto w_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w"));
+          if (!w_dtype.isa<pir::Float16Type>() &&
+              !w_dtype.isa<pir::BFloat16Type>()) {
+            return false;
+          }
+
           auto w_dims = pir::GetShapeFromValue(match_ctx.Tensor("w"));
           auto x_dims = pir::GetShapeFromValue(match_ctx.Tensor("x"));
           auto bias_dims = pir::GetShapeFromValue(match_ctx.Tensor("bias"));
           if (!(w_dims.size() == 2 && x_dims.size() >= 2 &&
-                bias_dims.size() == x_dims.size())) {
+                bias_dims.size() == 1)) {
             return false;
           }
 
           if (w_dims.at(0) % 64 != 0 || w_dims.at(1) % 16 != 0) return false;
-
-          auto w_dtype = pir::GetDataTypeFromValue(match_ctx.Tensor("w"));
-          if (!w_dtype.isa<pir::Float16Type>() &&
-              !w_dtype.isa<pir::BFloat16Type>())
-            return false;
-
           if (x_dims.at(x_dims.size() - 1) != w_dims.at(0)) return false;
 
           return true;
@@ -104,19 +108,49 @@
     //
     paddle::drr::ResultPattern res = src.ResultPattern();
 
-    const auto &weight_quantize =
-        res.Op(paddle::dialect::WeightQuantizeOp::name(),
-               {{"algo", res.StrAttr("weight_only_int8")},
-                {"arch", res.Int32Attr(getSMVersion())},
-                {"group_size", res.Int32Attr(-1)}});
-    weight_quantize({&res.Tensor("w")},
-                    {&res.Tensor("quanted_weight_tensor"),
-                     &res.Tensor("weight_scale_tensor")});
+    if (algo_ == "weight_only_int4") {
+      // TODO(liuyuanle): When the operator weight_quantize supports
+      // weight_only_int4 on gpu version, delete the memory copy.
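+      // The weight lives on the GPU, but weight_quantize currently handles
+      // weight_only_int4 only on the CPU, so the result pattern stages a host
+      // round trip: w --memcpy_d2h--> w_cpu --weight_quantize-->
+      // {quanted_weight_tensor_cpu, weight_scale_tensor_cpu} --memcpy_h2d-->
+      // {quanted_weight_tensor, weight_scale_tensor}.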
+ const auto &memcpy_d2h = + res.Op(paddle::dialect::MemcpyD2hOp::name(), + {{"dst_place_type", res.Int32Attr(0 /*cpu*/)}}); + res.Tensor("w_cpu") = memcpy_d2h(res.Tensor("w")); + const auto &weight_quantize = + res.Op(paddle::dialect::WeightQuantizeOp::name(), + {{"algo", res.StrAttr(algo_)}, + {"arch", res.Int32Attr(sm_version_)}, + {"group_size", res.Int32Attr(-1)}}); + weight_quantize({&res.Tensor("w_cpu")}, + {&res.Tensor("quanted_weight_tensor_cpu"), + &res.Tensor("weight_scale_tensor_cpu")}); + + const auto &memcpy_h2d_1 = + res.Op(paddle::dialect::MemcpyH2dOp::name(), + {{"dst_place_type", res.Int32Attr(1 /*gpu*/)}}); + res.Tensor("quanted_weight_tensor") = + memcpy_h2d_1(res.Tensor("quanted_weight_tensor_cpu")); + const auto &memcpy_h2d_2 = + res.Op(paddle::dialect::MemcpyH2dOp::name(), + {{"dst_place_type", res.Int32Attr(1 /*gpu*/)}}); + res.Tensor("weight_scale_tensor") = + memcpy_h2d_2(res.Tensor("weight_scale_tensor_cpu")); + } else { + const auto &weight_quantize = + res.Op(paddle::dialect::WeightQuantizeOp::name(), + {{"algo", res.StrAttr(algo_)}, + {"arch", res.Int32Attr(sm_version_)}, + {"group_size", res.Int32Attr(-1)}}); + + weight_quantize({&res.Tensor("w")}, + {&res.Tensor("quanted_weight_tensor"), + &res.Tensor("weight_scale_tensor")}); + } const auto &weight_only_linear = res.Op(paddle::dialect::WeightOnlyLinearOp::name(), - {{"weight_dtype", res.StrAttr("int8")}, - {"arch", res.Int32Attr(getSMVersion())}, + {{"weight_dtype", + res.StrAttr(algo_ == "weight_only_int8" ? "int8" : "int4")}, + {"arch", res.Int32Attr(sm_version_)}, {"group_size", res.Int32Attr(-1)}}); weight_only_linear({&res.Tensor("x"), &res.Tensor("quanted_weight_tensor"), @@ -127,6 +161,14 @@ class FusedWeightOnlyLinearWithBiasPattern }; class FusedWeightOnlyLinearNoBiasPattern : public paddle::drr::DrrPatternBase { + private: + std::string algo_; + int sm_version_; + + public: + FusedWeightOnlyLinearNoBiasPattern(const std::string &algo, int sm_version) + : algo_(algo), sm_version_(sm_version) {} + public: std::string name() const override { return "FusedWeightOnlyLinearNoBiasPattern"; @@ -179,19 +221,48 @@ class FusedWeightOnlyLinearNoBiasPattern : public paddle::drr::DrrPatternBase { // paddle::drr::ResultPattern res = src.ResultPattern(); - const auto &weight_quantize = - res.Op(paddle::dialect::WeightQuantizeOp::name(), - {{"algo", res.StrAttr("weight_only_int8")}, - {"arch", res.Int32Attr(getSMVersion())}, - {"group_size", res.Int32Attr(-1)}}); - weight_quantize({&res.Tensor("w")}, - {&res.Tensor("quanted_weight_tensor"), - &res.Tensor("weight_scale_tensor")}); - + if (algo_ == "weight_only_int4") { + // TODO(liuyuanle): When the operator weight_quantize supports + // weight_only_int4 on gpu version, delete the memory copy. 
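+      // Same host-side quantization round trip as in
+      // FusedWeightOnlyLinearWithBiasPattern above.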
+      const auto &memcpy_d2h =
+          res.Op(paddle::dialect::MemcpyD2hOp::name(),
+                 {{"dst_place_type", res.Int32Attr(0 /*cpu*/)}});
+      res.Tensor("w_cpu") = memcpy_d2h(res.Tensor("w"));
+      const auto &weight_quantize =
+          res.Op(paddle::dialect::WeightQuantizeOp::name(),
+                 {{"algo", res.StrAttr(algo_)},
+                  {"arch", res.Int32Attr(sm_version_)},
+                  {"group_size", res.Int32Attr(-1)}});
+      weight_quantize({&res.Tensor("w_cpu")},
+                      {&res.Tensor("quanted_weight_tensor_cpu"),
+                       &res.Tensor("weight_scale_tensor_cpu")});
+
+      const auto &memcpy_h2d_1 =
+          res.Op(paddle::dialect::MemcpyH2dOp::name(),
+                 {{"dst_place_type", res.Int32Attr(1 /*gpu*/)}});
+      res.Tensor("quanted_weight_tensor") =
+          memcpy_h2d_1(res.Tensor("quanted_weight_tensor_cpu"));
+      const auto &memcpy_h2d_2 =
+          res.Op(paddle::dialect::MemcpyH2dOp::name(),
+                 {{"dst_place_type", res.Int32Attr(1 /*gpu*/)}});
+      res.Tensor("weight_scale_tensor") =
+          memcpy_h2d_2(res.Tensor("weight_scale_tensor_cpu"));
+    } else {
+      const auto &weight_quantize =
+          res.Op(paddle::dialect::WeightQuantizeOp::name(),
+                 {{"algo", res.StrAttr(algo_)},
+                  {"arch", res.Int32Attr(sm_version_)},
+                  {"group_size", res.Int32Attr(-1)}});
+
+      weight_quantize({&res.Tensor("w")},
+                      {&res.Tensor("quanted_weight_tensor"),
+                       &res.Tensor("weight_scale_tensor")});
+    }
     const auto &weight_only_linear =
         res.Op(paddle::dialect::WeightOnlyLinearOp::name(),
-               {{"weight_dtype", res.StrAttr("int8")},
-                {"arch", res.Int32Attr(getSMVersion())},
+               {{"weight_dtype",
+                 res.StrAttr(algo_ == "weight_only_int8" ? "int8" : "int4")},
+                {"arch", res.Int32Attr(sm_version_)},
                 {"group_size", res.Int32Attr(-1)}});
     weight_only_linear({&res.Tensor("x"),
                         &res.Tensor("quanted_weight_tensor"),
@@ -204,15 +275,28 @@
 class FusedWeightOnlyLinearPass : public pir::PatternRewritePass {
  public:
   FusedWeightOnlyLinearPass()
-      : pir::PatternRewritePass("fused_weight_only_linear_pass", 4) {}
+      : pir::PatternRewritePass("fused_weight_only_linear_pass", 4),
+        sm_version_(getSMVersion()) {}
 
   pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override {
+    std::string algo = "weight_only_int4";
+    if (Has("weight_only_algo")) {
+      algo = Get<std::string>("weight_only_algo");
+    }
+    PADDLE_ENFORCE_EQ(algo == "weight_only_int8" || algo == "weight_only_int4",
+                      true,
+                      common::errors::InvalidArgument(
+                          "fused_weight_only_linear_pass only support "
+                          "weight_only_int8 or weight_only_int4, but get %s.",
+                          algo));
+
     pir::RewritePatternSet ps(context);
-    ps.Add(paddle::drr::Create<FusedWeightOnlyLinearWithBiasPattern>(context,
-                                                                     true));
-    ps.Add(paddle::drr::Create<FusedWeightOnlyLinearWithBiasPattern>(context,
-                                                                     false));
-    ps.Add(paddle::drr::Create<FusedWeightOnlyLinearNoBiasPattern>(context));
+    ps.Add(paddle::drr::Create<FusedWeightOnlyLinearWithBiasPattern>(
+        context, true, algo, sm_version_));
+    ps.Add(paddle::drr::Create<FusedWeightOnlyLinearWithBiasPattern>(
+        context, false, algo, sm_version_));
+    ps.Add(paddle::drr::Create<FusedWeightOnlyLinearNoBiasPattern>(
+        context, algo, sm_version_));
     return ps;
   }
 
@@ -228,15 +312,15 @@ class FusedWeightOnlyLinearPass : public pir::PatternRewritePass {
   }
 
   bool CanApplyOn(pir::Operation *op) const override {
-    int sm_version = getSMVersion();
-    if (sm_version != 70 && sm_version != 75 && sm_version != 80 &&
-        sm_version != 86) {
+    if (sm_version_ != 70 && sm_version_ != 75 && sm_version_ != 80 &&
+        sm_version_ != 86) {
       return false;
     }
     return op->num_regions() > 0;
   }
 
  private:
+  int sm_version_;
   pir::FrozenRewritePatternSet patterns_;
 };
diff --git a/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.cc
new file mode 100644
index
0000000000000..2c715ab9b437c --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.cc @@ -0,0 +1,574 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" +#include "paddle/fluid/pir/utils/general_functions.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { + +class ConvActivationFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t activation_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ > 0 : conv2d + bias + activation + : conv2d + residual + activation + : conv2d + + bias + residual + activation + */ + const int fused_level_; + + public: + ConvActivationFusePattern(size_t activation_count, + const std::string &activation_name, + int fused_level) + : activation_count_(activation_count), + activation_name_(activation_name), + fused_level_(fused_level) {} + + std::string name() const override { + return "Conv" + std::to_string(fused_level_) + activation_name_ + + "FusePattern"; + } + + uint32_t benefit() const override { return activation_count_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + + const auto &conv = + fused_level_ == 0 + ? pat.Op(conv_name, + {{"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}) + : pat.Op(conv_name, + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + std::string activation_name_op = "pd_op." 
+ activation_name_;
+    if (activation_name_ == "hard_swish") {
+      // oneDNN uses hard_swish; paddle uses hardswish
+      activation_name_op = "pd_op.hardswish";
+    } else if (activation_name_ == "hard_sigmoid") {
+      activation_name_op = "pd_op.hardsigmoid";
+    }
+
+    std::unordered_map<std::string, paddle::drr::Attribute> act_attrs;
+    if (activation_name_op == paddle::dialect::HardsigmoidOp::name()) {
+      act_attrs.emplace("slope", pat.Attr("slope"));
+      act_attrs.emplace("offset", pat.Attr("offset"));
+    } else if (activation_name_op == paddle::dialect::LeakyReluOp::name()) {
+      act_attrs.emplace("negative_slope", pat.Attr("negative_slope"));
+    } else if (activation_name_op == paddle::dialect::GeluOp::name()) {
+      act_attrs.emplace("approximate", pat.Attr("approximate"));
+    }
+    const auto &activation = pat.Op(activation_name_op, act_attrs);
+
+    if (fused_level_ > 0) {
+      conv({&pat.Tensor("input"),
+            &pat.Tensor("filter"),
+            &pat.Tensor("bias"),
+            &pat.Tensor("residual_param")},
+           {&pat.Tensor("conv2d_out")});
+    } else {
+      conv({&pat.Tensor("input"), &pat.Tensor("filter")},
+           {&pat.Tensor("conv2d_out")});
+    }
+    pat.Tensor("act_out") = activation(pat.Tensor("conv2d_out"));
+
+    if (fused_level_ > 0) {
+      pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) {
+        auto act_type = match_ctx.Attr<std::string>("fuse_activation");
+        if (act_type != "") {
+          return false;
+        }
+        return true;
+      });
+    }
+
+    pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) {
+      if (activation_name_ == "leaky_relu") {
+        float negative_slope = match_ctx.Attr<float>("negative_slope");
+        // leaky relu alpha must be a positive number
+        if (negative_slope <= 0.0) {
+          return false;
+        }
+      }
+      return true;
+    });
+
+    paddle::drr::ResultPattern res = pat.ResultPattern();
+    auto fuse_beta = res.Float32Attr(0.0f);
+    auto fuse_alpha = res.Float32Attr(0.0f);
+    if (activation_name_ == "relu6") {
+      fuse_beta = res.Float32Attr(6.0f);
+    } else if (activation_name_ == "hard_swish") {
+      // hard_swish carries no fuse attrs of its own (the op's implicit values
+      // are float threshold = 6.0f, float scale = 6.0f, float offset = 3.0f);
+      // the previous fuse implementation fixed fuse_alpha = 1.f / 6.f and
+      // fuse_beta = 1.f / 2.f, so the same values are used here.
+      fuse_beta = res.Float32Attr(1.f / 2.f);
+      fuse_alpha = res.Float32Attr(1.f / 6.f);
+    } else if (activation_name_ == "swish") {
+      fuse_alpha = res.Float32Attr(1.0f);
+    } else if (activation_name_ == "leaky_relu") {
+      fuse_alpha = pat.Attr("negative_slope");
+    } else if (activation_name_ == "hard_sigmoid") {
+      fuse_alpha = pat.Attr("slope");
+      fuse_beta = pat.Attr("offset");
+    }
+
+    const auto &fused_conv =
+        fused_level_ == 0
+            ?
res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr(activation_name_)}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", fuse_alpha}, + {"fuse_beta", fuse_beta}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", res.StrAttr(activation_name_)}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", fuse_alpha}, + {"fuse_beta", fuse_beta}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.Tensor("bias"), + &res.Tensor("residual_param")}, + {&res.Tensor("act_out")}); + } else { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out")}); + } + } +}; + +class ConvGeluFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string activation_name_; + const int fused_level_; + + public: + ConvGeluFusePattern(const std::string &activation_name, int fused_level) + : activation_name_(activation_name), fused_level_(fused_level) {} + + std::string name() const override { return "ConvGeluFusePattern"; } + + uint32_t benefit() const override { return fused_level_ + 1; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + + const auto &conv = + fused_level_ == 0 + ? 
pat.Op(conv_name, + {{"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}) + : pat.Op(conv_name, + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + const auto &activation = + pat.Op(activation_name_, {{"approximate", pat.Attr("approximate")}}); + if (fused_level_ > 0) { + conv({&pat.Tensor("input"), + &pat.Tensor("filter"), + &pat.Tensor("bias"), + &pat.Tensor("residual_param")}, + {&pat.Tensor("conv2d_out")}); + + } else { + conv({&pat.Tensor("input"), &pat.Tensor("filter")}, + {&pat.Tensor("conv2d_out")}); + } + + pat.Tensor("act_out") = activation(pat.Tensor("conv2d_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + + paddle::drr::ResultPattern res = pat.ResultPattern(); + const auto &gelu = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::string { + bool approximate = match_ctx.Attr("approximate"); + if (approximate) return "gelu_tanh"; + return "gelu_erf"; + }); + auto fuse_residual = res.BoolAttr(false); + + const auto &fused_conv = + fused_level_ == 0 + ? 
res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", gelu}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op(paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", gelu}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.Tensor("bias"), + &res.Tensor("residual_param")}, + {&res.Tensor("act_out")}); + } else { + fused_conv({&res.Tensor("input"), + &res.Tensor("filter"), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out")}); + } + } +}; + +class ConvClipFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string activation_name_; + const int fused_level_; + + public: + ConvClipFusePattern(const std::string &activation_name, int fused_level) + : activation_name_(activation_name), fused_level_(fused_level) {} + + std::string name() const override { return "ConvClipFusePattern"; } + + uint32_t benefit() const override { return fused_level_ + 1; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + + const auto &full_1 = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("full_1_value")}}); + const auto &full_2 = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("full_2_value")}}); + pat.Tensor("min") = full_1(); + pat.Tensor("max") = full_2(); + const auto &conv = + fused_level_ == 0 + ? 
pat.Op(conv_name, + {{"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}}) + : pat.Op(conv_name, + {{ + {"strides", pat.Attr("strides")}, + {"paddings", pat.Attr("paddings")}, + {"padding_algorithm", pat.Attr("padding_algorithm")}, + {"dilations", pat.Attr("dilations")}, + {"groups", pat.Attr("groups")}, + {"data_format", pat.Attr("data_format")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"scale_in", pat.Attr("scale_in")}, + {"scale_out", pat.Attr("scale_out")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_weights", pat.Attr("scale_weights")}, + }}); + + const auto &activation = pat.Op(activation_name_); + if (fused_level_ > 0) { + conv({&pat.Tensor("input"), + &pat.Tensor("filter"), + &pat.Tensor("bias"), + &pat.Tensor("residual_param")}, + {&pat.Tensor("conv2d_out")}); + + } else { + conv({&pat.Tensor("input"), &pat.Tensor("filter")}, + {&pat.Tensor("conv2d_out")}); + } + pat.Tensor("act_out") = activation( + pat.Tensor("conv2d_out"), pat.Tensor("min"), pat.Tensor("max")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + const auto &fused_conv = + fused_level_ == 0 + ? 
res.Op(paddle::onednn::dialect::FusedConv2dOp::name(),
+                     {{
+                         {"strides", pat.Attr("strides")},
+                         {"paddings", pat.Attr("paddings")},
+                         {"padding_algorithm", pat.Attr("padding_algorithm")},
+                         {"dilations", pat.Attr("dilations")},
+                         {"groups", pat.Attr("groups")},
+                         {"data_format", pat.Attr("data_format")},
+                         {"mkldnn_data_type", res.StrAttr("float32")},
+                         {"fuse_activation", res.StrAttr("clip")},
+                         {"fuse_residual_connection", res.BoolAttr(false)},
+                         {"force_fp32_output", res.BoolAttr(false)},
+                         {"fuse_alpha", pat.Attr("full_1_value")},
+                         {"fuse_beta", pat.Attr("full_2_value")},
+                         {"scale_in", res.Float32Attr(1.0f)},
+                         {"scale_out", res.Float32Attr(1.0f)},
+                         {"scale_in_eltwise", res.Float32Attr(1.0f)},
+                         {"scale_weights", res.VectorFloatAttr({1.0f})},
+                     }})
+            : res.Op(paddle::onednn::dialect::FusedConv2dOp::name(),
+                     {{
+                         {"strides", pat.Attr("strides")},
+                         {"paddings", pat.Attr("paddings")},
+                         {"padding_algorithm", pat.Attr("padding_algorithm")},
+                         {"dilations", pat.Attr("dilations")},
+                         {"groups", pat.Attr("groups")},
+                         {"data_format", pat.Attr("data_format")},
+                         {"mkldnn_data_type", pat.Attr("mkldnn_data_type")},
+                         {"fuse_activation", res.StrAttr("clip")},
+                         {"fuse_residual_connection",
+                          pat.Attr("fuse_residual_connection")},
+                         {"force_fp32_output", pat.Attr("force_fp32_output")},
+                         {"fuse_alpha", pat.Attr("full_1_value")},
+                         {"fuse_beta", pat.Attr("full_2_value")},
+                         {"scale_in", pat.Attr("scale_in")},
+                         {"scale_out", pat.Attr("scale_out")},
+                         {"scale_in_eltwise", pat.Attr("scale_in_eltwise")},
+                         {"scale_weights", pat.Attr("scale_weights")},
+                     }});
+
+    if (fused_level_ > 0) {
+      fused_conv({&res.Tensor("input"),
+                  &res.Tensor("filter"),
+                  &res.Tensor("bias"),
+                  &res.Tensor("residual_param")},
+                 {&res.Tensor("act_out")});
+    } else {
+      fused_conv({&res.Tensor("input"),
+                  &res.Tensor("filter"),
+                  &res.InputNoneTensor(),
+                  &res.InputNoneTensor()},
+                 {&res.Tensor("act_out")});
+    }
+  }
+};
+
+class ConvActFusePass : public pir::PatternRewritePass {
+ public:
+  ConvActFusePass()
+      : pir::PatternRewritePass("conv_activation_mkldnn_fuse_pass", 3) {}
+
+  pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override {
+    pir::RewritePatternSet ps(context);
+
+    // These eleven activations need no extra attributes and can share the
+    // same pattern
+    std::vector<std::string> supported_activations_name = {"abs",
+                                                           "sqrt",
+                                                           "mish",
+                                                           "relu",
+                                                           "sigmoid",
+                                                           "tanh",
+                                                           "relu6",
+                                                           "hard_swish",
+                                                           "swish",
+                                                           "leaky_relu",
+                                                           "hard_sigmoid"};
+
+    size_t pattern_num = 1;
+    // conv + activation -> fused_conv2d
+    for (auto activation : supported_activations_name) {
+      ps.Add(paddle::drr::Create<ConvActivationFusePattern>(
+          context, pattern_num, activation, 0));
+      pattern_num++;
+    }
+
+    // conv + bias (or residual, or bias + residual) has already been fused
+    // into fused_conv2d; fused_conv2d + activation -> fused_conv2d
+    for (auto activation : supported_activations_name) {
+      ps.Add(paddle::drr::Create<ConvActivationFusePattern>(
+          context, pattern_num, activation, 1));
+      pattern_num++;
+    }
+
+    ps.Add(paddle::drr::Create<ConvGeluFusePattern>(
+        context, paddle::dialect::GeluOp::name(), 0));
+    ps.Add(paddle::drr::Create<ConvGeluFusePattern>(
+        context, paddle::dialect::GeluOp::name(), 1));
+
+    ps.Add(paddle::drr::Create<ConvClipFusePattern>(
+        context, paddle::dialect::ClipOp::name(), 0));
+    ps.Add(paddle::drr::Create<ConvClipFusePattern>(
+        context, paddle::dialect::ClipOp::name(), 1));
+    return ps;
+  }
+};
+
+}  // namespace
+
+namespace pir {
+
+std::unique_ptr<Pass> CreateConv2dActFusePass() {
+  /**
+   *     conv
+   *      |        ->  fused_conv
+   *  activation
+   *
+   *  fused_conv2d (bias or residual)
+   *      |        ->  fused_conv2d
+   *  activation
+   */
+  return std::make_unique<ConvActFusePass>();
+}
+
+}  // namespace pir
+
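+// Minimal usage sketch (hypothetical driver code; assumes the pir::PassManager
+// API used elsewhere in Paddle, nothing defined in this file):
+//
+//   pir::PassManager pm(pir::IrContext::Instance());
+//   pm.AddPass(pir::CreateConv2dActFusePass());
+//   pm.Run(&program);  // rewrites conv2d + activation into fused_conv2d
+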
+REGISTER_IR_PASS(conv_activation_onednn_fuse_pass, ConvActFusePass);
diff --git a/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.h
new file mode 100644
index 0000000000000..520449bbd028e
--- /dev/null
+++ b/paddle/fluid/pir/transforms/onednn/conv_activation_onednn_fuse_pass.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include "paddle/pir/include/core/dll_decl.h"
+
+namespace pir {
+
+class Pass;
+
+IR_API std::unique_ptr<Pass> CreateConv2dActFusePass();
+
+}  // namespace pir
diff --git a/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.cc
new file mode 100644
index 0000000000000..5f2da932bb2af
--- /dev/null
+++ b/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.cc
@@ -0,0 +1,1039 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { + +class NConvConcatActivationFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t concat_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ = 1 : conv2d + bias + activation + conv2d + residual + activation + conv2d + bias + residual + activation + */ + const int fused_level_; + const int benefit_; + + public: + NConvConcatActivationFusePattern(size_t concat_count, + const std::string &activation_name, + int fused_level, + int benefit) + : concat_count_(concat_count), + activation_name_(activation_name), + fused_level_(fused_level), + benefit_(benefit) {} + + std::string name() const override { + return "Conv" + std::to_string(concat_count_) + "Concat" + "Level" + + std::to_string(fused_level_) + activation_name_ + "Pattern"; + } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + std::vector combine_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &conv = + fused_level_ == 0 + ? pat.Op( + conv_name, + {{"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}}) + : pat.Op( + conv_name, + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", + pat.Attr("fuse_activation" + std::to_string(i))}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", + pat.Attr("fuse_alpha" + std::to_string(i))}, + {"fuse_beta", + pat.Attr("fuse_beta" + std::to_string(i))}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i)), + &pat.Tensor("__@bias" + std::to_string(i) + "@__"), + &pat.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + + } else { + conv({&pat.Tensor("input" + std::to_string(i)), + 
&pat.Tensor("filter" + std::to_string(i))}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + } + + combine_in.push_back(&pat.Tensor("conv2d_out_" + std::to_string(i))); + } + const auto &combine_op = pat.Op(pir::CombineOp::name()); + const auto &full_op = pat.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + combine_op(combine_in, {&pat.Tensor("combine_out")}); + const auto &concat_op = pat.Op(paddle::dialect::ConcatOp::name()); + concat_op({&pat.Tensor("combine_out"), &full_op()}, + {&pat.Tensor("concat_out")}); + + std::string activation_name_op = "pd_op." + activation_name_; + if (activation_name_ == "hard_swish") { + // oneDNN use hard_swish, paddle use hardswish + activation_name_op = "pd_op.hardswish"; + } + const auto &activation = + activation_name_op != "pd_op.leaky_relu" + ? pat.Op(activation_name_op) + : pat.Op(activation_name_op, + {{"negative_slope", pat.Attr("negative_slope")}}); + pat.Tensor("activation_out") = activation(pat.Tensor("concat_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + if (activation_name_ == "leaky_relu") { + float negative_slope = match_ctx.Attr("negative_slope"); + // leaky relu alpha is a positive number + if (negative_slope <= 0.0) { + return false; + } + } + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + auto fuse_beta = res.Float32Attr(0.0f); + auto fuse_alpha = res.Float32Attr(0.0f); + if (activation_name_ == "relu6") { + fuse_beta = res.Float32Attr(6.0f); + } else if (activation_name_ == "hard_swish") { + // hard swish have not attr float threshold = 6.0f, float scale = 6.0f, + // float offset = 3.0f attr But in previous implementation hard swish, + // fuse_alpha=1.f / 6.f, fuse_beta=1.f / 2.f, it has fixed + fuse_beta = res.Float32Attr(1.f / 2.f); + fuse_alpha = res.Float32Attr(1.f / 6.f); + } else if (activation_name_ == "swish") { + fuse_alpha = res.Float32Attr(1.0f); + } else if (activation_name_ == "leaky_relu") { + fuse_alpha = pat.Attr("negative_slope"); + } + + std::vector combine_result_in; + // int input_num = 1; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &fused_conv = + fused_level_ == 0 + ? 
res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr(activation_name_)}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", fuse_alpha}, + {"fuse_beta", fuse_beta}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", res.StrAttr(activation_name_)}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", fuse_alpha}, + {"fuse_beta", fuse_beta}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.Tensor("__@bias" + std::to_string(i) + "@__"), + &res.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&res.Tensor("act_out_" + std::to_string(i))}); + + } else { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out_" + std::to_string(i))}); + } + combine_result_in.push_back(&res.Tensor("act_out_" + std::to_string(i))); + } + + const auto &combine = res.Op(pir::CombineOp::name()); + + combine(combine_result_in, {&res.Tensor("combine_result_out")}); + + // const auto &concat_result_op = + // res.Op(paddle::dialect::ConcatOp::name(),{{"axis", res.Int32Attr(0) }}); + const auto &full_result_op = res.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + const auto &concat_result_op = res.Op(paddle::dialect::ConcatOp::name()); + concat_result_op({&res.Tensor("combine_result_out"), &full_result_op()}, + {&res.Tensor("activation_out")}); + + // concat_result_op(combine_result_in, {&res.Tensor("concat_out")}); + } +}; + +class NConvConcatHardSigmoidFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t concat_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ = 1 : conv2d + bias + activation + conv2d + residual + activation + conv2d + 
bias + residual + activation + */ + const int fused_level_; + + public: + NConvConcatHardSigmoidFusePattern(size_t concat_count, + const std::string &activation_name, + int fused_level) + : concat_count_(concat_count), + activation_name_(activation_name), + fused_level_(fused_level) {} + + std::string name() const override { + return "Conv" + std::to_string(concat_count_) + "Concat" + "Level" + + std::to_string(fused_level_) + "HardSigmoidPattern"; + } + + uint32_t benefit() const override { return concat_count_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + std::vector combine_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &conv = + fused_level_ == 0 + ? pat.Op( + conv_name, + {{"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}}) + : pat.Op( + conv_name, + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", + pat.Attr("fuse_activation" + std::to_string(i))}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", + pat.Attr("fuse_alpha" + std::to_string(i))}, + {"fuse_beta", + pat.Attr("fuse_beta" + std::to_string(i))}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i)), + &pat.Tensor("__@bias" + std::to_string(i) + "@__"), + &pat.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + + } else { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i))}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + } + + combine_in.push_back(&pat.Tensor("conv2d_out_" + std::to_string(i))); + } + const auto &combine_op = pat.Op(pir::CombineOp::name()); + const auto &full_op = pat.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + combine_op(combine_in, {&pat.Tensor("combine_out")}); + const auto &concat_op = pat.Op(paddle::dialect::ConcatOp::name()); + concat_op({&pat.Tensor("combine_out"), &full_op()}, + {&pat.Tensor("concat_out")}); + + const auto &activation = + pat.Op(activation_name_, + {{"slope", pat.Attr("slope")}, {"offset", 
pat.Attr("offset")}}); + pat.Tensor("activation_out") = activation(pat.Tensor("concat_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::vector combine_result_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &fused_conv = + fused_level_ == 0 + ? res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr("hard_sigmoid")}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", pat.Attr("slope")}, + {"fuse_beta", pat.Attr("offset")}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", res.StrAttr("hard_sigmoid")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", pat.Attr("slope")}, + {"fuse_beta", pat.Attr("offset")}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.Tensor("__@bias" + std::to_string(i) + "@__"), + &res.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&res.Tensor("act_out_" + std::to_string(i))}); + + } else { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out_" + std::to_string(i))}); + } + combine_result_in.push_back(&res.Tensor("act_out_" + std::to_string(i))); + } + + const auto &combine = res.Op(pir::CombineOp::name()); + + combine(combine_result_in, {&res.Tensor("combine_result_out")}); + + // const auto &concat_result_op = + // res.Op(paddle::dialect::ConcatOp::name(),{{"axis", res.Int32Attr(0) }}); + const auto &full_result_op = res.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + const auto 
&concat_result_op = res.Op(paddle::dialect::ConcatOp::name()); + concat_result_op({&res.Tensor("combine_result_out"), &full_result_op()}, + {&res.Tensor("activation_out")}); + + // concat_result_op(combine_result_in, {&res.Tensor("concat_out")}); + } +}; + +class NConvConcatGeluFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t concat_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ = 1 : conv2d + bias + activation + conv2d + residual + activation + conv2d + bias + residual + activation + */ + const int fused_level_; + + public: + NConvConcatGeluFusePattern(size_t concat_count, + const std::string &activation_name, + int fused_level) + : concat_count_(concat_count), + activation_name_(activation_name), + fused_level_(fused_level) {} + + std::string name() const override { + return "Conv" + std::to_string(concat_count_) + "Concat" + "Level" + + std::to_string(fused_level_) + "GeluPattern"; + } + + uint32_t benefit() const override { return concat_count_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + std::vector combine_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &conv = + fused_level_ == 0 + ? pat.Op( + conv_name, + {{"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}}) + : pat.Op( + conv_name, + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", + pat.Attr("fuse_activation" + std::to_string(i))}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", + pat.Attr("fuse_alpha" + std::to_string(i))}, + {"fuse_beta", + pat.Attr("fuse_beta" + std::to_string(i))}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i)), + &pat.Tensor("__@bias" + std::to_string(i) + "@__"), + &pat.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + + } else { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i))}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + } + + combine_in.push_back(&pat.Tensor("conv2d_out_" + std::to_string(i))); + } + const auto &combine_op = 
pat.Op(pir::CombineOp::name()); + const auto &full_op = pat.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + combine_op(combine_in, {&pat.Tensor("combine_out")}); + const auto &concat_op = pat.Op(paddle::dialect::ConcatOp::name()); + concat_op({&pat.Tensor("combine_out"), &full_op()}, + {&pat.Tensor("concat_out")}); + + const auto &activation = + pat.Op(activation_name_, {{"approximate", pat.Attr("approximate")}}); + pat.Tensor("activation_out") = activation(pat.Tensor("concat_out")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::vector combine_result_in; + const auto &gelu = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::string { + bool approximate = match_ctx.Attr("approximate"); + if (approximate) return "gelu_tanh"; + return "gelu_erf"; + }); + + for (size_t i = 1; i <= concat_count_; i++) { + const auto &fused_conv = + fused_level_ == 0 + ? res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", gelu}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", gelu}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.Tensor("__@bias" + std::to_string(i) + "@__"), + &res.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&res.Tensor("act_out_" + std::to_string(i))}); + + } else { + fused_conv({&res.Tensor("input" + 
std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out_" + std::to_string(i))}); + } + combine_result_in.push_back(&res.Tensor("act_out_" + std::to_string(i))); + } + + const auto &combine = res.Op(pir::CombineOp::name()); + + combine(combine_result_in, {&res.Tensor("combine_result_out")}); + + // const auto &concat_result_op = + // res.Op(paddle::dialect::ConcatOp::name(),{{"axis", res.Int32Attr(0) }}); + const auto &full_result_op = res.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + const auto &concat_result_op = res.Op(paddle::dialect::ConcatOp::name()); + concat_result_op({&res.Tensor("combine_result_out"), &full_result_op()}, + {&res.Tensor("activation_out")}); + + // concat_result_op(combine_result_in, {&res.Tensor("concat_out")}); + } +}; + +class NConvConcatClipFusePattern : public paddle::drr::DrrPatternBase { + private: + const size_t concat_count_; + std::string activation_name_; + /* + * fused_level_ = 0 : conv2d + activation + fused_level_ = 1 : conv2d + bias + activation + conv2d + residual + activation + conv2d + bias + residual + activation + */ + const int fused_level_; + + public: + NConvConcatClipFusePattern(size_t concat_count, + const std::string &activation_name, + int fused_level) + : concat_count_(concat_count), + activation_name_(activation_name), + fused_level_(fused_level) {} + + std::string name() const override { + return "Conv" + std::to_string(concat_count_) + "Concat" + "Level" + + std::to_string(fused_level_) + "ClipPattern"; + } + + uint32_t benefit() const override { return concat_count_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + std::string conv_name = paddle::dialect::Conv2dOp::name(); + if (fused_level_ > 0) { + conv_name = paddle::onednn::dialect::FusedConv2dOp::name(); + } + + std::vector combine_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &conv = + fused_level_ == 0 + ? 
pat.Op( + conv_name, + {{"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}}) + : pat.Op( + conv_name, + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", + pat.Attr("fuse_activation" + std::to_string(i))}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", + pat.Attr("fuse_alpha" + std::to_string(i))}, + {"fuse_beta", + pat.Attr("fuse_beta" + std::to_string(i))}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i)), + &pat.Tensor("__@bias" + std::to_string(i) + "@__"), + &pat.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + + } else { + conv({&pat.Tensor("input" + std::to_string(i)), + &pat.Tensor("filter" + std::to_string(i))}, + {&pat.Tensor("conv2d_out_" + std::to_string(i))}); + } + + combine_in.push_back(&pat.Tensor("conv2d_out_" + std::to_string(i))); + } + const auto &combine_op = pat.Op(pir::CombineOp::name()); + const auto &full_op = pat.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + combine_op(combine_in, {&pat.Tensor("combine_out")}); + const auto &concat_op = pat.Op(paddle::dialect::ConcatOp::name()); + concat_op({&pat.Tensor("combine_out"), &full_op()}, + {&pat.Tensor("concat_out")}); + + const auto &full_1 = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("full_1_value")}}); + const auto &full_2 = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("full_2_value")}}); + pat.Tensor("min") = full_1(); + pat.Tensor("max") = full_2(); + + const auto &activation = pat.Op(activation_name_); + + pat.Tensor("activation_out") = activation( + pat.Tensor("concat_out"), pat.Tensor("min"), pat.Tensor("max")); + + if (fused_level_ > 0) { + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto act_type = match_ctx.Attr("fuse_activation"); + if (act_type != "") { + return false; + } + return true; + }); + } + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::vector combine_result_in; + for (size_t i = 1; i <= concat_count_; i++) { + const auto &fused_conv = + fused_level_ == 0 + ? 
res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"fuse_activation", res.StrAttr("clip")}, + {"fuse_residual_connection", res.BoolAttr(false)}, + {"force_fp32_output", res.BoolAttr(false)}, + {"fuse_alpha", pat.Attr("full_1_value")}, + {"fuse_beta", pat.Attr("full_2_value")}, + {"scale_in", res.Float32Attr(1.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(1.0f)}, + {"scale_weights", res.VectorFloatAttr({1.0f})}, + }}) + : res.Op( + paddle::onednn::dialect::FusedConv2dOp::name(), + {{ + {"strides", pat.Attr("strides" + std::to_string(i))}, + {"paddings", pat.Attr("paddings" + std::to_string(i))}, + {"padding_algorithm", + pat.Attr("padding_algorithm" + std::to_string(i))}, + {"dilations", + pat.Attr("dilations" + std::to_string(i))}, + {"groups", pat.Attr("groups" + std::to_string(i))}, + {"data_format", + pat.Attr("data_format" + std::to_string(i))}, + {"mkldnn_data_type", + pat.Attr("mkldnn_data_type" + std::to_string(i))}, + {"fuse_activation", res.StrAttr("clip")}, + {"fuse_residual_connection", + pat.Attr("fuse_residual_connection" + + std::to_string(i))}, + {"force_fp32_output", + pat.Attr("force_fp32_output" + std::to_string(i))}, + {"fuse_alpha", pat.Attr("full_1_value")}, + {"fuse_beta", pat.Attr("full_2_value")}, + {"scale_in", pat.Attr("scale_in" + std::to_string(i))}, + {"scale_out", + pat.Attr("scale_out" + std::to_string(i))}, + {"scale_in_eltwise", + pat.Attr("scale_in_eltwise" + std::to_string(i))}, + {"scale_weights", + pat.Attr("scale_weights" + std::to_string(i))}, + }}); + + if (fused_level_ > 0) { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.Tensor("__@bias" + std::to_string(i) + "@__"), + &res.Tensor("__@residual" + std::to_string(i) + "@__")}, + {&res.Tensor("act_out_" + std::to_string(i))}); + + } else { + fused_conv({&res.Tensor("input" + std::to_string(i)), + &res.Tensor("filter" + std::to_string(i)), + &res.InputNoneTensor(), + &res.InputNoneTensor()}, + {&res.Tensor("act_out_" + std::to_string(i))}); + } + combine_result_in.push_back(&res.Tensor("act_out_" + std::to_string(i))); + } + + const auto &combine = res.Op(pir::CombineOp::name()); + + combine(combine_result_in, {&res.Tensor("combine_result_out")}); + + // const auto &concat_result_op = + // res.Op(paddle::dialect::ConcatOp::name(),{{"axis", res.Int32Attr(0) }}); + const auto &full_result_op = res.Op(paddle::dialect::FullOp::name(), + {{"shape", pat.Attr("shape")}, + {"value", pat.Attr("value")}, + {"dtype", pat.Attr("dtype")}, + {"place", pat.Attr("place")}}); + + const auto &concat_result_op = res.Op(paddle::dialect::ConcatOp::name()); + concat_result_op({&res.Tensor("combine_result_out"), &full_result_op()}, + {&res.Tensor("activation_out")}); + + // concat_result_op(combine_result_in, {&res.Tensor("concat_out")}); + } +}; + +class ConvConcatActFusePass : public pir::PatternRewritePass { + public: + ConvConcatActFusePass() + : pir::PatternRewritePass("conv_concat_activation_mkldnn_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) 
override {
+    pir::RewritePatternSet ps(context);
+    std::vector<std::string> supported_activations_name = {"abs",
+                                                           "sqrt",
+                                                           "mish",
+                                                           "relu",
+                                                           "sigmoid",
+                                                           "tanh",
+                                                           "relu6",
+                                                           "hard_swish",
+                                                           "swish",
+                                                           "leaky_relu"};
+    int benefit = 1;
+    /**
+     * To keep the number of generated loop patterns manageable (too many of
+     * them reduce pass efficiency), only up to 6 conv2d inputs per concat
+     * are supported. In addition, oneDNN concat with a large number of
+     * inputs performs worse than the CPU kernel.
+     */
+    /**
+     * fused_level 0: conv2d + activation
+     *             1: fused_conv2d + activation
+     */
+    for (size_t concat_num = 1; concat_num <= 6; concat_num++) {
+      for (auto activation : supported_activations_name) {
+        ps.Add(paddle::drr::Create<NConvConcatActivationFusePattern>(
+            context, concat_num, activation, 0, benefit++));
+        ps.Add(paddle::drr::Create<NConvConcatActivationFusePattern>(
+            context, concat_num, activation, 1, benefit++));
+      }
+    }
+
+    /**
+     * These activations use separate patterns to keep the benefit values
+     * from growing too large.
+     */
+    for (size_t concat_num = 1; concat_num <= 6; concat_num++) {
+      ps.Add(paddle::drr::Create<NConvConcatHardSigmoidFusePattern>(
+          context, concat_num, paddle::dialect::HardsigmoidOp::name(), 0));
+      ps.Add(paddle::drr::Create<NConvConcatHardSigmoidFusePattern>(
+          context, concat_num, paddle::dialect::HardsigmoidOp::name(), 1));
+    }
+
+    for (size_t concat_num = 1; concat_num <= 6; concat_num++) {
+      ps.Add(paddle::drr::Create<NConvConcatGeluFusePattern>(
+          context, concat_num, paddle::dialect::GeluOp::name(), 0));
+      ps.Add(paddle::drr::Create<NConvConcatGeluFusePattern>(
+          context, concat_num, paddle::dialect::GeluOp::name(), 1));
+    }
+
+    for (size_t concat_num = 1; concat_num <= 6; concat_num++) {
+      ps.Add(paddle::drr::Create<NConvConcatClipFusePattern>(
+          context, concat_num, paddle::dialect::ClipOp::name(), 0));
+      ps.Add(paddle::drr::Create<NConvConcatClipFusePattern>(
+          context, concat_num, paddle::dialect::ClipOp::name(), 1));
+    }
+
+    return ps;
+  }
+};
+
+}  // namespace
+
+namespace pir {
+
+std::unique_ptr<Pass> CreateConv2dConcatActFusePass() {
+  // This pass must execute before conv_activation_mkldnn_fuse_pass:
+  //
+  //   conv  conv ... conv            fused_conv  fused_conv ... fused_conv
+  //      \    |    /                       \          |          /
+  //         concat            ->                   concat
+  //           |
+  //          act
+  //
+  // i.e. the activation that follows the concat is folded into every conv
+  // feeding it, so only the concat remains afterwards.
+  return std::make_unique<ConvConcatActFusePass>();
+}
+
+}  // namespace pir
+
+REGISTER_IR_PASS(conv_concat_activation_onednn_fuse_pass,
+                 ConvConcatActFusePass);
diff --git a/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.h
new file mode 100644
index 0000000000000..972d594569684
--- /dev/null
+++ b/paddle/fluid/pir/transforms/onednn/conv_concat_activation_onednn_fuse_pass.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
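+//
+// Illustrative scheduling sketch, assuming the usual pir::PassManager API
+// (only CreateConv2dConcatActFusePass below is declared by this header):
+//
+//   pir::PassManager pm(pir::IrContext::Instance());
+//   pm.AddPass(pir::CreateConv2dConcatActFusePass());  // must run first
+//   // ... per-conv conv + activation fusion passes afterwards ...
+//   pm.Run(&program);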
+ +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateConv2dConcatActFusePass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.cc similarity index 99% rename from paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.cc rename to paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.cc index 4ecd752b85997..c367712927dcc 100644 --- a/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.h" +#include "paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.h" #include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" @@ -385,7 +385,7 @@ class FusedConvBiasElementwiseAddAsYPattern class ConvElementwiseAddFusePass : public pir::PatternRewritePass { public: ConvElementwiseAddFusePass() - : pir::PatternRewritePass("conv_elementwise_add_mkldnn_fuse_pass", 3) {} + : pir::PatternRewritePass("conv_elementwise_add_onednn_fuse_pass", 3) {} pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { pir::RewritePatternSet ps(context); @@ -421,5 +421,5 @@ std::unique_ptr CreateConvElementwiseAddFusePass() { } // namespace pir -REGISTER_IR_PASS(conv_elementwise_add_mkldnn_fuse_pass, +REGISTER_IR_PASS(conv_elementwise_add_onednn_fuse_pass, ConvElementwiseAddFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/pir/transforms/onednn/conv_elementwise_add_mkldnn_fuse_pass.h rename to paddle/fluid/pir/transforms/onednn/conv_elementwise_add_onednn_fuse_pass.h diff --git a/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.cc b/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.cc new file mode 100644 index 0000000000000..5b89ac9a1f0f7 --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
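+//
+// The RequireNativeCall guard in DepthwiseConvPattern below encodes the
+// preconditions under which the depthwise_conv2d -> conv2d rewrite is safe.
+// As a standalone sketch of the same check (function name hypothetical):
+//
+//   bool CanUseOneDNNConv(const std::string &padding_algorithm,
+//                         const std::string &data_format, int groups) {
+//     static const std::set<std::string> kPad = {"EXPLICIT", "SAME", "VALID"};
+//     static const std::set<std::string> kFmt = {"NCHW", "NHWC", "AnyLayout"};
+//     return kPad.count(padding_algorithm) && kFmt.count(data_format) &&
+//            groups >= 1;
+//   }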
+
+#include "paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.h"
+
+#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h"
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+#include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
+
+#include "paddle/pir/include/pass/pass.h"
+#include "paddle/pir/include/pass/pass_registry.h"
+
+namespace {
+
+class DepthwiseConvPattern : public paddle::drr::DrrPatternBase {
+ private:
+  std::string depthwise_conv_name_;
+
+ public:
+  explicit DepthwiseConvPattern(const std::string &conv_name)
+      : depthwise_conv_name_(conv_name) {}
+
+  std::string name() const override { return "DepthwiseConvPattern"; }
+
+  uint32_t benefit() const override { return 2; }
+
+  void operator()(paddle::drr::DrrPatternContext *ctx) const override {
+    paddle::drr::SourcePattern pat = ctx->SourcePattern();
+
+    const auto &depthwise_conv =
+        pat.Op(depthwise_conv_name_,
+               {{"strides", pat.Attr("strides")},
+                {"paddings", pat.Attr("paddings")},
+                {"padding_algorithm", pat.Attr("padding_algorithm")},
+                {"dilations", pat.Attr("dilations")},
+                {"groups", pat.Attr("groups")},
+                {"data_format", pat.Attr("data_format")}});
+
+    depthwise_conv({&pat.Tensor("input"), &pat.Tensor("filter")},
+                   {&pat.Tensor("conv_out")});
+
+    pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) {
+      std::set<std::string> padding_algorithm = {"EXPLICIT", "SAME", "VALID"};
+      std::set<std::string> data_format = {"NCHW", "NHWC", "AnyLayout"};
+      if (padding_algorithm.count(
+              match_ctx.Attr<std::string>("padding_algorithm")) == 0 ||
+          data_format.count(match_ctx.Attr<std::string>("data_format")) ==
+              0 ||
+          match_ctx.Attr<int>("groups") < 1) {
+        return false;
+      }
+      return true;
+    });
+
+    paddle::drr::ResultPattern res = pat.ResultPattern();
+
+    const auto &conv2d =
+        res.Op(paddle::dialect::Conv2dOp::name(),
+               {{
+                   {"strides", pat.Attr("strides")},
+                   {"paddings", pat.Attr("paddings")},
+                   {"padding_algorithm", pat.Attr("padding_algorithm")},
+                   {"dilations", pat.Attr("dilations")},
+                   {"groups", pat.Attr("groups")},
+                   {"data_format", pat.Attr("data_format")},
+               }});
+
+    conv2d({&res.Tensor("input"), &res.Tensor("filter")},
+           {&res.Tensor("conv_out")});
+  }
+};
+
+class DepthwiseConvMKLDNNPass : public pir::PatternRewritePass {
+ public:
+  DepthwiseConvMKLDNNPass()
+      : pir::PatternRewritePass("depthwise_conv_onednn_pass", 2) {}
+
+  pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override {
+    pir::RewritePatternSet ps(context);
+    ps.Add(paddle::drr::Create<DepthwiseConvPattern>(
+        context, paddle::dialect::DepthwiseConv2dOp::name()));
+    return ps;
+  }
+};
+
+}  // namespace
+
+namespace pir {
+
+std::unique_ptr<Pass> CreateDepthwiseConvMKLDNNPass() {
+  // pd_op.depthwise_conv -> pd_op.conv2d
+  return std::make_unique<DepthwiseConvMKLDNNPass>();
+}
+
+}  // namespace pir
+
+REGISTER_IR_PASS(depthwise_conv_onednn_pass, DepthwiseConvMKLDNNPass);
diff --git a/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.h b/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.h
new file mode 100644
index 0000000000000..9f91993ce8dbe
--- /dev/null
+++ b/paddle/fluid/pir/transforms/onednn/depthwise_conv_onednn_pass.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateDepthwiseConvMKLDNNPass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/matmul_activation_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/matmul_activation_fuse_pass.cc index 1db28281578d4..45f182c955f16 100644 --- a/paddle/fluid/pir/transforms/onednn/matmul_activation_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/matmul_activation_fuse_pass.cc @@ -92,16 +92,6 @@ class MatmulActivationFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out")); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); - if (act_type_ == paddle::dialect::GeluOp::name()) { pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto result_gelu = match_ctx.Attr("approximate"); @@ -187,15 +177,6 @@ class MatmulGeluTanhFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out")); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto result_gelu = match_ctx.Attr("approximate"); if (!result_gelu) return false; @@ -272,16 +253,6 @@ class MatmulClipFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out"), pat.Tensor("min"), pat.Tensor("max")); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); - paddle::drr::ResultPattern res = pat.ResultPattern(); std::unordered_map fused_attrs{ @@ -375,16 +346,11 @@ class FusedMatmulActivationFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out")); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); auto act_type = match_ctx.Attr("fuse_activation"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0 || - act_type != "") { - return false; - } + if (act_type != "") return false; return true; }); + if (act_type_ == paddle::dialect::GeluOp::name()) { pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto result_gelu = match_ctx.Attr("approximate"); @@ 
-490,16 +456,11 @@ class FusedMatmulGeluTanhFusePattern : public paddle::drr::DrrPatternBase { pat.Tensor("act_out") = act(pat.Tensor("Out")); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); auto act_type = match_ctx.Attr("fuse_activation"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0 || - act_type != "") { - return false; - } + if (act_type != "") return false; return true; }); + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { auto result_gelu = match_ctx.Attr("approximate"); if (!result_gelu) return false; @@ -597,14 +558,8 @@ class FusedMatmulClipFusePattern : public paddle::drr::DrrPatternBase { act(pat.Tensor("Out"), pat.Tensor("min"), pat.Tensor("max")); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); auto act_type = match_ctx.Attr("fuse_activation"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0 || - act_type != "") { - return false; - } + if (act_type != "") return false; return true; }); @@ -645,7 +600,6 @@ class MatmulActivationFusePass : public pir::PatternRewritePass { pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { pir::RewritePatternSet ps(context); - // std::vector bool_set = {false, true}; int benefit_idx = 1; for (auto act_op : act_ops) { ps.Add(paddle::drr::Create( diff --git a/paddle/fluid/pir/transforms/onednn/matmul_elementwise_add_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/matmul_elementwise_add_fuse_pass.cc index e4ebc7d79378e..91ce0f80018c5 100644 --- a/paddle/fluid/pir/transforms/onednn/matmul_elementwise_add_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/onednn/matmul_elementwise_add_fuse_pass.cc @@ -59,16 +59,6 @@ class MatmulElementwiseAddFusePattern : public paddle::drr::DrrPatternBase { as_x_ ? 
add(pat.Tensor("Out"), pat.Tensor("residual")) : add(pat.Tensor("residual"), pat.Tensor("Out")); - pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { - return false; - } - return true; - }); - paddle::drr::ResultPattern res = pat.ResultPattern(); const auto &fused_matmul = @@ -106,20 +96,17 @@ class FusedMatmulElementwiseAddFusePattern std::string matmul_name_; std::string fused_matmul_name_; uint32_t benefit_; - bool as_x_; // Decide input direction of 1st add - bool as_x2_; // Decide input direction of 2nd add + bool as_x_; // Decide input direction of add public: FusedMatmulElementwiseAddFusePattern(const std::string &matmul_name, const std::string &fused_matmul_name, uint32_t benefit, - bool as_x, - bool as_x2) + bool as_x) : matmul_name_(matmul_name), fused_matmul_name_(fused_matmul_name), benefit_(benefit), - as_x_(as_x), - as_x2_(as_x2) {} + as_x_(as_x) {} std::string name() const override { return "FusedMatmulElementwiseAddFusePattern"; @@ -130,26 +117,39 @@ class FusedMatmulElementwiseAddFusePattern void operator()(paddle::drr::DrrPatternContext *ctx) const override { paddle::drr::SourcePattern pat = ctx->SourcePattern(); - const auto &matmul = pat.Op(matmul_name_, - {{"transpose_x", pat.Attr("transpose_x")}, - {"transpose_y", pat.Attr("transpose_y")}}); + const auto &matmul = + pat.Op(matmul_name_, + {{"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}); const auto &add = pat.Op(paddle::dialect::AddOp::name()); - const auto &add2 = pat.Op(paddle::dialect::AddOp::name()); - matmul({&pat.Tensor("X"), &pat.Tensor("Y")}, {&pat.Tensor("Out")}); + matmul({&pat.Tensor("X"), &pat.Tensor("Y"), &pat.Tensor("none")}, + {&pat.Tensor("Out")}); pat.Tensor("add_out") = as_x_ ? add(pat.Tensor("Out"), pat.Tensor("residual")) : add(pat.Tensor("residual"), pat.Tensor("Out")); - pat.Tensor("add_out_end") = - as_x2_ ? 
add2(pat.Tensor("add_out"), pat.Tensor("residual2")) - : add2(pat.Tensor("residual2"), pat.Tensor("add_out")); pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { - std::set bool_sets = {true, false}; - auto result_x = match_ctx.Attr("transpose_x"); - auto result_y = match_ctx.Attr("transpose_y"); - if (bool_sets.count(result_x) == 0 || bool_sets.count(result_y) == 0) { + auto none_tensor = match_ctx.Tensor("none"); + if (none_tensor.impl() != nullptr) { return false; } return true; @@ -157,36 +157,32 @@ class FusedMatmulElementwiseAddFusePattern paddle::drr::ResultPattern res = pat.ResultPattern(); - const auto &fused_add = res.Op(paddle::dialect::AddOp::name()); - res.Tensor("residual3") = - fused_add(res.Tensor("residual1"), res.Tensor("residual2")); - const auto &fused_matmul = res.Op(fused_matmul_name_, {{ {"trans_x", pat.Attr("transpose_x")}, {"trans_y", pat.Attr("transpose_y")}, - {"matmul_alpha", res.Float32Attr(1.0f)}, - {"fuse_activation", res.StrAttr("")}, - {"fuse_alpha", res.Float32Attr(0.0f)}, - {"fuse_beta", res.Float32Attr(0.0f)}, - {"fused_output_scale", res.Float32Attr(1.0f)}, - {"fused_reshape_x", res.VectorInt32Attr({})}, - {"fused_transpose_x", res.VectorInt32Attr({})}, - {"fused_reshape_y", res.VectorInt32Attr({})}, - {"fused_transpose_y", res.VectorInt32Attr({})}, - {"fused_reshape_out", res.VectorInt32Attr({})}, - {"fused_transpose_out", res.VectorInt32Attr({})}, - {"mkldnn_data_type", res.StrAttr("float32")}, - {"scale_x", res.Float32Attr(1.0f)}, - {"scale_y", res.Float32Attr(1.0f)}, - {"scale_in_eltwise", res.Float32Attr(0.0f)}, - {"scale_out", res.Float32Attr(1.0f)}, - {"force_fp32_output", res.BoolAttr(false)}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}, }}); - fused_matmul({&res.Tensor("X"), &res.Tensor("Y"), &res.Tensor("residual3")}, - {&res.Tensor("add_out_end")}); + fused_matmul({&res.Tensor("X"), &res.Tensor("Y"), &res.Tensor("residual")}, + {&res.Tensor("add_out")}); } }; @@ -209,17 +205,15 @@ class MatmulElementwiseAddFusePass : public pir::PatternRewritePass { benefit_idx++; } - for (auto as_x : bool_set) - for (auto as_x2 : bool_set) { - ps.Add(paddle::drr::Create( - context, - paddle::dialect::MatmulOp::name(), - paddle::onednn::dialect::FusedMatmulOp::name(), - benefit_idx, - as_x, - as_x2)); - benefit_idx++; - } + for (auto as_x : bool_set) { + ps.Add(paddle::drr::Create( + context, + paddle::onednn::dialect::FusedMatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx, + as_x)); + benefit_idx++; + } return ps; } }; @@ -230,8 +224,7 @@ namespace pir { std::unique_ptr CreateMatmulElementwiseAddFusePass() { // pd_op.matmul + pd_op.add -> onednn_op.fused_matmul - // pd_op.matmul + pd_op.add + pd_op.add -> pd_op.add + 
onednn_op.fused_matmul - // -> onednn_op.fused_matmul + // onednn_op.fused_matmul + pd_op.add -> onednn_op.fused_matmul return std::make_unique(); } } // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.cc new file mode 100644 index 0000000000000..246cde678593c --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.cc @@ -0,0 +1,271 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { +class MatmulTransposeReshapeFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + + public: + MatmulTransposeReshapeFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit) + : matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit) {} + + std::string name() const override { + return "MatmulTransposeReshapeFusePattern"; + } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &matmul = pat.Op(matmul_name_, + {{"transpose_x", pat.Attr("transpose_x")}, + {"transpose_y", pat.Attr("transpose_y")}}); + matmul({&pat.Tensor("X"), &pat.Tensor("Y")}, {&pat.Tensor("Out")}); + + const auto &transpose = pat.Op(paddle::dialect::TransposeOp::name(), + {{"perm", pat.Attr("perm")}}); + pat.Tensor("transpose_out") = transpose(pat.Tensor("Out")); + + const auto &full_int_array = pat.Op(paddle::dialect::FullIntArrayOp::name(), + {{"value", pat.Attr("int_array")}}); + pat.Tensor("shape") = full_int_array(); + + const auto &reshape = pat.Op(paddle::dialect::ReshapeOp::name()); + reshape({&pat.Tensor("transpose_out"), &pat.Tensor("shape")}, + {&pat.Tensor("reshape_out"), &pat.Tensor("Xshape")}); + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto shape = match_ctx.Attr>("int_array"); + auto perm = match_ctx.Attr>("perm"); + const std::vector supported_axis{0, 2, 1, 3}; + if (perm != supported_axis) return false; + if (shape.size() != 3) return false; + if (std::count(shape.begin(), shape.end(), -1) > 1) return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", res.Float32Attr(1.0f)}, + {"fuse_activation", 
res.StrAttr("")}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"fused_output_scale", res.Float32Attr(1.0f)}, + {"fused_reshape_x", res.VectorInt32Attr({})}, + {"fused_transpose_x", res.VectorInt32Attr({})}, + {"fused_reshape_y", res.VectorInt32Attr({})}, + {"fused_transpose_y", res.VectorInt32Attr({})}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"scale_x", res.Float32Attr(1.0f)}, + {"scale_y", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(0.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"force_fp32_output", res.BoolAttr(false)}}; + + const auto &fused_reshape_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::vector { + std::vector int_array_value; + auto shape = match_ctx.Attr>("int_array"); + for (auto i : shape) { + int_array_value.emplace_back(static_cast(i)); + } + return int_array_value; + }); + + fused_attrs.emplace("fused_reshape_out", fused_reshape_attr); + fused_attrs.emplace("fused_transpose_out", pat.Attr("perm")); + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + fused_matmul({&res.Tensor("X"), &res.Tensor("Y"), &res.InputNoneTensor()}, + {&res.Tensor("reshape_out")}); + } +}; + +class FusedMatmulTransposeReshapeFusePattern + : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + + public: + FusedMatmulTransposeReshapeFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit) + : matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit) {} + + std::string name() const override { + return "FusedMatmulTransposeReshapeFusePattern"; + } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &matmul = + pat.Op(matmul_name_, + {{"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}); + + matmul({&pat.Tensor("X"), &pat.Tensor("Y"), &pat.Tensor("residual")}, + {&pat.Tensor("Out")}); + + const auto &transpose = pat.Op(paddle::dialect::TransposeOp::name(), + {{"perm", pat.Attr("perm")}}); + pat.Tensor("transpose_out") = transpose(pat.Tensor("Out")); + + const auto &full_int_array = pat.Op(paddle::dialect::FullIntArrayOp::name(), + {{"value", pat.Attr("int_array")}}); + pat.Tensor("shape") = full_int_array(); + + const auto &reshape = pat.Op(paddle::dialect::ReshapeOp::name()); + reshape({&pat.Tensor("transpose_out"), &pat.Tensor("shape")}, + {&pat.Tensor("reshape_out"), &pat.Tensor("Xshape")}); + + 
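+    // Matched chain, with illustrative shapes: a [B, H, S, D] fused_matmul
+    // output is transposed with perm = {0, 2, 1, 3} to [B, S, H, D] and then
+    // reshaped to a 3-D shape such as {B, S, H * D} (at most one entry may
+    // be -1). The guards below reject every other perm/shape combination and
+    // bail out if the fused_matmul already carries a fused output reshape.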
pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto shape = match_ctx.Attr>("int_array"); + auto perm = match_ctx.Attr>("perm"); + const std::vector supported_axis{0, 2, 1, 3}; + if (perm != supported_axis) return false; + if (shape.size() != 3) return false; + if (std::count(shape.begin(), shape.end(), -1) > 1) return false; + + return true; + }); + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + if (!(match_ctx.Attr>("fused_reshape_out").empty())) + return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}; + + const auto &fused_reshape_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::vector { + std::vector int_array_value; + auto shape = match_ctx.Attr>("int_array"); + for (auto i : shape) { + int_array_value.emplace_back(static_cast(i)); + } + return int_array_value; + }); + + fused_attrs.emplace("fused_reshape_out", fused_reshape_attr); + fused_attrs.emplace("fused_transpose_out", pat.Attr("perm")); + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + fused_matmul({&res.Tensor("X"), &res.Tensor("Y"), &res.Tensor("residual")}, + {&res.Tensor("reshape_out")}); + } +}; + +class MatmulTransposeReshapeFusePass : public pir::PatternRewritePass { + public: + MatmulTransposeReshapeFusePass() + : pir::PatternRewritePass("matmul_transpose_reshape_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + int benefit_idx = 1; + ps.Add(paddle::drr::Create( + context, + paddle::dialect::MatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx++)); + + ps.Add(paddle::drr::Create( + context, + paddle::onednn::dialect::FusedMatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx++)); + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateMatmulTransposeReshapeFusePass() { + // pd_op.matmul + pd_op.transpose + pd_op.reshape -> onednn_op.fused_matmul + // pd_op.fused_matmul + pd_op.transpose + pd_op.reshape -> + // onednn_op.fused_matmul + return std::make_unique(); +} +} // namespace pir + +REGISTER_IR_PASS(matmul_transpose_reshape_fuse_pass, + MatmulTransposeReshapeFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.h new file mode 100644 index 0000000000000..c56fa7ee62d7a --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/matmul_transpose_reshape_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateMatmulTransposeReshapeFusePass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.cc new file mode 100644 index 0000000000000..d249a2174ed88 --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.cc @@ -0,0 +1,335 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { +class ReshapeTransposeMatmulFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + bool as_x_; // decide if the output of transpose is for input_x of matmul + + public: + ReshapeTransposeMatmulFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit, + bool as_x) + : matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit), + as_x_(as_x) {} + + std::string name() const override { + return "ReshapeTransposeMatmulFusePattern"; + } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &full_int_array = pat.Op(paddle::dialect::FullIntArrayOp::name(), + {{"value", pat.Attr("int_array")}}); + pat.Tensor("shape") = full_int_array(); + + const auto &reshape = pat.Op(paddle::dialect::ReshapeOp::name()); + reshape({&pat.Tensor("reshape_in"), &pat.Tensor("shape")}, + {&pat.Tensor("reshape_out"), &pat.Tensor("Xshape")}); + + const auto &transpose = pat.Op(paddle::dialect::TransposeOp::name(), + {{"perm", pat.Attr("perm")}}); + pat.Tensor("transpose_out") = transpose(pat.Tensor("reshape_out")); + + const auto &matmul = pat.Op(matmul_name_, + {{"transpose_x", pat.Attr("transpose_x")}, + {"transpose_y", 
pat.Attr("transpose_y")}}); + if (as_x_) { + matmul({&pat.Tensor("transpose_out"), &pat.Tensor("other")}, + {&pat.Tensor("Out")}); + } else { + matmul({&pat.Tensor("other"), &pat.Tensor("transpose_out")}, + {&pat.Tensor("Out")}); + } + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto shape = match_ctx.Attr>("int_array"); + auto perm = match_ctx.Attr>("perm"); + if (shape.size() < 2 || shape.size() > 4) return false; + if (shape.size() != perm.size()) return false; + if (std::count(shape.begin(), shape.end(), -1) > 1) return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", res.Float32Attr(1.0f)}, + {"fuse_activation", res.StrAttr("")}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"fused_output_scale", res.Float32Attr(1.0f)}, + {"fused_reshape_out", res.VectorInt32Attr({})}, + {"fused_transpose_out", res.VectorInt32Attr({})}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"scale_x", res.Float32Attr(1.0f)}, + {"scale_y", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(0.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"force_fp32_output", res.BoolAttr(false)}}; + + const auto &fused_reshape_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::vector { + std::vector int_array_value; + auto shape = match_ctx.Attr>("int_array"); + for (auto i : shape) { + int_array_value.emplace_back(static_cast(i)); + } + return int_array_value; + }); + + if (as_x_) { + fused_attrs.emplace("fused_reshape_x", fused_reshape_attr); + fused_attrs.emplace("fused_transpose_x", pat.Attr("perm")); + fused_attrs.emplace("fused_reshape_y", res.VectorInt32Attr({})); + fused_attrs.emplace("fused_transpose_y", res.VectorInt32Attr({})); + } else { + fused_attrs.emplace("fused_reshape_x", res.VectorInt32Attr({})); + fused_attrs.emplace("fused_transpose_x", res.VectorInt32Attr({})); + fused_attrs.emplace("fused_reshape_y", fused_reshape_attr); + fused_attrs.emplace("fused_transpose_y", pat.Attr("perm")); + } + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + if (as_x_) { + fused_matmul({&res.Tensor("reshape_in"), + &res.Tensor("other"), + &res.InputNoneTensor()}, + {&res.Tensor("Out")}); + } else { + fused_matmul({&res.Tensor("other"), + &res.Tensor("reshape_in"), + &res.InputNoneTensor()}, + {&res.Tensor("Out")}); + } + } +}; + +class ReshapeTransposeFusedMatmulFusePattern + : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + bool as_x_; // decide if the output of transpose is for input_x of matmul + + public: + ReshapeTransposeFusedMatmulFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit, + bool as_x) + : matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit), + as_x_(as_x) {} + + std::string name() const override { + return "ReshapeTransposFusedMatmulFusePattern"; + } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &full_int_array = pat.Op(paddle::dialect::FullIntArrayOp::name(), + {{"value", pat.Attr("int_array")}}); + pat.Tensor("shape") = full_int_array(); + + const auto &reshape = 
pat.Op(paddle::dialect::ReshapeOp::name()); + reshape({&pat.Tensor("reshape_in"), &pat.Tensor("shape")}, + {&pat.Tensor("reshape_out"), &pat.Tensor("Xshape")}); + + const auto &transpose = pat.Op(paddle::dialect::TransposeOp::name(), + {{"perm", pat.Attr("perm")}}); + pat.Tensor("transpose_out") = transpose(pat.Tensor("reshape_out")); + + const auto &matmul = + pat.Op(matmul_name_, + {{"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}); + if (as_x_) { + matmul({&pat.Tensor("transpose_out"), + &pat.Tensor("other"), + &pat.Tensor("residual")}, + {&pat.Tensor("Out")}); + } else { + matmul({&pat.Tensor("other"), + &pat.Tensor("transpose_out"), + &pat.Tensor("residual")}, + {&pat.Tensor("Out")}); + } + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto shape = match_ctx.Attr>("int_array"); + auto perm = match_ctx.Attr>("perm"); + if (shape.size() < 2 || shape.size() > 4) return false; + if (shape.size() != perm.size()) return false; + if (std::count(shape.begin(), shape.end(), -1) > 1) return false; + + return true; + }); + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + if (as_x_) { + if (!(match_ctx.Attr>("fused_reshape_x").empty())) + return false; + } else { + if (!(match_ctx.Attr>("fused_reshape_y").empty())) + return false; + } + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}; + + const auto &fused_reshape_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> std::vector { + std::vector int_array_value; + auto shape = match_ctx.Attr>("int_array"); + for (auto i : shape) { + int_array_value.emplace_back(static_cast(i)); + } + return int_array_value; + }); + + if (as_x_) { + fused_attrs.emplace("fused_reshape_x", fused_reshape_attr); + fused_attrs.emplace("fused_transpose_x", pat.Attr("perm")); + fused_attrs.emplace("fused_reshape_y", pat.Attr("fused_reshape_y")); + 
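+      // Note: only the X side receives the newly folded reshape/transpose in
+      // this branch; the Y-side attributes are carried over unchanged from
+      // the existing fused_matmul (the else branch mirrors this for Y).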
fused_attrs.emplace("fused_transpose_y", pat.Attr("fused_transpose_y")); + } else { + fused_attrs.emplace("fused_reshape_x", pat.Attr("fused_reshape_x")); + fused_attrs.emplace("fused_transpose_x", pat.Attr("fused_transpose_x")); + fused_attrs.emplace("fused_reshape_y", fused_reshape_attr); + fused_attrs.emplace("fused_transpose_y", pat.Attr("perm")); + } + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + if (as_x_) { + fused_matmul({&res.Tensor("reshape_in"), + &res.Tensor("other"), + &res.Tensor("residual")}, + {&res.Tensor("Out")}); + } else { + fused_matmul({&res.Tensor("other"), + &res.Tensor("reshape_in"), + &res.Tensor("residual")}, + {&res.Tensor("Out")}); + } + } +}; + +class ReshapeTransposeMatmulFusePass : public pir::PatternRewritePass { + public: + ReshapeTransposeMatmulFusePass() + : pir::PatternRewritePass("reshape_transpose_matmul_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + std::vector bool_set = {false, true}; + int benefit_idx = 5; + for (auto as_x : bool_set) { + ps.Add(paddle::drr::Create( + context, + paddle::dialect::MatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx, + as_x)); + benefit_idx--; + } + + for (auto as_x : bool_set) { + ps.Add(paddle::drr::Create( + context, + paddle::onednn::dialect::FusedMatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx, + as_x)); + benefit_idx--; + } + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateReshapeTransposeMatmulFusePass() { + // pd_op.reshape + pd_op.transpose + pd_op.matmul -> onednn_op.fused_matmul + // pd_op.reshape + pd_op.transpose + pd_op.fused_matmul -> + // onednn_op.fused_matmul + return std::make_unique(); +} +} // namespace pir + +REGISTER_IR_PASS(reshape_transpose_matmul_fuse_pass, + ReshapeTransposeMatmulFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.h new file mode 100644 index 0000000000000..71b5fe47f034b --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/reshape_transpose_matmul_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/pir/include/core/dll_decl.h" + +namespace pir { + +class Pass; + +IR_API std::unique_ptr CreateReshapeTransposeMatmulFusePass(); + +} // namespace pir diff --git a/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.cc new file mode 100644 index 0000000000000..07a26a6beee34 --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.cc @@ -0,0 +1,296 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.h" + +#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h" +#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" +#include "paddle/fluid/pir/drr/include/drr_pattern_base.h" + +#include "paddle/pir/include/pass/pass.h" +#include "paddle/pir/include/pass/pass_registry.h" + +namespace { +class ScaleMatmulFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + bool as_x_; // decide if the output of scale is for input_x of matmul + + public: + ScaleMatmulFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit, + bool as_x) + : matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit), + as_x_(as_x) {} + + std::string name() const override { return "ScaleMatmulFusePattern"; } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &full = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("scale_")}}); + pat.Tensor("scale") = full(); + + const auto &scale = + pat.Op(paddle::dialect::ScaleOp::name(), + {{"bias", pat.Attr("bias")}, + {"bias_after_scale", pat.Attr("bias_after_scale")}}); + scale({&pat.Tensor("scale_in"), &pat.Tensor("scale")}, + {&pat.Tensor("scale_out")}); + + const auto &matmul = pat.Op(matmul_name_, + {{"transpose_x", pat.Attr("transpose_x")}, + {"transpose_y", pat.Attr("transpose_y")}}); + if (as_x_) { + matmul({&pat.Tensor("scale_out"), &pat.Tensor("other")}, + {&pat.Tensor("Out")}); + } else { + matmul({&pat.Tensor("other"), &pat.Tensor("scale_out")}, + {&pat.Tensor("Out")}); + } + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto scale = match_ctx.Attr("scale_"); + auto bias = match_ctx.Attr("bias"); + // conditions align with fluid pass + if (bias != 0.0f) return false; + if (scale <= 0.0f) return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"fuse_activation", res.StrAttr("")}, + {"fuse_alpha", res.Float32Attr(0.0f)}, + {"fuse_beta", res.Float32Attr(0.0f)}, + {"fused_reshape_x", res.VectorInt32Attr({})}, + {"fused_transpose_x", res.VectorInt32Attr({})}, + {"fused_reshape_y", res.VectorInt32Attr({})}, + {"fused_transpose_y", res.VectorInt32Attr({})}, + {"fused_output_scale", res.Float32Attr(1.0f)}, + {"fused_reshape_out", res.VectorInt32Attr({})}, + {"fused_transpose_out", res.VectorInt32Attr({})}, + {"mkldnn_data_type", res.StrAttr("float32")}, + {"scale_x", res.Float32Attr(1.0f)}, + {"scale_y", res.Float32Attr(1.0f)}, + {"scale_in_eltwise", res.Float32Attr(0.0f)}, + {"scale_out", res.Float32Attr(1.0f)}, + {"force_fp32_output", 
res.BoolAttr(false)}}; + + const auto &matmul_alpha_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> float { + auto scale = match_ctx.Attr("scale_"); + return scale; + }); + + fused_attrs.emplace("matmul_alpha", matmul_alpha_attr); + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + if (as_x_) { + fused_matmul({&res.Tensor("scale_in"), + &res.Tensor("other"), + &res.InputNoneTensor()}, + {&res.Tensor("Out")}); + } else { + fused_matmul({&res.Tensor("other"), + &res.Tensor("scale_in"), + &res.InputNoneTensor()}, + {&res.Tensor("Out")}); + } + } +}; + +class ScaleFusedMatmulFusePattern : public paddle::drr::DrrPatternBase { + private: + std::string matmul_name_; + std::string fused_matmul_name_; + uint32_t benefit_; + bool as_x_; // decide if the output of transpose is for input_x of matmul + + public: + ScaleFusedMatmulFusePattern(const std::string &matmul_name, + const std::string &fused_matmul_name, + uint32_t benefit, + bool as_x) + : matmul_name_(matmul_name), + fused_matmul_name_(fused_matmul_name), + benefit_(benefit), + as_x_(as_x) {} + + std::string name() const override { return "ScaleFusedMatmulFusePattern"; } + + uint32_t benefit() const override { return benefit_; } + + void operator()(paddle::drr::DrrPatternContext *ctx) const override { + paddle::drr::SourcePattern pat = ctx->SourcePattern(); + + const auto &full = pat.Op(paddle::dialect::FullOp::name(), + {{"value", pat.Attr("scale_")}}); + pat.Tensor("scale") = full(); + + const auto &scale = + pat.Op(paddle::dialect::ScaleOp::name(), + {{"bias", pat.Attr("bias")}, + {"bias_after_scale", pat.Attr("bias_after_scale")}}); + scale({&pat.Tensor("scale_in"), &pat.Tensor("scale")}, + {&pat.Tensor("scale_out")}); + + const auto &matmul = + pat.Op(matmul_name_, + {{"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + {"matmul_alpha", pat.Attr("matmul_alpha")}, + {"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}); + if (as_x_) { + matmul({&pat.Tensor("scale_out"), + &pat.Tensor("other"), + &pat.Tensor("residual")}, + {&pat.Tensor("Out")}); + } else { + matmul({&pat.Tensor("other"), + &pat.Tensor("scale_out"), + &pat.Tensor("residual")}, + {&pat.Tensor("Out")}); + } + + pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) { + auto matmul_alpha = match_ctx.Attr("matmul_alpha"); + auto scale = match_ctx.Attr("scale_"); + auto bias = match_ctx.Attr("bias"); + // conditions align with fluid pass + if (matmul_alpha == 0.0f) return false; + if (bias != 0.0f) return false; + if (scale <= 0.0f) return false; + return true; + }); + + paddle::drr::ResultPattern res = pat.ResultPattern(); + + std::unordered_map fused_attrs{ + {"trans_x", pat.Attr("transpose_x")}, + {"trans_y", pat.Attr("transpose_y")}, + 
{"fuse_activation", pat.Attr("fuse_activation")}, + {"fuse_alpha", pat.Attr("fuse_alpha")}, + {"fuse_beta", pat.Attr("fuse_beta")}, + {"fused_reshape_x", pat.Attr("fused_reshape_x")}, + {"fused_transpose_x", pat.Attr("fused_transpose_x")}, + {"fused_reshape_y", pat.Attr("fused_reshape_y")}, + {"fused_transpose_y", pat.Attr("fused_transpose_y")}, + {"fused_output_scale", pat.Attr("fused_output_scale")}, + {"fused_reshape_out", pat.Attr("fused_reshape_out")}, + {"fused_transpose_out", pat.Attr("fused_transpose_out")}, + {"mkldnn_data_type", pat.Attr("mkldnn_data_type")}, + {"scale_x", pat.Attr("scale_x")}, + {"scale_y", pat.Attr("scale_y")}, + {"scale_in_eltwise", pat.Attr("scale_in_eltwise")}, + {"scale_out", pat.Attr("scale_out")}, + {"force_fp32_output", pat.Attr("force_fp32_output")}}; + + const auto &matmul_alpha_attr = res.ComputeAttr( + [](const paddle::drr::MatchContext &match_ctx) -> float { + auto scale = match_ctx.Attr("scale_"); + auto matmul_alpha = match_ctx.Attr("matmul_alpha"); + return scale * matmul_alpha; + }); + + fused_attrs.emplace("matmul_alpha", matmul_alpha_attr); + + const auto &fused_matmul = res.Op(fused_matmul_name_, fused_attrs); + + if (as_x_) { + fused_matmul({&res.Tensor("scale_in"), + &res.Tensor("other"), + &res.Tensor("residual")}, + {&res.Tensor("Out")}); + } else { + fused_matmul({&res.Tensor("other"), + &res.Tensor("scale_in"), + &res.Tensor("residual")}, + {&res.Tensor("Out")}); + } + } +}; + +class ScaleMatmulFusePass : public pir::PatternRewritePass { + public: + ScaleMatmulFusePass() + : pir::PatternRewritePass("reshape_transpose_matmul_fuse_pass", 3) {} + + pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override { + pir::RewritePatternSet ps(context); + std::vector bool_set = {false, true}; + int benefit_idx = 5; + for (auto as_x : bool_set) { + ps.Add(paddle::drr::Create( + context, + paddle::dialect::MatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx, + as_x)); + benefit_idx--; + } + + for (auto as_x : bool_set) { + ps.Add(paddle::drr::Create( + context, + paddle::onednn::dialect::FusedMatmulOp::name(), + paddle::onednn::dialect::FusedMatmulOp::name(), + benefit_idx, + as_x)); + benefit_idx--; + } + return ps; + } +}; + +} // namespace + +namespace pir { + +std::unique_ptr CreateScaleMatmulFusePass() { + // pd_op.reshape + pd_op.transpose + pd_op.matmul -> onednn_op.fused_matmul + // pd_op.reshape + pd_op.transpose + pd_op.fused_matmul -> + // onednn_op.fused_matmul + return std::make_unique(); +} +} // namespace pir + +REGISTER_IR_PASS(scale_matmul_fuse_pass, ScaleMatmulFusePass); diff --git a/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.h new file mode 100644 index 0000000000000..2ba8a3787e5dc --- /dev/null +++ b/paddle/fluid/pir/transforms/onednn/scale_matmul_fuse_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include <memory>
+#include "paddle/pir/include/core/dll_decl.h"
+
+namespace pir {
+
+class Pass;
+
+IR_API std::unique_ptr<Pass> CreateScaleMatmulFusePass();
+
+}  // namespace pir
diff --git a/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.cc b/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.cc
new file mode 100644
index 0000000000000..e1f7250de2932
--- /dev/null
+++ b/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.cc
@@ -0,0 +1,109 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.h"
+
+#include "paddle/fluid/pir/dialect/operator/ir/onednn_op.h"
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+#include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
+
+#include "paddle/pir/include/pass/pass.h"
+#include "paddle/pir/include/pass/pass_registry.h"
+
+namespace {
+
+class SqueezeTransposePattern : public paddle::drr::DrrPatternBase {
+ public:
+  SqueezeTransposePattern() {}
+
+  std::string name() const override { return "SqueezeTransposePattern"; }
+
+  uint32_t benefit() const override { return 2; }
+
+  void operator()(paddle::drr::DrrPatternContext *ctx) const override {
+    paddle::drr::SourcePattern pat = ctx->SourcePattern();
+
+    const auto &squeeze = pat.Op(paddle::dialect::SqueezeOp::name());
+    const auto &full_1 = pat.Op(paddle::dialect::FullIntArrayOp::name(),
+                                {{"value", pat.Attr("full_1_value")}});
+
+    squeeze({&pat.Tensor("x"), &full_1()},
+            {&pat.Tensor("squeeze_out"), &pat.Tensor("xshape")});
+
+    const auto &transpose = pat.Op(paddle::dialect::TransposeOp::name(),
+                                   {{"perm", pat.Attr("perm")}});
+
+    transpose({&pat.Tensor("squeeze_out")}, {&pat.Tensor("transpose_op_out")});
+
+    pat.RequireNativeCall([&](const paddle::drr::MatchContext &match_ctx) {
+      auto axis = match_ctx.Attr<std::vector<int64_t>>("full_1_value");
+      auto perm = match_ctx.Attr<std::vector<int>>("perm");
+      if (perm.size() <= 0) return false;
+      if (axis.size() <= 0) return false;
+      return true;
+    });
+
+    paddle::drr::ResultPattern res = pat.ResultPattern();
+
+    const auto &fused_reshape_attr = res.ComputeAttr(
+        [](const paddle::drr::MatchContext &match_ctx) -> std::vector<int> {
+          std::vector<int> int_array_value;
+          auto shape = match_ctx.Attr<std::vector<int64_t>>("full_1_value");
+          for (auto i : shape) {
+            int_array_value.emplace_back(static_cast<int>(i));
+          }
+          return int_array_value;
+        });
+
+    const auto &fused_transpose =
+        res.Op(paddle::onednn::dialect::FusedTransposeOp::name(),
+               {{
+                   {"axis", pat.Attr("perm")},
+                   {"fused_squeeze2_axes", fused_reshape_attr},
+                   {"fused_unsqueeze2_axes", res.VectorInt32Attr({})},
+                   {"fused_reshape2_shape", res.VectorInt32Attr({})},
+                   {"scale", res.Float32Attr(1.0f)},
+                   {"shift", res.Float32Attr(0.0f)},
+                   {"output_data_type", res.StrAttr("fp32")},
+                   {"data_format", res.StrAttr("AnyLayout")},
+                   {"mkldnn_data_type", res.StrAttr("float32")},
+               }});
+    fused_transpose({&res.Tensor("x")}, {&res.Tensor("transpose_op_out")});
+  }
+};
+
+class SqueezeTransposePass : public pir::PatternRewritePass {
+ public:
+  SqueezeTransposePass()
+      : pir::PatternRewritePass("squeeze_transpose_onednn_fuse_pass", 3) {}
+
+  pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override {
+    pir::RewritePatternSet ps(context);
+    ps.Add(paddle::drr::Create<SqueezeTransposePattern>(context));
+    return ps;
+  }
+};
+
+}  // namespace
+
+namespace pir {
+
+std::unique_ptr<Pass> CreateSqueezeTransposeOneDNNPass() {
+  // pd_op.squeeze + transpose2 -> onednn_op.fused_transpose
+  return std::make_unique<SqueezeTransposePass>();
+}
+
+}  // namespace pir
+
+REGISTER_IR_PASS(squeeze_transpose_onednn_fuse_pass, SqueezeTransposePass);
diff --git a/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.h b/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.h
new file mode 100644
index 0000000000000..fce3e0f6e8a80
--- /dev/null
+++ b/paddle/fluid/pir/transforms/onednn/squeeze_transpose_onednn_fuse_pass.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include "paddle/pir/include/core/dll_decl.h"
+
+namespace pir {
+
+class Pass;
+
+IR_API std::unique_ptr<Pass> CreateSqueezeTransposeOneDNNPass();
+
+}  // namespace pir
diff --git a/paddle/fluid/pir/transforms/passes.h b/paddle/fluid/pir/transforms/passes.h
index 27a5c741e157d..bc15794c45ec6 100644
--- a/paddle/fluid/pir/transforms/passes.h
+++ b/paddle/fluid/pir/transforms/passes.h
@@ -38,17 +38,27 @@ USE_PIR_PASS(conv2d_add_act_fuse_pass);
 USE_PIR_PASS(embedding_eltwise_layernorm_fuse_pass);
 USE_PIR_PASS(add_norm_fuse_pass);
 USE_PIR_PASS(fused_dot_product_attention_pass);
+USE_PIR_PASS(fused_flash_attn_pass);
+USE_PIR_PASS(remove_redundant_transpose_pass);
 #ifdef PADDLE_WITH_DNNL
+USE_PIR_PASS(depthwise_conv_onednn_pass);
+USE_PIR_PASS(squeeze_transpose_onednn_fuse_pass);
 USE_PIR_PASS(batch_norm_act_fuse_pass);
 USE_PIR_PASS(conv2d_bias_fuse_pass);
 USE_PIR_PASS(conv2d_transpose_bias_fuse_pass);
 USE_PIR_PASS(conv3d_bias_fuse_pass);
+USE_PIR_PASS(scale_matmul_fuse_pass);
+USE_PIR_PASS(reshape_transpose_matmul_fuse_pass);
+USE_PIR_PASS(matmul_transpose_reshape_fuse_pass);
 USE_PIR_PASS(matmul_elementwise_add_fuse_pass);
 USE_PIR_PASS(matmul_activation_fuse_pass);
-USE_PIR_PASS(conv_elementwise_add_mkldnn_fuse_pass);
+USE_PIR_PASS(conv_elementwise_add_onednn_fuse_pass);
+USE_PIR_PASS(conv_activation_onednn_fuse_pass);
+USE_PIR_PASS(conv_concat_activation_onednn_fuse_pass);
 #endif
 #ifdef PADDLE_WITH_XPU
 USE_PIR_PASS(add_layernorm_xpu_fuse_pass);
+USE_PIR_PASS(group_norm_silu_xpu_fuse_pass);
 #endif
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 182aa009a020c..43a3e2237036b 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -754,7 +754,7 @@ static phi::Backend GetKernelBackendByYaml(
   auto& backend_info = op_info_parser->OpRuntimeInfo().kernel_key_backend;
   phi::Backend kernel_backend = phi::Backend::UNDEFINED;
 
-  for (auto slot_name : backend_info) {
+  for (const auto& slot_name : backend_info) {
     auto& input_map = op_info_parser->InputName2Id();
 
     if (input_map.count(slot_name)) {
diff --git a/paddle/fluid/pir/transforms/shape_optimization_pass.cc b/paddle/fluid/pir/transforms/shape_optimization_pass.cc
index d5ced352047da..932d03d7a42ff 100644
--- a/paddle/fluid/pir/transforms/shape_optimization_pass.cc
+++ b/paddle/fluid/pir/transforms/shape_optimization_pass.cc
@@ -270,6 +270,24 @@ class ShapeOptimizationPass : public pir::Pass {
 
 }  // namespace
 
+static inline bool IsStaticShape(const Value& value) {
+  const auto& value_type = value.type();
+  if (!value || !value_type ||
+      !value_type.isa<pir::DenseTensorType>()) {
+    return false;
+  }
+  return !::common::contain_unknown_dim(
+      value_type.dyn_cast<pir::DenseTensorType>().dims());
+}
+
+symbol::ShapeOrDataDimExprs CreateShapeOrDataByDDim(const pir::DDim& dims) {
+  std::vector<symbol::DimExpr> dim_exprs;
+  for (int i = 0; i < dims.size(); ++i) {
+    dim_exprs.emplace_back(symbol::DimExpr{dims.at(i)});
+  }
+  return symbol::TensorShapeOrDataDimExprs{dim_exprs};
+}
+
 void InferSymExprForBlock(const Block& block,
                           ShapeConstraintIRAnalysis* shape_analysis) {
   for (auto& op : block) {
@@ -290,8 +308,33 @@ void InferSymExprForBlock(const Block& block,
             &op, shape_analysis->GetShapeOrDataForValue(op.result(0)));
       }
     } else {
-      PADDLE_THROW(phi::errors::Unimplemented(
-          op.name() + " DOES NOT have InferSymbolicShapeInterface!"));
+      const bool all_outs_static_dims = [&] {
+        bool all_static_dims = true;
+        for (uint32_t i = 0; i < op.num_results(); ++i) {
+          if (IsStaticShape(op.result(i))) {
+            continue;
+          } else {
+            all_static_dims = false;
+            break;
+          }
+        }
+        return all_static_dims;
+      }();
+
+      if (all_outs_static_dims) {
+        for (uint32_t i = 0; i < op.num_results(); ++i) {
+          shape_analysis->SetShapeOrDataForValue(
+              op.result(i),
+              CreateShapeOrDataByDDim(
+                  op.result(i)
+                      .type()
+                      .dyn_cast<pir::DenseTensorType>()
+                      .dims()));
+        }
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented(
+            op.name() + " DOES NOT have InferSymbolicShapeInterface!"));
+      }
     }
     DebugPrintOpInfo(&op, shape_analysis);
     CheckInferSymWithInferMeta(&op, shape_analysis);
diff --git a/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc b/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc
new file mode 100644
index 0000000000000..0b93a496d4dde
--- /dev/null
+++ b/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.cc
@@ -0,0 +1,99 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h"
+
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+#include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
+#include "paddle/fluid/pir/utils/general_functions.h"
+
+#include "paddle/pir/include/pass/pass.h"
+#include "paddle/pir/include/pass/pass_registry.h"
+
+/*
+fuse the gn + activation block into a group_norm_silu op
+For example:
+graph:
+                      X
+            Scale     |     Bias
+                 \    |    /
+                 group norm
+                 /    |    \
+                /     |     \
+        variance      |      mean
+                      |
+                    silu
+                      |
+                    output
+------------------------------------------------------
+After the pass is applied:
+                      X
+            Scale     |     Bias
+                 \    |    /
+             group_norm_silu
+                      |
+                     Out
+*/
+
+namespace {
+
+class GroupNormSiluPattern : public paddle::drr::DrrPatternBase {
+ public:
+  std::string name() const override { return "GroupNormSiluPattern"; }
+
+  void operator()(paddle::drr::DrrPatternContext *ctx) const override {
+    paddle::drr::SourcePattern pat = ctx->SourcePattern();
+    const auto &groupnorm = pat.Op(
+        paddle::dialect::GroupNormOp::name(),
+        {{"epsilon", pat.Attr("epsilon")}, {"groups", pat.Attr("groups")}});
+
+    const auto &silu = pat.Op(paddle::dialect::SiluOp::name());
+
+    groupnorm({&pat.Tensor("X"), &pat.Tensor("Scale"), &pat.Tensor("Bias")},
+              {&pat.Tensor("Y"), &pat.Tensor("Mean"), &pat.Tensor("Variance")});
+    silu({&pat.Tensor("Y")}, {&pat.Tensor("Out")});
+
+    paddle::drr::ResultPattern res = pat.ResultPattern();
+
+    const auto &group_norm_silu_xpu = res.Op(
+        paddle::dialect::GroupNormSiluXpuOp::name(),
+        {{{"epsilon", pat.Attr("epsilon")}, {"groups", pat.Attr("groups")}}});
+    group_norm_silu_xpu(
+        {&res.Tensor("X"), &res.Tensor("Scale"), &res.Tensor("Bias")},
+        {&res.Tensor("Out")});
+  }
+};
+
+class GroupNormSiluXpuFusePass : public pir::PatternRewritePass {
+ public:
+  GroupNormSiluXpuFusePass()
+      : pir::PatternRewritePass("group_norm_silu_xpu_fuse_pass", 2) {}
+
+  pir::RewritePatternSet InitializePatterns(pir::IrContext *context) override {
+    pir::RewritePatternSet ps(context);
+    ps.Add(paddle::drr::Create<GroupNormSiluPattern>(context));
+    return ps;
+  }
+};
+
+}  // namespace
+
+namespace pir {
+std::unique_ptr<Pass> CreateGroupNormSiluXpuFusePass() {
+  return std::make_unique<GroupNormSiluXpuFusePass>();
+}
+
+}  // namespace pir
+
+REGISTER_IR_PASS(group_norm_silu_xpu_fuse_pass, GroupNormSiluXpuFusePass);
diff --git a/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h b/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h
new file mode 100644
index 0000000000000..665c7dcb03f16
--- /dev/null
+++ b/paddle/fluid/pir/transforms/xpu/group_norm_silu_fuse_pass.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include "paddle/pir/include/core/dll_decl.h"
+
+namespace pir {
+
+class Pass;
+
+IR_API std::unique_ptr<Pass> CreateGroupNormSiluXpuFusePass();
+
+}  // namespace pir
diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
index 2cabc79bb3844..99a6606d1183a 100644
--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -41,10 +41,10 @@ cc_test(
   SRCS place_test.cc
   DEPS place glog phi common)
 
-if(WITH_MKLDNN)
-  set(MKLDNN_CTX_DEPS mkldnn)
+if(WITH_ONEDNN)
+  set(ONEDNN_CTX_DEPS onednn)
 else()
-  set(MKLDNN_CTX_DEPS)
+  set(ONEDNN_CTX_DEPS)
 endif()
 
 add_subdirectory(device)
@@ -126,7 +126,7 @@ cc_library(
     framework_proto
     ${IPU_CTX_DEPS}
     ${GPU_CTX_DEPS}
-    ${MKLDNN_CTX_DEPS}
+    ${ONEDNN_CTX_DEPS}
    ${dgc_deps}
    dlpack
    phi
diff --git a/paddle/fluid/platform/device/gpu/gpu_info.cc b/paddle/fluid/platform/device/gpu/gpu_info.cc
index 36189cc7e4c90..73704b04cf90b 100644
--- a/paddle/fluid/platform/device/gpu/gpu_info.cc
+++ b/paddle/fluid/platform/device/gpu/gpu_info.cc
@@ -217,6 +217,7 @@ class RecordedGpuMallocHelper {
     CUDADeviceGuard guard(dev_id_);
     gpuError_t result;
 #ifdef PADDLE_WITH_HIP
+    phi::backends::gpu::CUDAGraphCaptureModeGuard capture_mode_guard;
     if (UNLIKELY(malloc_managed_memory)) {
       result = hipMallocManaged(ptr, size);
     } else {
diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/onednn_helper.h
similarity index 99%
rename from paddle/fluid/platform/mkldnn_helper.h
rename to paddle/fluid/platform/onednn_helper.h
index 6132aa9292e56..145f42f669d9d 100644
--- a/paddle/fluid/platform/mkldnn_helper.h
+++ b/paddle/fluid/platform/onednn_helper.h
@@ -52,7 +52,7 @@ inline void DontClearMKLDNNCache(const platform::Place& place) {
   }
 }
 
-// If MKLDNN build and CPU place then register suffix in DeviceContext
+// If OneDNN build and CPU place then register suffix in DeviceContext
 inline void AttachPointerHashToMKLDNNKey(void* ptr,
                                          const platform::Place& place) {
   if (platform::is_cpu_place(place)) {
diff --git a/paddle/fluid/platform/mkldnn_op_list.h b/paddle/fluid/platform/onednn_op_list.h
similarity index 100%
rename from paddle/fluid/platform/mkldnn_op_list.h
rename to paddle/fluid/platform/onednn_op_list.h
diff --git a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
index 169d41d9763e5..4ef0cfee6e283 100644
--- a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
+++ b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h
@@ -459,6 +459,15 @@ void sqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) {
   }
 }
 
+template <typename T>
+void rsqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) {
+  if (x_grad) {
+    // This calculation is important for resnet.
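+    // With out = rsqrt(x) = x^(-1/2), d(out)/dx = -0.5 * x^(-3/2) = -0.5 * out^3,
+    // so the VJP below is x_grad = -0.5 * out^3 * out_grad.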
+    auto x_grad_tmp = -0.5 * out * out * out * out_grad;
+    set_output<T>(x_grad_tmp, x_grad);
+  }
+}
+
 template <typename T>
 void floor_grad(const Tensor& out_grad, Tensor* x_grad) {
   if (x_grad) {
@@ -1123,6 +1132,58 @@ void max_grad(const Tensor& x,
   set_output<T>(x_grad_tmp, x_grad);
 }
 
+template <typename T>
+void min_grad(const Tensor& x,
+              const Tensor& out,
+              const Tensor& out_grad,
+              const IntArray& axis,
+              bool keepdim,
+              bool reduce_all,
+              Tensor* x_grad) {
+  if (!x_grad) {
+    return;
+  }
+  auto zero_tensor = full<T>(common::vectorize(x.dims()), 0.0, x.dtype());
+  std::vector<int64_t> x_dim = common::vectorize(x.dims());
+  int64_t axis_size = axis.size();
+  int64_t x_dim_size = x_dim.size();
+  reduce_all = false;
+  if (reduce_all || axis_size == 0 || axis_size == x_dim_size) {
+    reduce_all = true;
+  } else {
+    reduce_all = false;
+  }
+  auto x_grad_tmp = Tensor();
+  if (x_dim_size == 0 || x_dim_size == 1 || keepdim) {
+    auto out_grad_tmp = out_grad.expand(IntArray(x_dim));
+    auto out_tmp = out.expand(IntArray(x_dim));
+    auto mask = equal<T>(x, out_tmp);
+    x_grad_tmp = where<T>(mask, out_grad_tmp, zero_tensor);
+  } else {
+    auto axis_ = std::vector<int64_t>();
+    if (reduce_all) {
+      for (int64_t i = 0; i < x_dim_size; i++) {
+        axis_.push_back(i);
+      }
+    } else {
+      axis_ = axis.GetData();
+      for (int64_t i = 0; i < axis_size; i++) {
+        if (axis[i] < 0) {
+          axis_[i] = axis[i] + x_dim_size;
+        }
+      }
+    }
+    auto out_grad_shape = get_unsqueeze_dims(out_grad, axis_);
+    auto out_grad_ = reshape<T>(out_grad, out_grad_shape);
+    auto out_ = reshape<T>(out, out_grad_shape);
+    auto out_grad_tmp = out_grad_.expand(IntArray(x_dim));
+    auto out_tmp = out_.expand(IntArray(x_dim));
+    auto mask = equal<T>(x, out_tmp);
+    x_grad_tmp = where<T>(mask, out_grad_tmp, zero_tensor);
+  }
+  set_output<T>(x_grad_tmp, x_grad);
+}
+
 template <typename T>
 void assign_grad(const Tensor& out_grad, Tensor* x_grad) {
   if (x_grad) {
diff --git a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
index 2c5c4fcea8b41..67feb640c9f7a 100644
--- a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
+++ b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h
@@ -702,9 +702,9 @@ void add_double_grad(const Tensor& y,
   if (grad_x_grad && grad_y_grad) {
     set_output<T>(grad_x_grad.get() + grad_y_grad.get(), grad_out_grad);
   } else if (grad_x_grad) {
-    set_output<T>(grad_x_grad.get(), grad_out_grad);
+    by_pass<T>(grad_x_grad.get(), grad_out_grad);
   } else if (grad_y_grad) {
-    set_output<T>(grad_y_grad.get(), grad_out_grad);
+    by_pass<T>(grad_y_grad.get(), grad_out_grad);
   } else {
     set_output<T>(full<T>(common::vectorize(grad_out.dims()), 0.0, y.dtype()),
                   grad_out_grad);
@@ -773,9 +773,9 @@ void subtract_double_grad(const Tensor& y,
   if (grad_x_grad && grad_y_grad) {
     set_output<T>(grad_x_grad.get() - grad_y_grad.get(), grad_out_grad);
   } else if (grad_x_grad) {
-    set_output<T>(grad_x_grad.get(), grad_out_grad);
+    by_pass<T>(grad_x_grad.get(), grad_out_grad);
   } else if (grad_y_grad) {
-    set_output<T>(-grad_y_grad.get(), grad_out_grad);
+    by_pass<T>(-grad_y_grad.get(), grad_out_grad);
   } else {
     set_output<T>(
         full<T>(common::vectorize(grad_out.dims()), 0, grad_out.dtype()),
@@ -784,5 +784,54 @@ void subtract_double_grad(const Tensor& y,
   }
 }
 
+template <typename T>
+void exp_double_grad(const Tensor& out,
+                     const Tensor& grad_out,
+                     const Tensor& grad_x_grad,
+                     Tensor* out_grad,
+                     Tensor* grad_out_grad) {
+  // dout = dout_old * ddx
+  if (out_grad) {
+    auto out_grad_tmp = grad_out * grad_x_grad;
+    set_output<T>(out_grad_tmp, out_grad);
+  }
+
+  // ddout = out * ddx
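+  // (For exp, the first-order rule is dx = out * dout; differentiating that
+  // product w.r.t. out gives the dout term above, and w.r.t. dout gives
+  // ddout = out * ddx below.)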
+  if (grad_out_grad) {
+    auto grad_out_grad_tmp = out * grad_x_grad;
+    set_output<T>(grad_out_grad_tmp, grad_out_grad);
+  }
+}
+
+template <typename T>
+void log_double_grad(const Tensor& x,
+                     const Tensor& grad_out,
+                     const Tensor& grad_x_grad,
+                     Tensor* x_grad,
+                     Tensor* grad_out_grad) {
+  // dx = -dout/x^2 * ddx
+  if (x_grad) {
+    auto x_grad_tmp = -grad_out / (x * x) * grad_x_grad;
+    set_output<T>(x_grad_tmp, x_grad);
+  }
+
+  // ddout = ddx / x
+  if (grad_out_grad) {
+    auto grad_out_grad_tmp = grad_x_grad / x;
+    set_output<T>(grad_out_grad_tmp, grad_out_grad);
+  }
+}
+
+template <typename T>
+void abs_triple_grad(const Tensor& x,
+                     const Tensor& grad_out_grad_grad,
+                     Tensor* grad_grad_x_grad) {
+  // dddx = sign(x) * dddout
+  if (grad_grad_x_grad) {
+    auto grad_grad_x_grad_tmp = sign<T>(x) * grad_out_grad_grad;
+    set_output<T>(grad_grad_x_grad_tmp, grad_grad_x_grad);
+  }
+}
+
 }  // namespace prim
 }  // namespace paddle
diff --git a/paddle/fluid/prim/api/manual_prim/utils/utils.h b/paddle/fluid/prim/api/manual_prim/utils/utils.h
index 9062d979b40db..cbbe846671114 100644
--- a/paddle/fluid/prim/api/manual_prim/utils/utils.h
+++ b/paddle/fluid/prim/api/manual_prim/utils/utils.h
@@ -88,7 +88,7 @@ static phi::DDim get_reduce_dims(const phi::DDim& x_dims,
  * y_dims = [2, 1, 6, 1]    <-- shapes are right-aligned for comparison
  *          <-- broadcast -->
  * z_dims = [10, 2, 4, 6, 5]
- * ==> reduce_dims_from_z_to_x = [0, 1, 3]
+ * ==> reduce_dims_from_z_to_x = [1, 3]
  * ==> reduce_dims_from_z_to_y = [0, 2, 4]
  */
  auto out_dims = paddle::operators::details::BroadcastTwoDims(x_dims, y_dims);
diff --git a/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc b/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc
index a79e929a6e5cc..a479379cc6ab4 100644
--- a/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc
+++ b/paddle/fluid/primitive/backend/manual/manual_static_prim_backend.cc
@@ -43,7 +43,7 @@ Tensor full_with_tensor(const Tensor& shape,
       std::static_pointer_cast<LazyTensor>(shape.impl())->value();
   pir::Value value_res = paddle::dialect::full(
       std::vector<int64_t>{}, value.to<float>(), dtype, place);
-  auto op_res = paddle::dialect::full_with_tensor(shape_res, value_res, dtype);
+  auto op_res = paddle::dialect::full_with_tensor(value_res, shape_res, dtype);
   Tensor out(std::make_shared<LazyTensor>(op_res));
   return out;
 }
diff --git a/paddle/fluid/primitive/base/decomp_trans.cc b/paddle/fluid/primitive/base/decomp_trans.cc
index c71da029b4e37..7d8a0b723eceb 100644
--- a/paddle/fluid/primitive/base/decomp_trans.cc
+++ b/paddle/fluid/primitive/base/decomp_trans.cc
@@ -298,12 +298,12 @@ std::vector<pir::Value> DecompProgram::format_decomp_res(
   return new_decomp_outs;
 }
 
-std::vector<pir::Value> DecompProgram::construct_dst_vars(
+void DecompProgram::construct_dst_vars(
     const std::string& op_name,
     const std::vector<pir::Value>& orig_outs,
     const std::vector<pir::Value>& decomp_outs,
-    std::unordered_map<pir::Value, int> orig_vars_dict) {
-  std::vector<pir::Value> tar_vars(src_vars_.size());
+    std::unordered_map<pir::Value, int> orig_vars_dict,
+    std::vector<pir::Value>* tar_vars) {
   PADDLE_ENFORCE_EQ(
       orig_outs.size(),
       decomp_outs.size(),
@@ -315,10 +315,9 @@ std::vector<pir::Value> DecompProgram::construct_dst_vars(
           decomp_outs.size()));
   for (size_t i = 0; i < orig_outs.size(); i++) {
     if (orig_vars_dict.find(orig_outs[i]) != orig_vars_dict.end()) {
-      tar_vars[orig_vars_dict[orig_outs[i]]] = decomp_outs[i];
+      (*tar_vars)[orig_vars_dict[orig_outs[i]]] = decomp_outs[i];
     }
   }
-  return tar_vars;
 }
 
 std::vector<pir::Value> DecompProgram::get_dst_vars() {
@@ -427,8 +426,11 @@ void DecompProgram::decomp_block(
       std::vector<pir::Value> standard_decomp_res =
           format_decomp_res(op->name(), orig_outs, decomp_res);
       check_decomp_outputs(op->name(), orig_outs, standard_decomp_res);
-      tar_vars = construct_dst_vars(
-          op->name(), orig_outs, standard_decomp_res, orig_vars_dict);
+      construct_dst_vars(op->name(),
+                         orig_outs,
+                         standard_decomp_res,
+                         orig_vars_dict,
+                         &tar_vars);
       op->ReplaceAllUsesWith(standard_decomp_res);
 
       bool remove_op = true;
diff --git a/paddle/fluid/primitive/base/decomp_trans.h b/paddle/fluid/primitive/base/decomp_trans.h
index 21e48d94f97a7..00f538cb0dafc 100644
--- a/paddle/fluid/primitive/base/decomp_trans.h
+++ b/paddle/fluid/primitive/base/decomp_trans.h
@@ -50,11 +50,11 @@ class DecompProgram {
       const std::string& op_name,
       const std::vector<pir::Value>& orig_outs,
       const std::vector<std::vector<pir::Value>>& decomp_outs);
-  std::vector<pir::Value> construct_dst_vars(
-      const std::string& op_name,
-      const std::vector<pir::Value>& orig_outs,
-      const std::vector<pir::Value>& decomp_outs,
-      std::unordered_map<pir::Value, int> orig_vars_dict);
+  void construct_dst_vars(const std::string& op_name,
+                          const std::vector<pir::Value>& orig_outs,
+                          const std::vector<pir::Value>& decomp_outs,
+                          std::unordered_map<pir::Value, int> orig_vars_dict,
+                          std::vector<pir::Value>* tar_vars);
   bool enable_decomp_by_filter(const std::string& op_name);
   void set_src_vars(const std::vector<pir::Value>& src_vars) {
     src_vars_ = src_vars;
diff --git a/paddle/fluid/primitive/base/primitive_ops.h b/paddle/fluid/primitive/base/primitive_ops.h
index aa52907f8f7fe..4aafd7693ae75 100644
--- a/paddle/fluid/primitive/base/primitive_ops.h
+++ b/paddle/fluid/primitive/base/primitive_ops.h
@@ -46,7 +46,6 @@ const std::set<std::string>& GetPrimitiveOpNames() {
     "pd_op.assign_value",
     "pd_op.concat",
     "pd_op.elementwise_pow",
-    "pd_op.rsqrt",
     "pd_op.floor",
     "pd_op.gather",
     "pd_op.gather_nd",
@@ -91,6 +90,10 @@ const std::set<std::string>& GetPrimitiveOpNames() {
     "pd_op.full_with_tensor",
     "pd_op.if",
     "pd_op.while",
+    /* Considering better performance, such ops are set as primitive ops
+       temporarily*/
+    "pd_op.rsqrt",
+    "pd_op.sqrt",
     /* basic ops by PIR*/
     "builtin.combine",
     "builtin.slice",
diff --git a/paddle/fluid/primitive/codegen/decomp_gen.py b/paddle/fluid/primitive/codegen/decomp_gen.py
index 95b40f9f87506..bfc157d24c3a6 100644
--- a/paddle/fluid/primitive/codegen/decomp_gen.py
+++ b/paddle/fluid/primitive/codegen/decomp_gen.py
@@ -156,7 +156,7 @@ def gen(
 
     Args:
         prim_path (pathlib.Path): The YAML file path of the primitive API.
-        fwd_path (pathlib.Path): The YAML file path of the forwad API.
+        fwd_path (pathlib.Path): The YAML file path of the forward API.
         rev_path (pathlib.Path): The YAML file path of the backward API.
         compat_path: (pathlib.Path): The YAML file path of the ops compat.
         fwd_pd_op_path (pathlib.Path): The YAML file path of the ir forward API.
diff --git a/paddle/fluid/primitive/codegen/gen.py b/paddle/fluid/primitive/codegen/gen.py
index e4d0e50e60877..dd75859e16b74 100644
--- a/paddle/fluid/primitive/codegen/gen.py
+++ b/paddle/fluid/primitive/codegen/gen.py
@@ -63,6 +63,7 @@
     'exp_grad',
     'floor_grad',
     'log_grad',
+    'rsqrt_grad',
     'sin_grad',
     'cos_grad',
     'tanh_grad',
@@ -117,6 +118,9 @@
     'relu_grad',
     'sigmoid_grad',
     'silu_grad',
+    'exp_grad',
+    'log_grad',
+    'abs_double_grad',
     'softmax_grad',
     'sqrt_grad',
 ]  # custom vjp list of composite op
diff --git a/paddle/fluid/primitive/composite/composite.h b/paddle/fluid/primitive/composite/composite.h
index 539d161243698..67cc7d6388460 100644
--- a/paddle/fluid/primitive/composite/composite.h
+++ b/paddle/fluid/primitive/composite/composite.h
@@ -127,6 +127,59 @@ static bool valid_type(const DataType& dtype) {
   }
 }
 
+template <typename T>
+Tensor p_norm_decomp(const Tensor& x,
+                     const float& porder = 2.0,
+                     const int& axis = -1,
+                     const float epsilon = 1.0e-12f,
+                     const bool& keepdim = false,
+                     const bool& asvector = false) {
+  auto org_dtype = x.dtype();
+  auto x_tmp = x;
+
+  bool need_cast = is_half_dtype(org_dtype);
+  if (need_cast) {
+    x_tmp = cast<T>(x, DataType::FLOAT32);
+  }
+
+  Tensor res;
+  if (porder == 0.0) {
+    // 0-norm
+    auto zero = full<T>(empty_shape, 0, x_tmp.dtype());
+    auto none_zero = not_equal<T>(x_tmp, zero);
+    res = cast<T>(none_zero, x_tmp.dtype());
+    res = sum<T>(res, {axis}, x_tmp.dtype(), keepdim);
+  } else if (porder == 1.0) {
+    // 1-norm
+    res = abs<T>(x_tmp);
+    res = sum<T>(res, {axis}, x_tmp.dtype(), keepdim);
+  } else if (porder == 2.0) {
+    // 2-norm
+    res = sqrt<T>(sum<T>(x_tmp * x_tmp, {axis}, x_tmp.dtype(), keepdim));
+  } else if (porder == INFINITY) {
+    // +INF-norm: max of |x|
+    res = abs<T>(x_tmp);
+    res = max<T>(res, {axis}, keepdim);
+  } else if (porder == -INFINITY) {
+    // -INF-norm: min of |x|
+    res = abs<T>(x_tmp);
+    res = min<T>(res, {axis}, keepdim);
+  } else {
+    // vanilla p-norm
+    auto porder_tensor = full<T>(empty_shape, porder, x_tmp.dtype());
+    auto inv_porder_tensor = full<T>(empty_shape, 1 / porder, x_tmp.dtype());
+    res = elementwise_pow<T>(x_tmp, porder_tensor);
+    res = sum<T>(res, {axis}, x_tmp.dtype(), keepdim);
+    res = elementwise_pow<T>(res, inv_porder_tensor);
+  }
+
+  if (need_cast) {
+    return cast<T>(res, org_dtype);
+  } else {
+    return res;
+  }
+}
+
 template <typename T>
 Tensor pow_decomp(const Tensor& x, const paddle::Scalar& y) {
   auto org_dtype = x.dtype();
@@ -153,6 +206,11 @@ Tensor pow_decomp(const Tensor& x, const paddle::Scalar& y) {
   }
 }
 
+template <typename T>
+Tensor reciprocal_decomp(const Tensor& x) {
+  return full<T>(empty_shape, 1.0, x.dtype()) / x;
+}
+
 template <typename T>
 std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_decomp(
     const Tensor& x,
@@ -592,24 +650,6 @@ std::tuple<Tensor, Tensor> dropout_decomp(
   }
 }
 
-template <typename T>
-Tensor sqrt_decomp(const Tensor& x) {
-  auto org_dtype = x.dtype();
-  Tensor x_cast = x;
-
-  bool need_cast = is_half_dtype(org_dtype);
-  if (need_cast) {
-    x_cast = cast<T>(x, DataType::FLOAT32);
-  }
-
-  auto ans = 1.0 / rsqrt<T>(x_cast);
-  if (need_cast) {
-    return cast<T>(ans, org_dtype);
-  } else {
-    return ans;
-  }
-}
-
 template <typename T>
 Tensor gelu_decomp(const Tensor& x, bool approximate) {
   const double PM_2_SQRTPI = 1.12837916709551257390; /* 2/sqrt(pi) */
@@ -778,10 +818,12 @@ std::tuple<Tensor, Tensor> flatten_decomp(const Tensor& x,
 
   for (size_t i = 0; i < x_dim.size();) {
     if (i == static_cast<size_t>(start_axis)) {
-      Tensor flat =
-          slice<T>(x_shape, {0}, {start_axis}, {end_axis + 1}, {1}, {});
-      flat = prod<T>(flat, {0}, false, false);
-      out_shape.push_back(reshape<T>(flat, {1}));
+      Tensor flat = get_slice<T>(x_shape, i);
+
+      for (auto t = start_axis + 1; t <= end_axis; ++t) {
+        flat = flat * get_slice<T>(x_shape, t);
+      }
+      out_shape.push_back(flat);
       i = end_axis + 1;
     } else {
       out_shape.push_back(get_slice<T>(x_shape, i));
@@ -839,14 +881,19 @@ std::tuple<Tensor, Tensor, Tensor> group_norm_decomp(
     const float epsilon,
     const int groups,
    const std::string& data_format) {
-  if (data_format != "NCHW") {
-    // TODO(chengyanfu): support NHWC data format
-    PADDLE_THROW(phi::errors::Unimplemented("Only support NCHW format."));
+  std::vector<int64_t> c_axis;
+  if (data_format == "NCHW") {
+    c_axis = {1};
+  } else if (data_format == "NHWC") {
+    c_axis = {1, 3};
+  } else {
+    PADDLE_THROW(
+        phi::errors::Unimplemented("Only support NCHW and NHWC format."));
   }
   size_t rank = x.shape().size();
-  if (rank != 3 && rank != 4) {
-    PADDLE_THROW(
-        phi::errors::Unimplemented("Only support NCHW format in rank 3 or 4."));
+  if (rank < 3 || rank > 5) {
+    PADDLE_THROW(phi::errors::Unimplemented(
+        "Only support NCHW and NHWC format in rank {3, 4, 5}."));
   }
 
   auto org_dtype = x.dtype();
@@ -856,22 +903,28 @@ std::tuple<Tensor, Tensor, Tensor> group_norm_decomp(
   if (need_cast) {
     x_cast = cast<T>(x, DataType::FLOAT32);
   }
-  if (rank == 3) {
-    x_cast = unsqueeze<T>(x_cast, {-1});
-  }
 
+  Tensor x_dim_t;
   Tensor out, mean_, var_;
   if (has_dynamic_shape(x_cast.shape())) {
-    Tensor x_dim_t = shape<T>(x_cast);
-    std::vector<int64_t> one_axis(1, 1);
-    Tensor x_shape = get_slice<T>(x_dim_t, 0) * groups;
-    Tensor dim_1 = full<T>({1}, -1, x_dim_t.type());
-    x_shape = concat<T>({x_shape, dim_1});
-    x_cast = backend::reshape<T>(x_cast, x_shape);
-    mean_ = mean_decomp<T>(x_cast, IntArray(one_axis), true);
+    x_dim_t = shape<T>(x_cast);
+    Tensor tar_shape;
+    if (data_format == "NCHW") {
+      tar_shape = get_slice<T>(x_dim_t, 0) * groups;
+      Tensor dim_1 = full<T>({1}, -1, x_dim_t.type());
+      tar_shape = concat<T>({tar_shape, dim_1});
+    } else {
+      Tensor N_shape = get_slice<T>(x_dim_t, 0);
+      Tensor dim_1 = full<T>({1}, -1, x_dim_t.type());
+      Tensor C_shape = get_slice<T>(x_dim_t, rank - 1);
+      Tensor dim_g = full<T>({1}, groups, x_dim_t.type());
+      Tensor dim_c_div_g = cast<T>(C_shape / dim_g, x_dim_t.type());
+      tar_shape = concat<T>({N_shape, dim_1, dim_g, dim_c_div_g});
+    }
+    x_cast = backend::reshape<T>(x_cast, tar_shape);
+    mean_ = mean_decomp<T>(x_cast, c_axis, true);
     Tensor var_tmp_ =
-        mean_decomp<T>(x_cast * x_cast, IntArray(one_axis), true) -
-        mean_ * mean_;
+        mean_decomp<T>(x_cast * x_cast, c_axis, true) - mean_ * mean_;
     var_ = maximum<T>(
        var_tmp_,
        backend::full_with_tensor<T>(shape<T>(var_tmp_), 0, var_tmp_.dtype()));
@@ -881,23 +934,33 @@ std::tuple<Tensor, Tensor, Tensor> group_norm_decomp(
     out = backend::reshape<T>(res, x_dim_t);
   } else {
     auto x_dim = x_cast.shape();
-    std::vector<int64_t> one_axis(1, 1);
-
-    std::vector<int64_t> x_shape{x_dim[0] * groups, -1};
-    x_cast = reshape<T>(x_cast, x_shape);
-    mean_ = mean_decomp<T>(x_cast, IntArray(one_axis), true);
-    auto var_tmp_ = mean_decomp<T>(x_cast * x_cast, IntArray(one_axis), true) -
-                    mean_ * mean_;
+    if (data_format == "NCHW") {
+      x_cast = reshape<T>(x_cast, {x_dim[0] * groups, -1});
+    } else {
+      int c_div_g = x_dim[rank - 1] / groups;
+      x_cast = reshape<T>(x_cast, {x_dim[0], -1, groups, c_div_g});
+    }
+    mean_ = mean_decomp<T>(x_cast, c_axis, true);
+    auto var_tmp_ =
+        mean_decomp<T>(x_cast * x_cast, c_axis, true) - mean_ * mean_;
     var_ = maximum<T>(var_tmp_, full<T>(var_tmp_.shape(), 0, var_tmp_.dtype()));
     auto var_inv = rsqrt<T>(var_ + full<T>(empty_shape, epsilon, var_.dtype()));
     auto res = (x_cast - mean_) * var_inv;
     out = reshape<T>(res, x_dim);
   }
-  std::vector<int64_t> slice_bias_shape{-1, 1, 1};
+  std::vector<int64_t> slice_bias_shape;
+  slice_bias_shape = {-1};
+  for (size_t i = 0; i < rank - 2; i++) {
+    slice_bias_shape.push_back(1);
+  }
   Tensor scale_cast;
   if (scale) {
-    scale_cast = reshape<T>(scale.get(), slice_bias_shape);
+    if (data_format == "NCHW") {
+      scale_cast = reshape<T>(scale.get(), slice_bias_shape);
+    } else {
+      scale_cast = scale.get();
+    }
     if (need_cast) {
       scale_cast = cast<T>(scale_cast, DataType::FLOAT32);
     }
@@ -905,121 +968,35 @@ std::tuple<Tensor, Tensor, Tensor> group_norm_decomp(
   }
   Tensor bias_cast;
   if (bias) {
-    bias_cast = reshape<T>(bias.get(), slice_bias_shape);
+    if (data_format == "NCHW") {
+      bias_cast = reshape<T>(bias.get(), slice_bias_shape);
+    } else {
+      bias_cast = bias.get();
+    }
     if (need_cast) {
       bias_cast = cast<T>(bias_cast, DataType::FLOAT32);
     }
     out = out + bias_cast;
   }
   Tensor mean_out, var_out;
-  if (has_dynamic_shape(x.shape())) {
+  if (has_dynamic_shape(x_cast.shape())) {
     Tensor x_shape = get_slice<T>(x_dim_t, 0);
     Tensor dim_1 = full<T>({1}, groups, x_shape.type());
     x_shape = concat<T>({x_shape, dim_1});
     mean_out = backend::reshape<T>(mean_, x_shape);
     var_out = backend::reshape<T>(var_, x_shape);
   } else {
-    auto x_dim = x.shape();
-    std::vector<int64_t> res_shape{x_dim[0], groups};
+    std::vector<int64_t> res_shape{x.shape().at(0), groups};
     mean_out = reshape<T>(mean_, res_shape);
     var_out = reshape<T>(var_, res_shape);
   }
   if (need_cast) {
     out = cast<T>(out, org_dtype);
   }
-  if (rank == 3) {
-    out = squeeze<T>(out, {-1});
-  }
 
   return std::make_tuple(out, mean_out, var_out);
 }
 
-template <typename T>
-Tensor tile_decomp(const Tensor& x, const IntArray& repeat_times) {
-  // x.shape = [3,4] repeat_time=(a,b,c)
-  // shape1 = [1,3,4]
-  // shape2 = [1,1,1,3,1,4]
-  // shape3 = [a,1,b,3,c,4]
-  // shape4 = shape1 -> [a, b*3, c*4]
-  // t1 = x.reshape(shape1)
-  // t2 = t1.reshape(shape2)
-  // t3 = t2.expand(shape3)
-  // res = t3.reshape(t3)
-  std::vector<int64_t> repeat_times_ = repeat_times.GetData();
-  std::vector<int64_t> shape1 = x.shape();
-  auto diff = int64_t(repeat_times_.size()) - int64_t(shape1.size());
-  Tensor t1;
-  if (has_dynamic_shape(shape1)) {
-    size_t repeat_time_length = repeat_times_.size();
-    std::vector<int64_t> unsqueeze_idx2;
-    if (diff > 0) {
-      std::vector<int64_t> unsqueeze_idx1(diff);
-      std::iota(unsqueeze_idx1.begin(), unsqueeze_idx1.end(), 0);
-      t1 = unsqueeze<T>(x, unsqueeze_idx1);
-    } else {
-      t1 = x;
-    }
-    auto length2 = t1.dims().size();
-    for (size_t i = 0; i < repeat_times_.size(); i++) {
-      unsqueeze_idx2.push_back(length2 - repeat_times_.size() + i * 2);
-    }
-
-    Tensor t2 = unsqueeze<T>(t1, unsqueeze_idx2);
-    std::vector<int64_t> ref_shape(t2.dims().size(), 1);
-    for (size_t i = 0; i < unsqueeze_idx2.size(); i++) {
-      ref_shape[unsqueeze_idx2[i]] = repeat_times_[i];
-    }
-    Tensor ref_t = full<T>(ref_shape, 1.0, t2.dtype());
-    Tensor t3 = t2 * ref_t;
-    Tensor origin_shape_t = shape<T>(t1);
-    std::vector<Tensor> res_s;
-    for (int64_t i = int64_t(length2) - 1; i >= 0; i--) {
-      auto relative_idx =
-          int64_t(repeat_time_length) - 1 - int64_t(length2 - i - 1);
-
-      if (relative_idx >= 0) {
-        res_s.insert(
-            res_s.begin(),
-            get_slice<T>(origin_shape_t, i) * repeat_times_[relative_idx]);
-      } else {
-        res_s.insert(res_s.begin(), get_slice<T>(origin_shape_t, i));
-      }
-    }
-    Tensor s4 = concat<T>(res_s, 0);
-    return backend::reshape_with_tensor<T>(t3, s4);
-
-  } else {
-    if (diff > 0) {
-      for (int64_t i = 0; i < diff; i++) {
-        shape1.insert(shape1.begin(), 1);
-      }
-    }
-
-    auto length = int64_t(shape1.size());
-    std::vector<int64_t> shape2 = shape1;
-    std::vector<int64_t> shape3 = shape1;
-    std::vector<int64_t> final_shape = shape1;
-    auto r_length = repeat_times_.size();
-    for (size_t j = 0; j < repeat_times_.size(); j++) {
-      int64_t i = int64_t(j);
-
-      shape2.insert(shape2.begin() + (length - 1 - i), 1);
-      shape3.insert(shape3.begin() + (length - 1 - i),
-                    repeat_times_[r_length - i - 1]);
-
-      final_shape[length - i - 1] =
-          final_shape[length - i - 1] * repeat_times_[r_length - i - 1];
-    }
-
-    t1 = reshape<T>(x, shape1);
-
-    auto t2 = reshape<T>(t1, shape2);
-    auto t3 = t2.expand(shape3);
-    auto res = reshape<T>(t3, final_shape);
-    return res;
-  }
-}
-
 template <typename T>
 Tensor square_decomp(const Tensor& x) {
   auto org_dtype = x.dtype();
diff --git a/paddle/fluid/primitive/primitive.yaml b/paddle/fluid/primitive/primitive.yaml
index 58c3ac09b782a..f5e99706faf97 100644
--- a/paddle/fluid/primitive/primitive.yaml
+++ b/paddle/fluid/primitive/primitive.yaml
@@ -4,6 +4,7 @@
 - divide
 - elementwise_pow
 - rsqrt
+- sqrt
 - sin
 - sinh
 - asin
diff --git a/paddle/fluid/primitive/rule/vjp/details.h b/paddle/fluid/primitive/rule/vjp/details.h
index 467469106f064..f12626d95257d 100644
--- a/paddle/fluid/primitive/rule/vjp/details.h
+++ b/paddle/fluid/primitive/rule/vjp/details.h
@@ -69,7 +69,7 @@ void divide_grad(const Tensor& x,
                  Tensor* dy) {
   if (dy) {
     // dy = -(x/y^2) * dout
-    auto dy_res = -(x / y.pow(2.0)) * out_grad;
+    auto dy_res = -(x / (y * y)) * out_grad;
     if (out_grad.dims() != y.dims()) {
       phi::DDim reduce_dim =
           get_reduce_dims_from_out(out_grad.dims(), y.dims());
@@ -163,8 +163,7 @@ void gelu_grad(const Tensor& x,
   // Promote to fp32 when the input type is fp16 for keeping consistent with
   // phi kernel
 
-  if (x.dtype() == phi::DataType::FLOAT16 ||
-      x.dtype() == phi::DataType::BFLOAT16) {
+  if (is_half_dtype(x.dtype())) {
     auto promoted_x = cast<T>(x, phi::DataType::FLOAT32);
     auto promoted_out_grad = cast<T>(out_grad, phi::DataType::FLOAT32);
     if (approximate) {
@@ -566,9 +565,7 @@ void layer_norm_grad(const Tensor& x,
   auto x_sub_mean = x_cast - mean_;          // M,N
   auto tmp = (1.0 / (variance_ + epsilon));  // M,1
-  // auto sqrt_var_1 = sqrt<T>(tmp);         // M,1
-  auto sqrt_var_1 = elementwise_pow<T>(
-      tmp, full<T>(common::vectorize(tmp.dims()), 0.5, tmp.dtype()));
+  auto sqrt_var_1 = sqrt<T>(tmp);            // M,1
   auto x_sub_mean_mul_sqrt_var_1 = x_sub_mean * sqrt_var_1;
 
   if (x_grad) {
@@ -721,6 +718,15 @@ void sqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) {
   }
 }
 
+template <typename T>
+void rsqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) {
+  if (x_grad) {
+    // This calculation is important for resnet.
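+    // Same derivation as rsqrt_grad in composite_backward_api.h:
+    // d(rsqrt(x))/dx = -0.5 * out^3.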
+ auto x_grad_tmp = -0.5 * out * out * out * out_grad; + set_output(x_grad_tmp, x_grad); + } +} + template void silu_grad(const Tensor& x, const Tensor& out, @@ -1254,7 +1260,7 @@ void batch_norm_grad(const Tensor& x, auto eps = full(common::vectorize(run_var.dims()), epsilon, run_var.dtype()); mean_data = run_mean; - rsqrt_var = (run_var + eps).pow(-0.5); + rsqrt_var = rsqrt(run_var + eps); } else { mean_data = saved_mean; rsqrt_var = saved_variance; @@ -1513,11 +1519,36 @@ void group_norm_grad(const Tensor& x, // cal d_bias: // d_bias = sum(dy, axes=(0,2,3)) DataLayout data_layout_ = common::StringToDataLayout(data_layout); - if (data_layout_ != DataLayout::kNCHW) { - // TODO(chengyanfu): Subsequent support NHWC + std::vector x_dims = x.shape(); + int rank = x_dims.size(); + if (rank < 3 || rank > 5) { + PADDLE_THROW(phi::errors::Unimplemented( + "Only support NCHW and NHWC format in rank {3, 4, 5}.")); + } + int N = x_dims[0]; + int C; + int hw = 1; + std::vector reduce_axis; + + if (data_layout_ == DataLayout::kNCHW) { + C = x_dims[1]; + for (int i = 2; i < rank; ++i) { + hw *= x_dims[i]; + reduce_axis.push_back(i); + } + } else if (data_layout_ == DataLayout::kNHWC) { + C = x_dims[rank - 1]; + for (int i = 1; i < (rank - 1); ++i) { + hw *= x_dims[i]; + reduce_axis.push_back(i); + } + } else { PADDLE_THROW(phi::errors::InvalidArgument("Unsupported storage order: %s", data_layout)); } + + int g_num = C / groups; + Tensor x_data = x; Tensor out_grad_data = out_grad; @@ -1531,37 +1562,38 @@ void group_norm_grad(const Tensor& x, out_grad_data = cast(out_grad, phi::DataType::FLOAT32); } - std::vector x_dims = common::vectorize(x.dims()); - auto add_axis = std::vector({-1}); - const int N = x_dims[0]; - const int C = x_dims[1]; + auto shape_group = std::vector({N, groups, g_num}); - const int hw = x_dims[2] * x_dims[3]; - const int g_num = C / groups; + std::vector whole_group_shape; + if (data_layout_ == DataLayout::kNCHW) { + whole_group_shape = std::vector({N, groups, g_num, -1}); + } else { + whole_group_shape = std::vector({N, -1, groups, g_num}); + } + auto var_eps = variance + epsilon; - auto reduce_axis = IntArray(std::vector({2, 3})); - auto shape_group = IntArray(std::vector({N, groups, g_num})); - auto whole_group_shape = - IntArray(std::vector({N, groups, g_num, hw})); + auto inv_std = rsqrt(var_eps); - auto scale_ptr = scale.get_ptr(); - auto bias_ptr = bias.get_ptr(); - auto sqrt_element = 1.0 / (variance + epsilon); - auto inv_std = elementwise_pow( - sqrt_element, - full( - common::vectorize(sqrt_element.dims()), 0.5, sqrt_element.dtype())); auto inv_std_mul_s = inv_std / hw / g_num; auto dtype = x_data.dtype(); auto sum_y_grad_mul_x = sum(out_grad_data * x_data, reduce_axis, dtype, false); auto sum_y_grad = sum(out_grad_data, reduce_axis, dtype, false); + + Tensor scale_data; + if (scale) { + scale_data = scale.get(); + } + Tensor bias_data; + if (bias) { + bias_data = bias.get(); + } + if (x_grad) { Tensor d1; Tensor d2; Tensor p1; - if (scale_ptr) { - auto scale_data = scale.get(); + if (scale) { if (scale_data.dtype() == phi::DataType::FLOAT16 || scale_data.dtype() == phi::DataType::BFLOAT16) { scale_data = cast(scale_data, phi::DataType::FLOAT32); @@ -1573,22 +1605,29 @@ void group_norm_grad(const Tensor& x, p1 = reshape(inv_std, std::vector({N, groups, 1})) * reshape(scale_data, std::vector({1, groups, g_num})); } else { - d1 = (reshape(sum_y_grad_mul_x, shape_group)) - .sum(std::vector({2}), dtype, false); - d2 = (reshape(sum_y_grad, shape_group)) - 
.sum(std::vector({2}), dtype, false); - p1 = (reshape(inv_std, std::vector({N, groups, 1}))) - .expand(IntArray(shape_group)); + d1 = (reshape(sum_y_grad_mul_x, shape_group)).sum({2}, dtype, false); + d2 = (reshape(sum_y_grad, shape_group)).sum({2}, dtype, false); + p1 = (reshape(inv_std, {N, groups, 1})) + .expand(shape_group); // [n, g, g_n] } - auto p2 = (d2 * mean - d1) * (inv_std_mul_s * inv_std * inv_std); + auto p2 = (d2 * mean - d1) * (inv_std_mul_s / var_eps); // [n, g] auto p3 = -p2 * mean - d2 * inv_std_mul_s; - auto first_shape = get_unsqueeze_dims(p1, std::vector({3})); - auto second_shape = get_unsqueeze_dims(p2, std::vector({2, 3})); + std::vector first_shape; + std::vector second_shape; + if (data_layout_ == DataLayout::kNCHW) { + first_shape = get_unsqueeze_dims(p1, {3}); // [n, g, g_n, 1] + second_shape = get_unsqueeze_dims(p2, {2, 3}); // [n, g, 1, 1] + } else { + first_shape = get_unsqueeze_dims(p1, {1}); // [n, 1, g, g_n] + second_shape = get_unsqueeze_dims(p2, {1, 3}); // [n, 1, g, 1] + } + p1 = reshape(p1, first_shape); p2 = reshape(p2, second_shape); p3 = reshape(p3, second_shape); - auto tmp_1 = reshape(out_grad_data, whole_group_shape) * p1; + auto tmp_1 = + reshape(out_grad_data, whole_group_shape) * p1; // [n, hw, g, g_n] auto tmp_2 = reshape(x_data, whole_group_shape) * p2 + p3; auto x_grad_data = tmp_1 + tmp_2; x_grad_data = reshape(x_grad_data, x.shape()); @@ -1599,29 +1638,24 @@ void group_norm_grad(const Tensor& x, set_output(x_grad_data, x_grad); } + if (scale_grad) { - if (scale_ptr) { - auto third_shape = get_unsqueeze_dims(mean, std::vector({2})); + if (scale) { + auto third_shape = get_unsqueeze_dims(mean, {2}); auto tmp1 = (reshape(sum_y_grad_mul_x, shape_group) - reshape(sum_y_grad, shape_group) * reshape(mean, third_shape)) * reshape(inv_std, third_shape); - auto scale_grad_tmp = reshape( - tmp1.sum(std::vector({0}), scale_ptr->dtype(), false), - IntArray(std::vector({C}))); + auto scale_grad_tmp = + reshape(tmp1.sum({0}, scale->dtype(), false), {C}); set_output(scale_grad_tmp, scale_grad); - } else { - scale_grad = nullptr; } } if (bias_grad) { - if (bias_ptr) { - auto bias_grad_tmp = - sum_y_grad.sum(std::vector({0}), bias_ptr->dtype(), false); + if (bias) { + auto bias_grad_tmp = sum_y_grad.sum({0}, bias->dtype(), false); set_output(bias_grad_tmp, bias_grad); - } else { - bias_grad = nullptr; } } } diff --git a/paddle/fluid/primitive/utils/utils.h b/paddle/fluid/primitive/utils/utils.h index c67886bc2ed2c..42f1533db723e 100644 --- a/paddle/fluid/primitive/utils/utils.h +++ b/paddle/fluid/primitive/utils/utils.h @@ -138,7 +138,7 @@ static phi::DDim get_reduce_dims_from_out(const phi::DDim& dout_dims, result.push_back(i); } for (int i = 0; i < in_dims.size(); ++i) { - if (in_dims[i] == 1 && dout_dims[i] != 1) { + if (in_dims[i] == 1 && dout_dims[i + bat] != 1) { result.push_back(i + bat); } else { PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index ecf95eb234972..0a32e0ea8f9ff 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -41,6 +41,7 @@ set(PYBIND_DEPS op_dialect_vjp program_translator pir_transforms + pir_save_load new_profiler fluid_jit prim_utils @@ -399,11 +400,11 @@ if(WITH_PYTHON) list(APPEND OP_IMPL_DEPS ${op_impl_path}/openblas.dll) list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/openblas.dll) endif() - if(WITH_MKLDNN) + if(WITH_ONEDNN) add_custom_command( OUTPUT ${op_impl_path}/mkldnn.dll COMMAND ${CMAKE_COMMAND} -E copy 
                ${MKLDNN_SHARED_LIB} ${op_impl_path}
-        DEPENDS mkldnn)
+        DEPENDS onednn)
       list(APPEND OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll)
       list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll)
     endif()
@@ -474,12 +475,12 @@ if(WITH_PYTHON)
       list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so)
       list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so)
     endif()
-    if(WITH_MKLDNN)
+    if(WITH_ONEDNN)
       add_custom_command(
         OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0
         COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB}
                 ${CMAKE_CURRENT_BINARY_DIR}
-        DEPENDS mkldnn)
+        DEPENDS onednn)
       list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0)
       list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0)
     endif()
diff --git a/paddle/fluid/pybind/dist_api.cc b/paddle/fluid/pybind/dist_api.cc
index 44feb061438e8..fd4066682161e 100644
--- a/paddle/fluid/pybind/dist_api.cc
+++ b/paddle/fluid/pybind/dist_api.cc
@@ -17,6 +17,8 @@
 #include "paddle/fluid/pir/dialect/distributed/ir/dist_api.h"
 #include "paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h"
+#include "paddle/fluid/pir/dialect/distributed/transforms/dist_to_dense_pass.h"
+#include "paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.h"
 #include "paddle/fluid/pybind/dist_api.h"
 #include "paddle/fluid/pybind/dist_static_op_function.h"
 #include "paddle/phi/core/enforce.h"
@@ -105,6 +107,11 @@ void BindDistOpsAPI(pybind11::module *module) {
   }
 }
+void BindDistPassAPI(pybind11::module *module) {
+  module->def("apply_mix2dist_pass", paddle::dialect::MixToDistPass);
+  module->def("apply_dist2dense_pass", paddle::dialect::DistToDensePass);
+}
+
 void BindOpsFunction(py::module *m) {
   m->def("reshard_v2",
          [](const pir::Value &x, const TensorDistAttribute &dist_attr) {
@@ -116,6 +123,7 @@ void BindDistApi(pybind11::module *module) {
   auto ir_module = module->def_submodule("pir");
   BindOperationDistAttribute(&ir_module);
   BindTensorDistAttribute(&ir_module);
+  BindDistPassAPI(&ir_module);
   auto ops_modules = ir_module.def_submodule("ops");
   BindDistOpsAPI(&ops_modules);
   BindOpsFunction(&ops_modules);
diff --git a/paddle/fluid/pybind/dist_static_op_function.h b/paddle/fluid/pybind/dist_static_op_function.h
index afd71b7521567..c23a16bca2730 100644
--- a/paddle/fluid/pybind/dist_static_op_function.h
+++ b/paddle/fluid/pybind/dist_static_op_function.h
@@ -18,6 +18,7 @@
 #include "paddle/fluid/pir/dialect/operator/ir/api_builder.h"
 #include "paddle/fluid/pybind/eager_utils.h"
 #include "paddle/fluid/pybind/exception.h"
+#include "paddle/fluid/pybind/pir.h"
 #include "paddle/phi/core/enforce.h"
 namespace paddle {
@@ -66,12 +67,42 @@ static PyObject *static_api_reshard(PyObject *self,
     PyObject *process_mesh_obj = PyTuple_GET_ITEM(args, 1);
     auto process_mesh = CastPyArg2ProcessMesh(process_mesh_obj, 1);
-    PyObject *dims_mapping_obj = PyTuple_GET_ITEM(args, 2);
-    auto dims_mapping = CastPyArg2VectorOfInt64(dims_mapping_obj, 2);
+    PyObject *placements_obj = PyTuple_GET_ITEM(args, 2);
+    auto placements = CastPyArg2VectorOfPlacement(placements_obj, 2);
+
+    int64_t ndim = GetValueDims(input).size();
+    std::vector<int64_t> dim_map(ndim, -1);
+    for (size_t i = 0; i < placements.size(); i++) {
+      auto &placement = placements[i];
+      if (placement->is_shard()) {
+        auto shard_dim =
+            dynamic_cast<phi::distributed::Shard &>(*placement).get_dim();
+        PADDLE_ENFORCE_EQ(
+            dim_map[shard_dim],
+            -1,
+            common::errors::InvalidArgument(
+                "Tensor dim %lld is already sharded on mesh dim %lld,"
+                " DistTensor operator implementation does not support things "
+                "like hybrid"
+                " sharding strategies yet (e.g. [Shard(0), Shard(0)])",
+                shard_dim,
+                dim_map[shard_dim]));
+        dim_map[shard_dim] = i;
+      }
+    }
+    paddle::flat_hash_map<int64_t, phi::ReduceType> partial_status;
+    for (size_t i = 0; i < placements.size(); ++i) {
+      auto &p = placements[i];
+      if (p->is_partial()) {
+        partial_status.insert(
+            {i,
+             dynamic_cast<phi::distributed::Partial &>(*p).get_reduce_type()});
+      }
+    }
     // Call ir static api
     auto static_api_out =
-        paddle::dialect::reshard(input, process_mesh, dims_mapping);
+        paddle::dialect::reshard(input, process_mesh, dim_map, partial_status);
     return ToPyObject(static_api_out);
   } catch (...) {
diff --git a/paddle/fluid/pybind/eager_py_layer.cc b/paddle/fluid/pybind/eager_py_layer.cc
index daaac0c20e780..fb4235f619e99 100644
--- a/paddle/fluid/pybind/eager_py_layer.cc
+++ b/paddle/fluid/pybind/eager_py_layer.cc
@@ -478,9 +478,11 @@ PyObject* pylayer_method_apply(PyObject* cls,
   for (size_t i = 0; i < inputs_autograd_meta.size(); i++) {
     if (ctx->forward_input_tensor_is_duplicable[i]) {
+      std::vector<paddle::Tensor*> tmp;
       for (auto t : inputs_tensor[i]) {
-        grad_node->SetGradOutMeta(*t, i);
+        tmp.push_back(t);
       }
+      grad_node->SetGradOutMeta(tmp, i);
     } else {
       grad_node->SetGradOutMeta(*inputs_tensor[i][0], i);
     }
@@ -490,9 +492,7 @@ PyObject* pylayer_method_apply(PyObject* cls,
     if (ctx->forward_output_tensor_is_duplicable[i]) {
       egr::EagerUtils::SetOutRankWithSlot(&outputs_autograd_meta[i], i);
       egr::EagerUtils::SetHistory(&outputs_autograd_meta[i], grad_node);
-      for (auto t : outputs_tensor[i]) {
-        grad_node->SetGradInMeta(*t, i);
-      }
+      grad_node->SetGradInMeta(outputs_tensor[i], i);
     } else {
       egr::EagerUtils::SetOutRankWithSlot(outputs_autograd_meta[i][0], i);
       egr::EagerUtils::SetHistory(outputs_autograd_meta[i][0], grad_node);
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index aba7c99662bbe..48f0168196949 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -32,7 +32,6 @@ limitations under the License. */
 #include "paddle/fluid/jit/function.h"
 #include "paddle/fluid/memory/allocation/allocator.h"
 #include "paddle/fluid/operators/py_func_op.h"
-#include "paddle/fluid/operators/utils.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
 #include "paddle/fluid/pir/dialect/operator/utils/utils.h"
 #include "paddle/fluid/platform/enforce.h"
@@ -47,6 +46,7 @@ limitations under the License. */
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/distributed/auto_parallel/placement_types.h"
 #include "paddle/phi/core/distributed/auto_parallel/process_mesh.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/pir/include/core/attribute.h"
 COMMON_DECLARE_bool(check_nan_inf);
@@ -2478,6 +2478,7 @@ PyObjectHolder::PyObjectHolder(PyObject* ptr) { ptr_ = ptr; }
 PyObjectHolder::~PyObjectHolder() {  // NOLINT
   ::pybind11::gil_scoped_acquire gil;
+  // NOTE(deepllz): ptr_ is owned by this object, so release it in destructor.
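+  // Py_XDECREF (unlike Py_DECREF) is a no-op when ptr_ is null, so a holder
+  // whose pointer was never set is destroyed safely; the GIL is acquired
+  // above because dropping the last reference may run arbitrary Python
+  // finalizers.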
Py_XDECREF(ptr_); } @@ -2512,7 +2513,10 @@ std::shared_ptr PackHook::operator()( bool grad_tmp = egr::Controller::Instance().HasGrad(); egr::Controller::Instance().SetHasGrad(false); ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); + PyObject* args = PyTuple_New(1); + PADDLE_ENFORCE_NOT_NULL(args, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); PyTuple_SET_ITEM(args, 0, paddle::pybind::ToPyObject(tensor)); PyObject* ret = PyObject_Call(hook_, args, nullptr); PADDLE_ENFORCE_NOT_NULL(ret, @@ -2527,7 +2531,10 @@ void* PackHook::operator()(void* py_tensor) { bool grad_tmp = egr::Controller::Instance().HasGrad(); egr::Controller::Instance().SetHasGrad(false); ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); + PyObject* args = PyTuple_New(1); + PADDLE_ENFORCE_NOT_NULL(args, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); Py_INCREF(reinterpret_cast(py_tensor)); PyTuple_SET_ITEM(args, 0, reinterpret_cast(py_tensor)); PyObject* ret = PyObject_Call(hook_, args, nullptr); @@ -2551,13 +2558,20 @@ paddle::Tensor UnPackHook::operator()( bool grad_tmp = egr::Controller::Instance().HasGrad(); egr::Controller::Instance().SetHasGrad(false); ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); - Py_INCREF(reinterpret_cast(packed_value->get())); - PyTuple_SET_ITEM(args, 0, reinterpret_cast(packed_value->get())); + PyObject* args = PyTuple_New(1); + PADDLE_ENFORCE_NOT_NULL(args, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); + PyObject* py_packed_value = reinterpret_cast(packed_value->get()); + Py_INCREF(py_packed_value); + PyTuple_SET_ITEM(args, 0, py_packed_value); PyObject* ret = PyObject_Call(hook_, args, nullptr); PADDLE_ENFORCE_NOT_NULL(ret, paddle::platform::errors::External( pybind11::detail::error_string().c_str())); + // NOTE(deepllz): tupledealloc will cause the reference count of the objects + // in it to be decremented by one, so no need to call + // Py_XDECREF(py_packed_value) Py_XDECREF(args); egr::Controller::Instance().SetHasGrad(grad_tmp); @@ -2576,7 +2590,10 @@ void* UnPackHook::operator()(void* packed_value, void* other) { bool grad_tmp = egr::Controller::Instance().HasGrad(); egr::Controller::Instance().SetHasGrad(false); ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); + PyObject* args = PyTuple_New(1); + PADDLE_ENFORCE_NOT_NULL(args, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); Py_INCREF(reinterpret_cast(packed_value)); PyTuple_SET_ITEM(args, 0, reinterpret_cast(packed_value)); PyObject* ret = PyObject_Call(hook_, args, nullptr); diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index b70efdbabbebc..55173bad9a1c8 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -54,7 +54,6 @@ limitations under the License. */ #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/xccl_context.h" #include "paddle/fluid/memory/allocation/mmap_allocator.h" -#include "paddle/fluid/operators/utils.h" #include "paddle/fluid/pybind/cuda_streams_py.h" #include "paddle/fluid/pybind/eager_utils.h" #include "paddle/fluid/pybind/pybind_variant_caster.h" @@ -62,6 +61,7 @@ limitations under the License. 
*/ #include "paddle/fluid/pybind/tensor_py.h" #include "paddle/fluid/pybind/uva_utils.h" #include "paddle/phi/core/compat/arg_map_context.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/type_defs.h" namespace paddle { diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 2996133948cc6..457bc649f98d1 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -1225,8 +1225,8 @@ void BindPaddleInferPredictor(py::module *m) { .def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory) .def("clear_intermediate_tensor", &paddle_infer::Predictor::ClearIntermediateTensor) - .def("register_output_hook", - &paddle_infer::Predictor::RegisterOutputHook); + .def("register_output_hook", &paddle_infer::Predictor::RegisterOutputHook) + .def("register_input_hook", &paddle_infer::Predictor::RegisterInputHook); } void BindZeroCopyTensor(py::module *m) { diff --git a/paddle/fluid/pybind/io.cc b/paddle/fluid/pybind/io.cc index 9075e904ef4b8..d38dbf72643ce 100644 --- a/paddle/fluid/pybind/io.cc +++ b/paddle/fluid/pybind/io.cc @@ -17,6 +17,8 @@ limitations under the License. */ #include "paddle/fluid/framework/io/save_load_tensor.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows_utils.h" +#include "paddle/fluid/pir/serialize_deserialize/include/interface.h" +#include "paddle/fluid/pir/serialize_deserialize/include/save_load_parameters.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/pybind/pybind_variant_caster.h" #include "paddle/utils/pybind.h" @@ -122,6 +124,24 @@ void BindIO(pybind11::module *m) { paddle::framework::LoadTensor(path, &tensor_load); return tensor_load; }); + + m->def("save_func", &pir::SaveFunction); + + m->def("save_combine_func", &pir::SaveCombineFunction); + + m->def("load_func", &pir::LoadFunction); + + m->def("load_combine_func", &pir::LoadCombineFunction); + + m->def("serialize_pir_program", + &pir::WriteModule, + py::arg("program"), + py::arg("file_path"), + py::arg("pir_version"), + py::arg("overwrite") = true, + py::arg("readable") = false, + py::arg("trainable") = true); + m->def("deserialize_pir_program", &pir::ReadModule); } } // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/manual_static_op_function.h b/paddle/fluid/pybind/manual_static_op_function.h index 7767c4a4569b3..8943633fb4cda 100644 --- a/paddle/fluid/pybind/manual_static_op_function.h +++ b/paddle/fluid/pybind/manual_static_op_function.h @@ -159,7 +159,7 @@ PyObject *static_api_full(PyObject *self, PyObject *args, PyObject *kwargs) { CallStackRecorder callstack_recoder("full_with_tensor"); callstack_recoder.Record(); auto static_api_out = - paddle::dialect::full_with_tensor(shape, value, dtype); + paddle::dialect::full_with_tensor(value, shape, dtype); callstack_recoder.AttachToOps(); return ToPyObject(static_api_out); diff --git a/paddle/fluid/pybind/pir.cc b/paddle/fluid/pybind/pir.cc index 2568e5eef4c5e..4176ecf0bbcbb 100644 --- a/paddle/fluid/pybind/pir.cc +++ b/paddle/fluid/pybind/pir.cc @@ -32,7 +32,6 @@ #include "paddle/fluid/pir/dialect/distributed/ir/dist_attribute.h" #include "paddle/fluid/pir/dialect/distributed/ir/dist_dialect.h" #include "paddle/fluid/pir/dialect/distributed/ir/dist_type.h" -#include "paddle/fluid/pir/dialect/distributed/transforms/mix_to_dist_pass.h" #include "paddle/fluid/pir/dialect/kernel/ir/kernel_type.h" #include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h" #include 
"paddle/fluid/pir/dialect/operator/ir/api_builder.h" @@ -72,6 +71,7 @@ #ifdef PADDLE_WITH_CINN #include "paddle/cinn/hlir/dialect/operator/ir/op_dialect.h" #include "paddle/cinn/hlir/dialect/operator/transforms/add_cinn_pass.h" +#include "paddle/cinn/hlir/dialect/operator/transforms/check_infer_symbolic_util.h" #include "paddle/cinn/hlir/framework/pir_compiler.h" #endif @@ -301,6 +301,15 @@ void BindProgram(py::module *m) { [](std::shared_ptr self, int64_t random_seed) { SetProgramInt64Attr(self, "random_seed", random_seed); }) + .def_property_readonly( + "blocks", + [](const std::shared_ptr &self) { + // Note: We only return global block currently. + py::list op_list; + op_list.append(self->block()); + return op_list; + }, + return_value_policy::reference) .def("get_output_value_by_name", [](Program &self, const std::string &name) { return GetOutputValueByName(self, name); @@ -638,6 +647,10 @@ void BindOperation(py::module *m) { "callstack", [](Operation &self) -> py::list { py::list callstack_list; + if (!self.HasAttribute(paddle::framework::OpProtoAndCheckerMaker:: + OpCreationCallstackAttrName())) { + return callstack_list; + } pir::Attribute op_callstack = self.attribute( paddle::framework::OpProtoAndCheckerMaker:: OpCreationCallstackAttrName()); @@ -675,14 +688,19 @@ void BindOperation(py::module *m) { pir::ArrayAttribute::get(pir::IrContext::Instance(), op_callstack_infos)); }) - .def("dist_attr", [](Operation &self) { - if (self.HasAttribute(kAttrOpDistAttr)) { - return self.attribute(kAttrOpDistAttr); - } else { - PADDLE_THROW( - phi::errors::InvalidArgument("dist_attr is only for dist op.")); - } - }); + .def_property( + "dist_attr", + [](Operation &self) { + if (self.HasAttribute(kAttrOpDistAttr)) { + return self.attribute(kAttrOpDistAttr); + } else { + PADDLE_THROW(phi::errors::InvalidArgument( + "dist_attr is only for dist op.")); + } + }, + [](Operation &self, OperationDistAttribute op_dist_attr) { + self.set_attribute(kAttrOpDistAttr, op_dist_attr); + }); py::class_ block_container( *m, "Operation_BlockContainer", R"DOC( The Operation_BlockContainer only use to walk all blocks in the operation. 
@@ -1072,6 +1090,131 @@ void range_block_do(const Block *block, std::vector<int> range, F fn) {
   }
 }
+template <typename K, typename V>
+bool ExistsInMapValues(const std::map<K, V> &m, V value) {
+  for (const auto &[k, v] : m) {
+    if (v == value) {
+      return true;
+    }
+  }
+  return false;
+}
+
+std::map<int, int> GetOpInplaceInfo(const pir::Operation *op) {
+  std::map<int, int> inplace_info;
+  if (!op->HasTrait<paddle::dialect::InplaceTrait>()) {
+    return inplace_info;
+  }
+  pir::IrContext *ctx = pir::IrContext::Instance();
+  std::string op_name = op->name();
+  if (op->attributes().count("op_name")) {
+    op_name =
+        op->attributes().at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+  }
+
+  pir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_name);
+  paddle::dialect::OpYamlInfoParser yaml_parser(
+      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>()
+          ->get_op_info_(op_name),
+      paddle::dialect::IsLegacyOp(op_name));
+
+  for (size_t i = 0; i < op->num_results(); ++i) {
+    std::string value_name = yaml_parser.OutputNames()[i];
+    if (yaml_parser.HasInplace(value_name)) {
+      const std::string &inplace_name = yaml_parser.InplaceName(value_name);
+      inplace_info[i] = yaml_parser.InputName2Id().at(inplace_name);
+    }
+    if (yaml_parser.HasView(value_name)) {
+      const std::string &view_name = yaml_parser.ViewName(value_name);
+      inplace_info[i] = yaml_parser.InputName2Id().at(view_name);
+    }
+  }
+
+  return inplace_info;
+}
+
+std::vector<std::vector<pir::Value>> GetOpInplaceChains(const Block *block) {
+  std::vector<std::vector<pir::Value>> inplace_chains;
+  std::map<pir::Value, size_t> value_to_inplace_chain_index;
+
+  for (auto &op : *block) {
+    pir::Walk(&op, [&](Operation *inner_op) {
+      auto op_inplace_info = GetOpInplaceInfo(inner_op);
+      for (auto &[out_idx, in_idx] : op_inplace_info) {
+        auto target_value = inner_op->results()[out_idx];
+        auto source_value = inner_op->operands()[in_idx].source();
+        VLOG(8) << "Inplace Mapping: " << Value2String(source_value) << " -> "
+                << Value2String(target_value);
+
+        if (value_to_inplace_chain_index.count(source_value) == 0 &&
+            value_to_inplace_chain_index.count(target_value) == 0) {
+          size_t chain_insertion_idx = inplace_chains.size();
+          inplace_chains.push_back({source_value, target_value});
+          value_to_inplace_chain_index.insert(
+              {source_value, chain_insertion_idx});
+          value_to_inplace_chain_index.insert(
+              {target_value, chain_insertion_idx});
+        } else {
+          PADDLE_ENFORCE_NE(
+              value_to_inplace_chain_index.count(source_value),
+              0,
+              phi::errors::Unavailable("source value should be in the chain"));
+          PADDLE_ENFORCE_EQ(value_to_inplace_chain_index.count(target_value),
+                            0,
+                            phi::errors::Unavailable(
+                                "target value should not be in the chain"));
+          size_t chain_insertion_idx =
+              value_to_inplace_chain_index[source_value];
+          inplace_chains[chain_insertion_idx].push_back(target_value);
+          value_to_inplace_chain_index.insert(
+              {target_value, chain_insertion_idx});
+        }
+      }
+    });
+  }
+  return inplace_chains;
+}
+
+std::optional<pir::Value> FindInplaceSource(
+    const std::vector<std::vector<pir::Value>> inplace_chains,
+    pir::Value value) {
+  if (value.impl() == nullptr) {
+    return std::nullopt;
+  }
+  for (auto &chain : inplace_chains) {
+    for (auto &v : chain) {
+      if (v == value) {
+        return chain[0];
+      }
+    }
+  }
+  return std::nullopt;
+}
+
+std::map<pir::Value, pir::Value> ReplaceValueWithInplaceSource(
+    const std::vector<std::vector<pir::Value>> &source_domain,
+    std::vector<pir::Value> *target_values,
+    const std::vector<std::vector<pir::Value>> inplace_chains) {
+  std::map<pir::Value, pir::Value> replacements;
+  for (auto &target_value : *target_values) {
+    auto inplace_source = FindInplaceSource(inplace_chains, target_value);
+    if (!inplace_source.has_value()) {
+      continue;
+    }
+    for (auto &source_values : source_domain) {
+      if (std::find(source_values.begin(),
+                    source_values.end(),
+                    inplace_source.value()) != source_values.end()) {
+        VLOG(4) << "Replace " << Value2String(target_value) << " with "
+                << Value2String(inplace_source.value());
+        replacements.insert({target_value, inplace_source.value()});
+        target_value = inplace_source.value();
+      }
+    }
+  }
+  return replacements;
+}
+
 std::pair<std::vector<pir::Value>, std::unordered_set<pir::Value>>
 AnalysisMiddleVariable(const Program &program,
                        const std::vector<pir::Value> &forward_inputs,
@@ -1145,6 +1288,10 @@ static auto GetNoNeedBufferValue(const ::pir::Block *whole_block,
   std::unordered_set<::pir::Value> no_need_buffer_values;
   range_block_do(
       whole_block, range, [&need_buffer_values](::pir::Operation *op) {
+        // NOTE(SigureMo): We should process the CombineOp in its users.
+        if (op->isa<pir::CombineOp>()) {
+          return;
+        }
         if (op->HasInterface<paddle::dialect::OpYamlInfoInterface>() ==
            false) {
           // not a OpYamlInfoInterface, can't have no_need_buffer.
           for (const auto &operand : op->operands_source()) {
@@ -1155,8 +1302,16 @@ static auto GetNoNeedBufferValue(const ::pir::Block *whole_block,
             op->dyn_cast<paddle::dialect::OpYamlInfoInterface>().GetOpInfo();
         int counter = 0;
         for (const auto &op_input_info : std::get<0>(opinfo)) {
+          auto value = op->operand_source(counter);
           if (!op_input_info.no_need_buffer) {
-            need_buffer_values.insert(op->operand_source(counter));
+            need_buffer_values.insert(value);
+            if (!IsFakeValue(value) && value.defining_op() &&
+                value.defining_op()->isa<pir::CombineOp>()) {
+              for (const auto &combine_value :
+                   value.defining_op()->operands_source()) {
+                need_buffer_values.insert(combine_value);
+              }
+            }
           }
           counter += 1;
         }
@@ -1255,10 +1410,26 @@ SplitedResult SplitForwardBackward(
   pir::IrContext *ctx = pir::IrContext::Instance();
   auto forward_program = std::make_shared<Program>(ctx);
   auto backward_program = std::make_shared<Program>(ctx);
+  std::vector<pir::Value> forward_outputs_mutable = forward_outputs;
   std::vector<pir::Value> middle_values;
   std::unordered_set<pir::Value> backward_inputs;
+  const auto &inplace_chains = GetOpInplaceChains(program.block());
   std::tie(middle_values, backward_inputs) = AnalysisMiddleVariable(
       program, forward_in_out_values, forward_range, backward_range);
+
+  // Replace inplace values with their source values.
+  // NOTE(SigureMo): Why not process inplace values for forward_inputs in
+  // forward?
+  // Because all forward_inputs use the data op, and after the lower-to-kernel
+  // pass the data op is followed by a non-inplace shadow_feed op, so we don't
+  // need to process inplace for forward_inputs in the forward program.
+  // The same reasoning applies to the whole backward program: all backward
+  // inputs are created by block kwargs, which also adds a shadow_feed op
+  // after the lower-to-kernel pass.
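+  // For example (illustrative values): if the forward region computes
+  // relu_(param) -> v1 and then scale_(v1) -> v2, GetOpInplaceChains records
+  // the chain [param, v1, v2]; any later use of v1 or v2 as a middle or
+  // output value is rewritten to param below, since all three values alias
+  // the same underlying buffer.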
+ auto replacement_for_forward_middles = ReplaceValueWithInplaceSource( + {forward_params}, &middle_values, inplace_chains); + auto replacement_for_forward_outputs = ReplaceValueWithInplaceSource( + {forward_params}, &forward_outputs_mutable, inplace_chains); pir::Block &backward_block = *backward_program->block(); bool has_backward = (backward_range[1] > backward_range[0]); @@ -1283,8 +1454,13 @@ SplitedResult SplitForwardBackward( auto create_kwarg_fn = [&backward_block, &backward_inputs, &backward_value_map, + &replacement_for_forward_middles, + &replacement_for_forward_outputs, &counter](const pir::Value &v) { - if (v && backward_inputs.count(v)) { + if (v && !backward_value_map.count(v) && + (backward_inputs.count(v) || + ExistsInMapValues(replacement_for_forward_middles, v) || + ExistsInMapValues(replacement_for_forward_outputs, v))) { backward_value_map[v] = backward_block.AddKwarg( "input_" + std::to_string(counter++), v.type()); } @@ -1293,10 +1469,19 @@ SplitedResult SplitForwardBackward( auto create_output_fn_forward = [&ctx, &forward_value_map, &counter, - &forward_program](const pir::Value &v) { + &forward_program, + &forward_inputs, + &forward_params](const pir::Value &v) { if (v.impl() == nullptr) { return; } + // Skip the value that already in forward_inputs or forward_params. + if (std::find(forward_inputs.begin(), forward_inputs.end(), v) != + forward_inputs.end() || + std::find(forward_params.begin(), forward_params.end(), v) != + forward_params.end()) { + return; + } // NOTE(Aurelius84): we should skip insert ShadowOutputOp repeatedly by // calling SplitForwardBackward multi-times. std::string shadow_output_name = @@ -1350,14 +1535,14 @@ SplitedResult SplitForwardBackward( counter += 1; }; - // counter = 0; if (has_backward) { VLOG(4) << "start create backward inputs, creating keyword argument."; VLOG(4) << "Create keyword argument for backward program: fo, start with input_" << counter; - std::for_each( - forward_outputs.begin(), forward_outputs.end(), create_kwarg_fn); + std::for_each(forward_outputs_mutable.begin(), + forward_outputs_mutable.end(), + create_kwarg_fn); VLOG(4) << "Create keyword argument for backward program: fx, start with input_" << counter; @@ -1380,14 +1565,27 @@ SplitedResult SplitForwardBackward( create_kwarg_fn); VLOG(4) << "Create keyword argument for backward program end. input_" << counter; + + // Update the value map with inplace source value. + VLOG(4) << "start update inplace names"; + VLOG(4) << "replacement_for_forward_middles size is: " + << replacement_for_forward_middles.size(); + for (auto &[target, source] : replacement_for_forward_middles) { + backward_value_map[target] = backward_value_map.at(source); + } + VLOG(4) << "replacement_for_forward_outputs size is: " + << replacement_for_forward_outputs.size(); + for (auto &[target, source] : replacement_for_forward_outputs) { + backward_value_map[target] = backward_value_map.at(source); + } } - // counter = 0; VLOG(4) << "start create forward outputs, inserting set_parameter ops."; std::for_each( middle_values.begin(), middle_values.end(), create_output_fn_forward); - std::for_each( - forward_outputs.begin(), forward_outputs.end(), create_output_fn_forward); + std::for_each(forward_outputs_mutable.begin(), + forward_outputs_mutable.end(), + create_output_fn_forward); // Step2. copy backward ops . 
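+  // Each op in the backward range is cloned into backward_program below; the
+  // backward_mapper passed to Clone() rewrites the cloned ops' operands to
+  // the keyword-argument values created above, so the backward block is
+  // self-contained.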
VLOG(4) << "start copy backward ops"; @@ -1398,7 +1596,6 @@ SplitedResult SplitForwardBackward( auto *cloned_op = op->Clone(backward_mapper, clone_options); backward_program->block()->push_back(cloned_op); }); - // counter = 0; VLOG(4) << "start create backward outputs, inserting set_parameter ops."; if (has_backward) { std::for_each(forward_inputs_grads.begin(), @@ -1423,20 +1620,20 @@ SplitedResult SplitForwardBackward( // construct all attributes we needed. - mapping_value(middle_values, forward_value_map, fm); // write 'fm' - mapping_value(middle_values, backward_value_map, bm); // write 'bm' - mapping_value(forward_inputs, forward_value_map, fx); // write 'fx' - mapping_value(forward_inputs, backward_value_map, bx); // write 'bx' - mapping_value(forward_params, forward_value_map, fp); // write 'fp' - mapping_value(forward_params, backward_value_map, bp); // write 'bp' - mapping_value(forward_outputs, forward_value_map, fo); // write 'fo' + mapping_value(middle_values, forward_value_map, fm); // write 'fm' + mapping_value(middle_values, backward_value_map, bm); // write 'bm' + mapping_value(forward_inputs, forward_value_map, fx); // write 'fx' + mapping_value(forward_inputs, backward_value_map, bx); // write 'bx' + mapping_value(forward_params, forward_value_map, fp); // write 'fp' + mapping_value(forward_params, backward_value_map, bp); // write 'bp' + mapping_value(forward_outputs_mutable, forward_value_map, fo); // write 'fo' mapping_value( forward_inputs_grads, backward_value_map, bx_g); // write 'bx_g' mapping_value( forward_params_grads, backward_value_map, bp_g); // write 'bp_g' mapping_value( - forward_outputs_grads, backward_value_map, bo_g); // write 'bo_g' - mapping_value(forward_outputs, backward_value_map, bo); // write 'bo' + forward_outputs_grads, backward_value_map, bo_g); // write 'bo_g' + mapping_value(forward_outputs_mutable, backward_value_map, bo); // write 'bo' mapping_value(GetNoNeedBufferValue(program.block(), backward_range), forward_value_map, no_need_buffer_values); // write 'no_need_buffers' @@ -1502,39 +1699,6 @@ void ResetShadowOutputName(pir::Operation *op, const std::string &name) { } } -std::map GetOpInplaceInfo(const pir::Operation *op) { - std::map inplace_info; - if (!op->HasTrait()) { - return inplace_info; - } - pir::IrContext *ctx = pir::IrContext::Instance(); - std::string op_name = op->name(); - if (op->attributes().count("op_name")) { - op_name = - op->attributes().at("op_name").dyn_cast().AsString(); - } - - pir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_name); - paddle::dialect::OpYamlInfoParser yaml_parser( - op_info.GetInterfaceImpl() - ->get_op_info_(op_name), - paddle::dialect::IsLegacyOp(op_name)); - - for (size_t i = 0; i < op->num_results(); ++i) { - std::string value_name = yaml_parser.OutputNames()[i]; - if (yaml_parser.HasInplace(value_name)) { - const std::string &inplace_name = yaml_parser.InplaceName(value_name); - inplace_info[i] = yaml_parser.InputName2Id().at(inplace_name); - } - if (yaml_parser.HasView(value_name)) { - const std::string &view_name = yaml_parser.ViewName(value_name); - inplace_info[i] = yaml_parser.InputName2Id().at(view_name); - } - } - - return inplace_info; -} - void BindUtils(pybind11::module *m) { m->def("clone_program", CloneProgram); m->def("get_op_inplace_info", GetOpInplaceInfo); @@ -1702,33 +1866,34 @@ void BindUtils(pybind11::module *m) { >>> print(mappings) {'matmul_v2_0.tmp_0': [Value(define_op_name=pd_op.matmul, index=0, dtype=builtin.tensor<4x4xf32>)], 'x': [Value(define_op_name=pd_op.data, 
index=0, dtype=builtin.tensor<4x4xf32>)], 'tanh_0.tmp_0': [Value(define_op_name=pd_op.tanh, index=0, dtype=builtin.tensor<4x4xf32>)], 'elementwise_add_0': [Value(define_op_name=pd_op.add, index=0, dtype=builtin.tensor<4x4xf32>)]}
       )DOC");
-
-  m->def("clear_cinn_compilation_cache",
-         []() {
+  m->def("clear_cinn_compilation_cache", []() {
 #ifdef PADDLE_WITH_CINN
-           pybind11::gil_scoped_release release;
-           VLOG(4) << "clear CINN CompilationCache and free BackendResource.";
-           cinn::hlir::framework::CompilationCache::Instance().Clear();
+    pybind11::gil_scoped_release release;
+    VLOG(4) << "clear CINN CompilationCache and free BackendResource.";
+    cinn::hlir::framework::CompilationCache::Instance().Clear();
 #endif
-         }),
-  m->def("apply_mix2dist_pass", paddle::dialect::MixToDistPass);
+  });
 }
 namespace {
+#ifdef PADDLE_WITH_CINN
+std::shared_ptr<pir::PassManager> CreatePassManager() {
+  pir::IrContext *ctx = pir::IrContext::Instance();
+  ctx->GetOrRegisterDialect<cinn::dialect::OperatorDialect>();
+  ctx->GetOrRegisterDialect<paddle::dialect::OperatorDialect>();
+  ctx->GetOrRegisterDialect<pir::shape::ShapeDialect>();
+  auto pass_manager = std::make_shared<pir::PassManager>(ctx);
+  if (FLAGS_print_ir) {
+    pass_manager->EnableIRPrinting();
+  }
+  return pass_manager;
+}
+#endif
+
 void ApplyCinnPass(Program &program) {  // NOLINT
 #ifdef PADDLE_WITH_CINN
-  cinn::dialect::ir::ApplyCinnPass(&program, [] {
-    pir::IrContext *ctx = pir::IrContext::Instance();
-    ctx->GetOrRegisterDialect<cinn::dialect::OperatorDialect>();
-    ctx->GetOrRegisterDialect<paddle::dialect::OperatorDialect>();
-    ctx->GetOrRegisterDialect<pir::shape::ShapeDialect>();
-    auto pass_manager = std::make_shared<pir::PassManager>(ctx);
-    if (FLAGS_print_ir) {
-      pass_manager->EnableIRPrinting();
-    }
-    return pass_manager;
-  });
+  cinn::dialect::ir::ApplyCinnPass(&program, CreatePassManager);
 #else
   PADDLE_THROW(common::errors::Unimplemented(
       "Currently we only support CINN Pass for Pir under @to_static, please "
@@ -1736,6 +1901,14 @@ void ApplyCinnPass(Program &program) {  // NOLINT
 #endif
 }
+void CheckInferSymbolicIfNeed(Program &program) {  // NOLINT
+#ifdef PADDLE_WITH_CINN
+  cinn::dialect::ir::CheckInferSymbolicIfNeed(&program, CreatePassManager);
+#else
+  // Do nothing.
+#endif
+}
+
 }  // namespace
 void InferSymbolicShapePass(
@@ -1751,6 +1924,7 @@ void BindIrPass(pybind11::module *m) {
   m->def("apply_cinn_pass", ApplyCinnPass);
+  m->def("check_infer_symbolic_if_need", CheckInferSymbolicIfNeed);
   m->def("infer_symbolic_shape_pass", InferSymbolicShapePass);
   py::class_<Pass, std::shared_ptr<Pass>> pass(*m,
@@ -1781,8 +1955,26 @@ void BindPassManager(pybind11::module *m) {
           }),
           py::arg("opt_level") = 2)
       .def("add_pass",
-           [](PassManager &self, const std::string &pass_name) {
-             self.AddPass(pir::PassRegistry::Instance().Get(pass_name));
+           [](PassManager &self,
+              const std::string &pass_name,
+              const std::unordered_map<std::string, py::object> attrs = {}) {
+             auto pass = pir::PassRegistry::Instance().Get(pass_name);
+             for (const auto &attr : attrs) {
+               if (py::isinstance<py::str>(attr.second)) {
+                 pass->Set(attr.first,
+                           new std::string(attr.second.cast<std::string>()));
+               } else if (py::isinstance<py::bool_>(attr.second)) {
+                 pass->Set(attr.first, new bool(attr.second.cast<bool>()));
+               } else if (py::isinstance<py::int_>(attr.second)) {
+                 pass->Set(attr.first, new int(attr.second.cast<int>()));
+               } else if (py::isinstance<py::float_>(attr.second)) {
+                 pass->Set(attr.first, new float(attr.second.cast<float>()));
+               } else {
+                 PADDLE_THROW(phi::errors::InvalidArgument(
+                     "This type of pass attr is not supported."));
+               }
+             }
+             self.AddPass(std::move(pass));
           })
      .def("passes",
           [](PassManager &self) {
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 5470f4d7ec4f2..35d1a297720b4 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -190,6 +190,7 @@ limitations under the License. */
 #endif
 #ifdef PADDLE_WITH_CINN
+#include "paddle/cinn/pybind/bind.h"
 #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"
 #include "paddle/fluid/pybind/test.h"
 #endif
@@ -405,6 +406,10 @@ bool SupportsInt8() {
 #endif
 }
+bool SupportsAvx512F() {
+  return phi::backends::cpu::MayIUse(phi::backends::cpu::cpu_isa_t::avx512f);
+}
+
 bool SupportsVNNI() {
 #ifndef PADDLE_WITH_DNNL
   return false;
@@ -2153,6 +2158,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("supports_bfloat16", SupportsBfloat16);
   m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance);
   m.def("supports_int8", SupportsInt8);
+  m.def("supports_avx512f", SupportsAvx512F);
   m.def("supports_vnni", SupportsVNNI);
   m.def("op_supported_infos", imperative::OpSupportedInfos);
   m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
@@ -3053,6 +3059,7 @@ All parameter, weight, gradient are variables in Paddle.
 #if defined(PADDLE_WITH_CINN)
   BindTest(&m);
+  cinn::pybind::BindCINN(&m);
 #endif
   BindPir(&m);
diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc
index c66cd9d0dc81f..bf3d025b228cc 100644
--- a/paddle/fluid/pybind/tensor.cc
+++ b/paddle/fluid/pybind/tensor.cc
@@ -859,7 +859,7 @@ void BindTensor(pybind11::module &m) {  // NOLINT
            )DOC")
 #endif
       .def("_share_filename",
-           [](phi::DenseTensor &self) {
+           [](phi::DenseTensor &self, bool use_file_descriptor) {
             if (!self.IsInitialized() || self.numel() == 0)
               throw std::runtime_error(
                   "Tensor not initialized or numel is 0.
could not pass to " @@ -886,6 +886,10 @@ void BindTensor(pybind11::module &m) { // NOLINT int flags = memory::allocation::MAPPED_SHAREDMEM | memory::allocation::MAPPED_EXCLUSIVE; + if (use_file_descriptor) { + flags = flags | memory::allocation::MAPPED_KEEPFD | + memory::allocation::MAPPED_UNLINK; + } std::string handle = memory::allocation::GetIPCName(); int find_id = -1; if (FLAGS_use_shm_cache) { @@ -894,9 +898,10 @@ void BindTensor(pybind11::module &m) { // NOLINT if (find_id != -1) { handle = memory::allocation::MemoryMapAllocationPool::Instance().GetById(find_id).file_name_; // NOLINT } + int shared_fd = -1; auto shared_holder = memory::allocation::AllocateRefcountedMemoryMapAllocation( - handle, flags, data_size, find_id); + handle, shared_fd, flags, data_size, find_id); // copy data & reset holder if (platform::is_cuda_pinned_place(holder->place())) { @@ -914,8 +919,10 @@ void BindTensor(pybind11::module &m) { // NOLINT int type_idx = static_cast(self.type()); return py::make_tuple(mmap_allocation->ipc_name(), + mmap_allocation->shared_fd(), mmap_allocation->size(), type_idx, - common::vectorize(self.dims()), self.lod()); + common::vectorize(self.dims()), self.lod(), + use_file_descriptor); }, R"DOC( Serialize CPU lod tensor in shared memory to tuple. @@ -935,30 +942,37 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def("_new_shared_filename", [](py::tuple t) { // __setstate__ - if (t.size() != 5) + if (t.size() != 7) throw std::runtime_error("Invalid Tensor meta info state!"); phi::DenseTensor tensor; // 2. Rebuild Allocation const std::string &ipc_name = t[0].cast(); - size_t size = t[1].cast(); + const int shared_fd = t[1].cast(); + const bool use_file_descriptor = t[6].cast(); + + size_t size = t[2].cast(); int flags = memory::allocation::MAPPED_SHAREDMEM | memory::allocation::MAPPED_NOCREATE; + if (use_file_descriptor) { + flags = flags | memory::allocation::MAPPED_KEEPFD | + memory::allocation::MAPPED_UNLINK; + } int find_id = -1; if (FLAGS_use_shm_cache) { find_id = memory::allocation::MemoryMapAllocationPool::Instance().FindFromCache(flags, size, ipc_name, /*check_refcount*/ false); // NOLINT } auto shared_holder = memory::allocation::AllocateRefcountedMemoryMapAllocation( - ipc_name, flags, size, find_id); + ipc_name, shared_fd, flags, size, find_id); // 3. Rebuild Tensor tensor.ResetHolderWithType( shared_holder, - static_cast(t[2].cast())); - tensor.Resize(common::make_ddim(t[3].cast>())); - tensor.set_lod(t[4].cast()); + static_cast(t[3].cast())); + tensor.Resize(common::make_ddim(t[4].cast>())); + tensor.set_lod(t[5].cast()); return tensor; }, diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index ba3a466fba219..c93588f73d6f3 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -31,11 +31,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/pybind/complex.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/strided_memcpy.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/platform/cuda_device_guard.h" @@ -696,7 +696,7 @@ void _sliceCompute(const phi::DenseTensor *in, auto out_t = framework::EigenTensor::From( *out); - operators::EigenSlice, T, D>::Eval( + phi::funcs::EigenSlice, T, D>::Eval( eigen_place, out_t, in_t, offsets, extents); } diff --git a/paddle/fluid/pybind/uva_utils.h b/paddle/fluid/pybind/uva_utils.h index 7f29814bcecb5..4d46a2398056d 100644 --- a/paddle/fluid/pybind/uva_utils.h +++ b/paddle/fluid/pybind/uva_utils.h @@ -20,10 +20,10 @@ #undef copysign #endif -#include "paddle/fluid/operators/utils.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/tensor_utils.h" namespace paddle { namespace pybind { diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 7325aef2202b5..ee4fcec12c257 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -62,8 +62,8 @@ if(WITH_XBYAK) list(APPEND PHI_DEPS xbyak) endif() -if(WITH_MKLDNN) - list(APPEND PHI_DEPS mkldnn) +if(WITH_ONEDNN) + list(APPEND PHI_DEPS onednn) endif() if(WITH_GLOO) @@ -119,8 +119,11 @@ if(WITH_AVX AND AVX512F_FLAG AND WITH_MKL) set_source_files_properties( + kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc kernels/fusion/cpu/self_dp_attention_kernel.cc - PROPERTIES COMPILE_FLAGS "-Wno-maybe-uninitialized -mfma ${AVX512F_FLAG}") + kernels/fusion/cpu/rms_norm_avx_kernel.cc + PROPERTIES COMPILE_FLAGS + "${Wno_Maybe_Uninitialized} ${FMA_FLAG} ${AVX512F_FLAG}") endif() if(WITH_GPU) diff --git a/paddle/phi/api/CMakeLists.txt b/paddle/phi/api/CMakeLists.txt index 1827dfbeb7f64..b06c40cf41a6e 100644 --- a/paddle/phi/api/CMakeLists.txt +++ b/paddle/phi/api/CMakeLists.txt @@ -1,2 +1,9 @@ add_subdirectory(profiler) add_subdirectory(lib) +if(WIN32) + file(GLOB YAML_FILE "${CMAKE_CURRENT_SOURCE_DIR}/yaml/*.yaml") + set_property( + DIRECTORY + APPEND + PROPERTY CMAKE_CONFIGURE_DEPENDS ${YAML_FILE}) +endif() diff --git a/paddle/phi/api/lib/backend_set.h b/paddle/phi/api/lib/backend_set.h index af4de2580f578..13077fb9167ad 100644 --- a/paddle/phi/api/lib/backend_set.h +++ b/paddle/phi/api/lib/backend_set.h @@ -26,7 +26,7 @@ namespace experimental { * and the higher backend bit has a higher priority. * * A Tensor may belong to multiple backends at the same time, such CPU and - * MKLDNN. Only one backend value cannot + * OneDNN. 
Only one backend value cannot */ class BackendSet final { public: diff --git a/paddle/phi/api/profiler/device_tracer.cc b/paddle/phi/api/profiler/device_tracer.cc index e1c009fa9cad0..085d28220a6a9 100644 --- a/paddle/phi/api/profiler/device_tracer.cc +++ b/paddle/phi/api/profiler/device_tracer.cc @@ -834,7 +834,7 @@ uint32_t GetCurSystemThreadId() { return id; } -void RecoreCurThreadId(uint64_t id) { +void RecordCurThreadId(uint64_t id) { std::lock_guard lock(system_thread_id_map_mutex); auto gid = GetCurSystemThreadId(); system_thread_id_map[gid] = id; diff --git a/paddle/phi/api/profiler/device_tracer.h b/paddle/phi/api/profiler/device_tracer.h index bde73357f2075..a0f4b5c54670e 100644 --- a/paddle/phi/api/profiler/device_tracer.h +++ b/paddle/phi/api/profiler/device_tracer.h @@ -162,5 +162,5 @@ void ClearCurBlock(); int BlockDepth(); // Set current thread id, so we can map the system thread id to thread id. -void RecoreCurThreadId(uint64_t id); +void RecordCurThreadId(uint64_t id); } // namespace phi diff --git a/paddle/phi/api/profiler/profiler_helper.h b/paddle/phi/api/profiler/profiler_helper.h index 31ccbbb12fb6f..16ae735fccc1e 100644 --- a/paddle/phi/api/profiler/profiler_helper.h +++ b/paddle/phi/api/profiler/profiler_helper.h @@ -73,7 +73,7 @@ inline EventList &GetEventList() { ProfilerHelper::g_thread_id = ProfilerHelper::g_next_thread_id++; ProfilerHelper::g_all_event_lists.emplace_front( ProfilerHelper::g_event_list); - RecoreCurThreadId(ProfilerHelper::g_thread_id); + RecordCurThreadId(ProfilerHelper::g_thread_id); } return *ProfilerHelper::g_event_list; } diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 25bd37ab01f87..3937464fbce49 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -13,6 +13,7 @@ kernel : func : abs_double_grad data_type : grad_x_grad + backward : abs_triple_grad - backward_op : abs_grad forward : abs (Tensor x) -> Tensor(out) @@ -27,6 +28,17 @@ composite : abs_grad(x, out_grad, x_grad) backward : abs_double_grad +- backward_op : abs_triple_grad + forward : abs_double_grad (Tensor x, Tensor grad_x_grad) -> Tensor(grad_out_grad) + args : (Tensor x, Tensor grad_out_grad_grad) + output : Tensor(grad_x_grad_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + data_transform : + support_trans_dtype : x + composite : abs_triple_grad(x, grad_out_grad_grad, grad_x_grad_grad) + - backward_op : acos_grad forward : acos (Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) @@ -717,6 +729,16 @@ kernel : func : erfinv_grad +- backward_op : exp_double_grad + forward : exp_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x) + args : (Tensor out, Tensor grad_out, Tensor grad_x_grad) + output : Tensor(out_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [out, out] + composite : exp_double_grad(out, grad_out, grad_x_grad, out_grad, grad_out_grad) + inplace : (grad_x_grad -> grad_out_grad) + - backward_op : exp_grad forward : exp (Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) @@ -728,6 +750,7 @@ kernel : func : exp_grad inplace : (out_grad -> x_grad) + backward : exp_double_grad composite : exp_grad(out, out_grad, x_grad) - backward_op : expand_as_grad @@ -1434,6 +1457,7 @@ param : [x, x] kernel : func : log_double_grad + composite : log_double_grad(x, grad_out, grad_x_grad, x_grad, grad_out_grad) inplace : (grad_x_grad -> grad_out_grad) - backward_op : log_grad @@ -2010,6 +2034,7 @@ spmd_rule : 
ElementwiseUnaryGradInferSpmd kernel : func : rsqrt_grad + composite : rsqrt_grad(out, out_grad, x_grad) backward : rsqrt_double_grad inplace : (out_grad -> x_grad) @@ -2375,6 +2400,12 @@ inplace : (out_grad -> x_grad) backward: squeeze_double_grad +- backward_op : stack_double_grad + forward : stack_grad (Tensor[] x, Tensor grad_out, int axis=0) -> Tensor[](grad_x) + args : (Tensor[] grad_x_grad, int axis = 0) + output : Tensor(grad_out_grad) + invoke : stack(grad_x_grad, axis) + - backward_op : stack_grad forward : stack (Tensor[] x, int axis) -> Tensor(out) args : (Tensor[] x, Tensor out_grad, int axis) @@ -2389,6 +2420,7 @@ data_type : out_grad no_need_buffer : x composite : stack_grad(x, out_grad, axis, x_grad) + backward: stack_double_grad - backward_op : stanh_grad forward : stanh(Tensor x, float scale_a, float scale_b) -> Tensor(out) @@ -2727,6 +2759,9 @@ forward: silu_grad (Tensor x, Tensor out, Tensor grad_out) -> Tensor(grad_x) args: (Tensor x, Tensor out, Tensor grad_out, Tensor grad_x_grad) output: Tensor(x_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, x] composite: silu_double_grad(x, out, grad_out, grad_x_grad, x_grad, grad_out_grad) - backward_op: unpool3d_grad diff --git a/paddle/phi/api/yaml/fused_backward.yaml b/paddle/phi/api/yaml/fused_backward.yaml index 36c3c0dde5191..235864c4c9d8b 100644 --- a/paddle/phi/api/yaml/fused_backward.yaml +++ b/paddle/phi/api/yaml/fused_backward.yaml @@ -41,8 +41,8 @@ support_dygraph_mode : true - backward_op : fused_rotary_position_embedding_grad - forward: fused_rotary_position_embedding (Tensor q, Tensor k, Tensor v, Tensor sin, Tensor cos, Tensor position_ids, bool use_neox_rotary_style, bool time_major) -> Tensor(out_q), Tensor(out_k), Tensor(out_v) - args : (Tensor sin, Tensor cos, Tensor position_ids, Tensor out_q_grad, Tensor out_k_grad,Tensor out_v_grad, bool use_neox_rotary_style, bool time_major) + forward: fused_rotary_position_embedding (Tensor q, Tensor k, Tensor v, Tensor sin, Tensor cos, Tensor position_ids, bool use_neox_rotary_style, bool time_major, float rotary_emb_base) -> Tensor(out_q), Tensor(out_k), Tensor(out_v) + args : (Tensor sin, Tensor cos, Tensor position_ids, Tensor out_q_grad, Tensor out_k_grad,Tensor out_v_grad, bool use_neox_rotary_style, bool time_major, float rotary_emb_base) output : Tensor(q_grad), Tensor(k_grad), Tensor(v_grad) optional : sin, cos, position_ids, out_k_grad, out_v_grad, k_grad, v_grad infer_meta : diff --git a/paddle/phi/api/yaml/fused_ops.yaml b/paddle/phi/api/yaml/fused_ops.yaml index ff6969194f6d6..9b03721fb284b 100644 --- a/paddle/phi/api/yaml/fused_ops.yaml +++ b/paddle/phi/api/yaml/fused_ops.yaml @@ -1,4 +1,4 @@ -# This file is designed for fusion C++ farward operators, which manages the +# This file is designed for fusion C++ forward operators, which manages the # generated code for static mode and dynamic mode (when `support_dygraph_mode` is true). 
# "support_dygraph_mode" is an extra configuration item in this file, # if one operator have "support_dygraph_mode : true", it supports dygraph mode, @@ -83,6 +83,15 @@ data_type : x optional : bias, branch, branch_max ,x_max, scale_max, out_max_in +- op : cross_attention_xpu + args : (Tensor input_q, Tensor input_kv, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor mask, int head_num, int head_dim, float alpha, DataType out_dtype) + output : Tensor(qkv), Tensor(qkv_max) + infer_meta : + func : CrossAttentionXPUInferMeta + kernel : + func : cross_attention_xpu + data_type : input_q + - op : dequantize_xpu args : (Tensor x, DataType out_dtype, float scale = 1.0f) output : Tensor(y) @@ -186,6 +195,7 @@ func : fused_conv2d_add_act data_type : input optional : bias, residual_data, outputs + interfaces : paddle::dialect::LayoutTransformationInterface - op : fused_dconv_drelu_dbn args : (Tensor grad_output, Tensor weight, Tensor grad_output_add, Tensor residual_input, Tensor bn1_eqscale, Tensor bn1_eqbias, Tensor conv_input, Tensor bn1_mean, Tensor bn1_inv_std, Tensor bn1_gamma, Tensor bn1_beta, Tensor bn1_input, Tensor bn2_mean, Tensor bn2_inv_std, Tensor bn2_gamma, Tensor bn2_beta, Tensor bn2_input, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, str data_format, bool fuse_shortcut, bool fuse_dual, bool fuse_add, bool exhaustive_search) @@ -273,7 +283,7 @@ optional : cache_kv, pre_caches, rotary_pos_emb, time_step, seq_lengths, src_mask, gather_index - op : fused_rotary_position_embedding - args : (Tensor q, Tensor k, Tensor v, Tensor sin, Tensor cos, Tensor position_ids, bool use_neox_rotary_style = true, bool time_major = false) + args : (Tensor q, Tensor k, Tensor v, Tensor sin, Tensor cos, Tensor position_ids, bool use_neox_rotary_style = true, bool time_major = false, float rotary_emb_base = 10000.0) output : Tensor(out_q), Tensor(out_k), Tensor(out_v) infer_meta : func : FusedRopeInferMeta @@ -375,6 +385,15 @@ func : generate_sequence_xpu data_type : dtype +- op : group_norm_silu_xpu + args : (Tensor x, Tensor scale, Tensor bias, int groups = -1, float epsilon = 1e-5) + output : Tensor(out) + infer_meta : + func : GroupNormalizeSiluXPUInferMeta + kernel : + func : group_norm_silu_xpu + data_type : x + - op : layer_norm_act_xpu args : (Tensor x, Tensor scale, Tensor bias, int begin_norm_axis, float epsilon, int act_type, float act_param) output : Tensor(out) @@ -420,14 +439,14 @@ optional : bias_qk - op : qkv_attention_xpu - args : (Tensor q, Tensor k, Tensor v, Tensor q_max, Tensor k_max, Tensor v_max, float alpha, int head_num, int head_dim, bool qkv_fc_fusion, DataType out_dtype) - output : Tensor(qkv), Tensor(qkv_max) + args : (Tensor q, Tensor k, Tensor v, Tensor q_max, Tensor k_max, Tensor v_max, Tensor qk_max, Tensor qkv_max, float alpha, int head_num, int head_dim, bool qkv_fc_fusion, DataType out_dtype) + output : Tensor(qkv) infer_meta : func : QKVAttentionXPUInferMeta kernel : func : qkv_attention_xpu data_type : q - optional : q_max, k_max, v_max + optional : q_max, k_max, v_max, qk_max, qkv_max - op : quantize_xpu args : (Tensor x, DataType out_dtype, float scale = 1.0f) @@ -474,6 +493,15 @@ func : skip_layernorm data_type : x +- op : spatial_transformer_resblock_xpu + args : (Tensor x, Tensor[] x_max, Tensor[] conv_bias, Tensor[] conv_filter, Tensor[] conv_filter_max, Tensor[] gn_bias, Tensor[] gn_scale, int[] dilations, int[] paddings, int[] strides, float[] gn_eps, int[] gn_groups, int[] groups, bool conv_fix, bool 
has_silu_fc_input, bool include_silu) + output : Tensor(out), Tensor(out_max) + infer_meta : + func : SpatialTransformerResblockXPUInferMeta + kernel : + func : spatial_transformer_resblock_xpu + data_type : x + - op : squeeze_excitation_block args : (Tensor x, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, int[] act_type, float[] act_param, int[] filter_dims) output : Tensor(out) diff --git a/paddle/phi/api/yaml/generator/api_gen.py b/paddle/phi/api/yaml/generator/api_gen.py index 3e144fa27d986..59eedd4a83de4 100644 --- a/paddle/phi/api/yaml/generator/api_gen.py +++ b/paddle/phi/api/yaml/generator/api_gen.py @@ -340,9 +340,7 @@ def gene_output( ) else: raise ValueError( - "{} : Output error: only support Tensor type when use view in yaml. But get {}".format( - self.api, out_dtype_list[i] - ) + f"{self.api} : Output error: only support Tensor type when use view in yaml. But get {out_dtype_list[i]}" ) else: raise ValueError( diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index e2f4cca95c923..b24b3a20c37eb 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -175,15 +175,15 @@ - backward_op : divide_double_grad forward : divide_grad (Tensor x, Tensor y, Tensor out, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y) - args : (Tensor y, Tensor out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1) + args : (Tensor y, Tensor out, Tensor grad_out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1) output : Tensor(y_grad), Tensor(out_grad), Tensor(grad_out_grad) infer_meta : func : GeneralTernaryGradInferMeta - param : [y, grad_x, grad_x] + param : [y, out, out] kernel : func : divide_double_grad data_type : out - optional : grad_x_grad, grad_y_grad + optional : grad_x, grad_x_grad, grad_y_grad inplace : (grad_x_grad -> grad_out_grad) - backward_op : divide_grad @@ -411,6 +411,7 @@ param: [x] kernel : func : min_grad + composite : min_grad(x, out, out_grad, axis, keepdim, reduce_all, x_grad) - backward_op : minimum_grad forward : minimum(Tensor x, Tensor y) -> Tensor(out) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 142814e1cc01e..188367817803a 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -77,7 +77,6 @@ backend : place data_transform : support_trans_dtype : start, end, step - interfaces : paddle::dialect::InferSymbolicShapeInterface - op : assign args : (Tensor x) @@ -551,7 +550,7 @@ skip_transform : x - op : full_with_tensor - args : (Tensor shape, Tensor value, DataType dtype=DataType::FLOAT32) + args : (Tensor value, IntArray shape, DataType dtype=DataType::FLOAT32) output: Tensor(out) infer_meta : func : FullWithTensorInferMeta @@ -1099,7 +1098,6 @@ kernel : func : split backward : split_grad - interfaces : paddle::dialect::InferSymbolicShapeInterface - op : split_with_num args : (Tensor x, int num, Scalar(int) axis) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index ab6161e0b0765..56dad40de1353 100755 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -15,7 +15,7 @@ # attrs : [bool is_test = false] - op : abs - backward : abs_grad + backward : abs_grad, abs_double_grad, abs_triple_grad inputs : x : X outputs : @@ -296,6 +296,12 @@ get_expected_kernel_type : assign : GetAssignExpectedKernelType +- op : assign_pos + inputs : + {x : X} + outputs : + out : Out 
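+# Convention in this file: the left-hand name is the new PHI-style argument
+# and the right-hand name is the legacy Fluid one, so `{x : X}` above maps
+# the PHI input `x` onto the legacy operator input `X`.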
+ - op : assign_value outputs : out : Out @@ -818,6 +824,12 @@ outputs : out : Out +- op : dgc_momentum + inputs : + {param : Param, grad : Grad, velocity : Velocity, learning_rate : LearningRate, master_param : MasterParam, current_step_tensor : current_step, nranks_tensor : nranks} + outputs : + {param_out : ParamOut, velocity_out : VelocityOut, master_param_out : MasterParamOut, grad_out : Grad_out} + - op : diag (diag_v2) backward : diag_grad (diag_v2_grad) inputs : @@ -1019,7 +1031,7 @@ out : Out - op : exp - backward : exp_grad + backward : exp_grad, exp_double_grad inputs : x : X outputs : @@ -1267,6 +1279,12 @@ data_type : float support_tensor : true +- op : full_with_tensor + int_array: + shape : + data_type : int64_t + support_tensor : true + - op : fused_adam_(fused_adam) inputs : {params : Params, grads : Grads, learning_rate : LearningRate, moments1 : Moments1, @@ -3225,7 +3243,7 @@ outputs : [xshape] - op : stack - backward : stack_grad + backward : stack_grad, stack_double_grad inputs : x : X outputs : @@ -3705,6 +3723,12 @@ outputs: {param_out : ParamOut, moment_out : MomentOut} +- op: dgc + inputs: + {u: U, v: V, grad: Grad} + outputs: + {u_out: U_out, v_out: V_out, encode_grad: EncodeGrad, grad_out: Grad_out, gather_buff: GatherBuff} + - op: distribute_fpn_proposals inputs : {fpn_rois: FpnRois, rois_num: RoisNum} @@ -3713,6 +3737,12 @@ multi_level_rois_num: MultiLevelRoIsNum restore_index: RestoreIndex +- op: distributed_fused_lamb + inputs: + {param: Param, grad: Grad, fp32_fused_param: FP32FusedParam, fp32_fused_grad: FP32FusedGrad, fp16_fused_param: FP16FusedParam, fp16_fused_grad: FP16FusedGrad, moment1: Moment1, moment2: Moment2, beta1pow: Beta1Pow, beta2pow: Beta2Pow, fused_param_offsets: FusedParamOffsets, fp32_shard_fused_param_offsets: FP32ShardFusedParamOffsets, fp16_shard_fused_param_offsets: FP16ShardFusedParamOffsets, param_info: ParamInfo, param_order: ParamOrder, learning_rate: LearningRate, global_scale: GlobalScale} + outputs: + {param_out : ParamOut, fp32_fused_param_out: FP32FusedParamOut, fp16_fused_param_out: FP16FusedParamOut, fp32_acc_fused_grad: FP32AccFusedGrad, fp16_acc_fused_grad: FP16AccFusedGrad, moment1_out: Moment1Out, moment2_out: Moment2Out, beta1pow_out: Beta1PowOut, beta2pow_out: Beta2PowOut, found_inf: FoundInf, acc_step: AccStep, stop_update: StopUpdate, step: Step} + - op: distributed_fused_lamb_init inputs: {param: Param, grad: Grad} diff --git a/paddle/phi/api/yaml/op_version.yaml b/paddle/phi/api/yaml/op_version.yaml index 2bd09abd311ae..6e7a2cff79764 100644 --- a/paddle/phi/api/yaml/op_version.yaml +++ b/paddle/phi/api/yaml/op_version.yaml @@ -274,7 +274,7 @@ - op : generate_proposals version : - - checkpoint : Registe generate_proposals_v2 for adding the attribute of pixel_offset + - checkpoint : Register generate_proposals_v2 for adding the attribute of pixel_offset action : - add_attr : pixel_offset comment : If true, im_shape pixel offset is 1. diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index d6f4c6cddfb27..9830d7ae3a7a4 100755 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -1,7 +1,7 @@ # This file is designed for C++ operators, which manages the # generated code for dynamic mode and static mode. If you want # to add the new operator configuration, make sure an operator's -# Python API, dynamic graph API, and static graph Opertaor parameters +# Python API, dynamic graph API, and static graph Operator parameters # are consistent and correspond one-to-one. 
It's forbidden that the # operator configured in this yaml file does not have Python API. @@ -327,6 +327,7 @@ backward : bicubic_interp_grad data_transform : skip_transform : out_size, size_tensor, scale_tensor + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : bilinear args : (Tensor x, Tensor y, Tensor weight, Tensor bias) @@ -350,6 +351,7 @@ backward : bilinear_interp_grad data_transform : skip_transform : out_size, size_tensor, scale_tensor + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : bincount args: (Tensor x, Tensor weights, Scalar(int) minlength = 0) @@ -1658,6 +1660,7 @@ backward : linear_interp_grad data_transform : skip_transform : out_size, size_tensor, scale_tensor + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : llm_int8_linear args : (Tensor x, Tensor weight, Tensor bias, Tensor weight_scale, float threshold=6.0) @@ -1976,6 +1979,7 @@ func : meshgrid data_type : inputs backward : meshgrid_grad + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : mode args : (Tensor x, int axis = -1, bool keepdim = false) @@ -2068,6 +2072,7 @@ backward : nearest_interp_grad data_transform : skip_transform : out_size, size_tensor, scale_tensor + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : nextafter args : (Tensor x, Tensor y) @@ -2314,6 +2319,7 @@ kernel : func : relu6 backward : relu6_grad + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : renorm args : (Tensor x, float p, int axis, float max_norm) @@ -2555,7 +2561,7 @@ kernel : func : shape {dense -> dense}, shape_sr {selected_rows -> dense} - data_transform: + data_transform : skip_transform : input interfaces : paddle::dialect::InferSymbolicShapeInterface @@ -2616,7 +2622,7 @@ spmd_rule : ElementwiseUnaryInferSpmd kernel : func : sin - inplace: (x -> out) + inplace : (x -> out) backward : sin_grad interfaces : paddle::dialect::InferSymbolicShapeInterface @@ -2777,10 +2783,10 @@ - op : swiglu args : (Tensor x, Tensor y) output : Tensor(out) - infer_meta: - func: SwiGLUInferMeta - spmd_rule: SwiGLUInferSpmd - kernel: + infer_meta : + func : SwiGLUInferMeta + spmd_rule : SwiGLUInferSpmd + kernel : func : swiglu optional : y backward: swiglu_grad @@ -2804,7 +2810,7 @@ func : UnchangedInferMeta kernel : func : tan - inplace: (x -> out) + inplace : (x -> out) backward : tan_grad interfaces : paddle::dialect::InferSymbolicShapeInterface @@ -2913,6 +2919,7 @@ backward : trilinear_interp_grad data_transform : skip_transform : out_size, size_tensor, scale_tensor + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : trunc args : (Tensor input) @@ -3052,9 +3059,9 @@ func : WarpctcInferMeta kernel : func : warpctc - data_type: logits - optional: logits_length, labels_length - intermediate: warpctcgrad + data_type : logits + optional : logits_length, labels_length + intermediate : warpctcgrad backward : warpctc_grad - op : warprnnt @@ -3064,8 +3071,8 @@ func : WarprnntInferMeta kernel : func : warprnnt - data_type: input - intermediate: warprnntgrad + data_type : input + intermediate : warprnntgrad backward : warprnnt_grad - op : weight_dequantize @@ -3085,8 +3092,8 @@ kernel : func : weight_only_linear data_type : x - optional: bias - backward: weight_only_linear_grad + optional : bias + backward : weight_only_linear_grad - op : weight_quantize args : (Tensor x, str algo = "weight_only_int8", int arch = 80, int group_size = -1) @@ -3095,7 +3102,8 @@ func : WeightQuantizeInferMeta kernel : func : weight_quantize - data_type: x + 
data_type : x + backend : x - op : weighted_sample_neighbors args : (Tensor row, Tensor colptr, Tensor edge_weight, Tensor input_nodes, Tensor eids, int sample_size, bool return_eids) @@ -3114,7 +3122,7 @@ spmd_rule: WhereInferSpmd kernel : func : where - inplace: (x -> out) + inplace : (x -> out) backward : where_grad interfaces : paddle::dialect::InferSymbolicShapeInterface diff --git a/paddle/phi/api/yaml/static_backward.yaml b/paddle/phi/api/yaml/static_backward.yaml index 526a7195a5bb3..d4ca3f05e7c0b 100755 --- a/paddle/phi/api/yaml/static_backward.yaml +++ b/paddle/phi/api/yaml/static_backward.yaml @@ -103,7 +103,7 @@ output : Tensor(weight_grad) infer_meta : func : EmbeddingGradInferMeta - param : [x,weght] + param : [x,weight] kernel : func : embedding_grad {dense, dense, dense -> dense} embedding_sparse_grad {dense, dense, dense -> selected_rows} diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt index 80d5f14e627a3..67690440f6bbb 100644 --- a/paddle/phi/backends/CMakeLists.txt +++ b/paddle/phi/backends/CMakeLists.txt @@ -31,7 +31,7 @@ if(WITH_XPU) list(APPEND BACKENDS_DEPS phi_dynload_xpti) endif() -if(WITH_MKLDNN) +if(WITH_ONEDNN) list(APPEND BACKENDS_SRCS onednn/onednn_context.cc) list(APPEND BACKENDS_SRCS onednn/axpy_handler.cc) list(APPEND BACKENDS_SRCS onednn/matmul_utils.cc) diff --git a/paddle/phi/backends/dynload/dynamic_loader.cc b/paddle/phi/backends/dynload/dynamic_loader.cc index 0b056d6df972f..3e65845905646 100644 --- a/paddle/phi/backends/dynload/dynamic_loader.cc +++ b/paddle/phi/backends/dynload/dynamic_loader.cc @@ -103,13 +103,14 @@ static constexpr char* win_nvjpeg_lib = ".dll;nvjpeg64_" CUDA_VERSION_MAJOR ".dll;nvjpeg64_10.dll"; static constexpr char* win_cusolver_lib = "cusolver64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR - ".dll;cusolver64_" CUDA_VERSION_MAJOR ".dll;cusolver64_10.dll"; + ".dll;cusolver64_" CUDA_VERSION_MAJOR + ".dll;cusolver64_11.dll;cusolver64_10.dll"; static constexpr char* win_cusparse_lib = "cusparse64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR ".dll;cusparse64_" CUDA_VERSION_MAJOR ".dll;cusparse64_10.dll"; static constexpr char* win_cufft_lib = "cufft64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR - ".dll;cufft64_" CUDA_VERSION_MAJOR ".dll;cufft64_10.dll"; + ".dll;cufft64_" CUDA_VERSION_MAJOR ".dll;cufft64_11.dll;cufft64_10.dll"; #else static constexpr char* win_curand_lib = "curand64_" CUDA_VERSION_MAJOR CUDA_VERSION_MINOR diff --git a/paddle/phi/backends/onednn/onednn_context.cc b/paddle/phi/backends/onednn/onednn_context.cc index b7789f29740f0..1a27e83af50fb 100644 --- a/paddle/phi/backends/onednn/onednn_context.cc +++ b/paddle/phi/backends/onednn/onednn_context.cc @@ -189,7 +189,7 @@ struct OneDNNContext::Impl { std::lock_guard lock(*p_mutex_); - // Find ShapeBlob for current mkldnn session id. + // Find ShapeBlob for current onednn session id. 
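// (The lookup below assumes the two-level cache layout documented in
// onednn_context.h: pMap keys each session/thread id to a ShapeBlob, and that
// ShapeBlob keys the current input-shape string to the cached primitives, so
// a miss at this first level simply means nothing is cached for this sid yet.)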
auto map_it = pMap->find(sid); if (map_it == pMap->end()) { @@ -259,7 +259,7 @@ struct OneDNNContext::Impl { std::lock_guard lock(*p_mutex_); - // Find ShapeBlob for current mkldnn session id firstly + // Find ShapeBlob for current onednn session id firstly auto map_it = pMap->find(sid); // (jczaja): After first iteration of model's execution we // should have all elements cached (mostly) so failures are unlikely (less @@ -366,7 +366,7 @@ struct OneDNNContext::Impl { unsigned int block_next_cache_clearing_ = 0; // Holds some attributes only used by the onednn kernel calculation - // Since original mkldnn op kernel directly adds the operations that require + // Since original onednn op kernel directly adds the operations that require // fusion to the native kernel operations, and uses the attribute `fuse_xxx` // to control, for onednn, there will be some attributes that seem to be // independent of the device are also saved here. diff --git a/paddle/phi/backends/onednn/onednn_context.h b/paddle/phi/backends/onednn/onednn_context.h index 499be34650098..0e4654cb50a77 100644 --- a/paddle/phi/backends/onednn/onednn_context.h +++ b/paddle/phi/backends/onednn/onednn_context.h @@ -28,7 +28,7 @@ namespace phi { using TensorNameMap = std::map>; class OneDNNContextThreadLocals { - // default mkldnn session id + // default onednn session id typedef OneDNNContextThreadLocals self; struct Body { @@ -38,7 +38,7 @@ class OneDNNContextThreadLocals { // - For fixed-shape, it's a null string in default. // - For dynamic-shape, it's user specific. std::string cur_input_shape_str; - // the cache capacity of different input shapes for MKLDNN. + // the cache capacity of different input shapes for OneDNN. // Default 1 means fixed input shape, not dynamic shape. int cur_input_shape_cache_capacity; // Recently registered data_format. This is needed to @@ -73,9 +73,9 @@ class OneDNNContextThreadLocals { OneDNNContextThreadLocals(const OneDNNContextThreadLocals& c) = delete; public: - // default mkldnn session id + // default onednn session id static constexpr size_t kMKLDNNSessionID_Default = 0; - // mkldnn session id for cache clearing mode + // onednn session id for cache clearing mode static constexpr size_t kMKLDNNSessionID_CacheClearing = -1; TEST_API static Body& fetch(); }; @@ -89,7 +89,7 @@ class OneDNNContext : public CPUContext { template using umap_key_string_t = umap_value_smart_t; - // Following three maps are used to cache MKLDNN primitives. + // Following three maps are used to cache OneDNN primitives. 
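// For orientation, kernels typically round-trip this cache through the
// context's SetBlob/GetBlob accessors; the blob name in this sketch is
// hypothetical, not an identifier used anywhere in this patch:
//   dev_ctx.SetBlob("conv_fwd_pd@cur_shape", pd_ptr);       // store per shape
//   auto cached = dev_ctx.GetBlob("conv_fwd_pd@cur_shape"); // same thread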
// Their relations are: // - BlobMap = Map<cur_thread_id, ShapeBlob> // - ShapeBlob = Map<cur_input_shape_str, KeyBlob> diff --git a/paddle/phi/backends/xpu/xpu1_op_list.cc b/paddle/phi/backends/xpu/xpu1_op_list.cc index cef49d14c076f..58e5c5d72beab 100644 --- a/paddle/phi/backends/xpu/xpu1_op_list.cc +++ b/paddle/phi/backends/xpu/xpu1_op_list.cc @@ -154,13 +154,13 @@ XPUOpMap& get_kl1_ops() { XPUKernelSet({phi::DataType::INT64, phi::DataType::INT32, phi::DataType::FLOAT32})}, + {"group_norm_silu_xpu", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_switch_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_switch", XPUKernelSet({phi::DataType::FLOAT32})}, {"index_select", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT32, phi::DataType::INT64})}, - {"iou_similarity", XPUKernelSet({phi::DataType::FLOAT32})}, {"lamb", XPUKernelSet({phi::DataType::FLOAT32})}, {"layer_norm_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"layer_norm", XPUKernelSet({phi::DataType::FLOAT32})}, diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index 07972469a32b1..9698544b3738f 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -549,6 +549,8 @@ XPUOpMap& get_kl2_ops() { phi::DataType::FLOAT32})}, {"grid_sampler_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"grid_sampler", XPUKernelSet({phi::DataType::FLOAT32})}, + {"group_norm_silu_xpu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"hard_sigmoid_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_sigmoid", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, @@ -586,7 +588,6 @@ XPUOpMap& get_kl2_ops() { {"instance_norm_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"inverse", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT64})}, - {"iou_similarity", XPUKernelSet({phi::DataType::FLOAT32})}, {"label_smooth", XPUKernelSet({phi::DataType::FLOAT32})}, {"lamb", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"lars_momentum", @@ -836,8 +837,6 @@ XPUOpMap& get_kl2_ops() { phi::DataType::FLOAT16, phi::DataType::INT32, phi::DataType::INT64})}, - {"sampling_id", - XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT64})}, {"set_value", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT32, @@ -914,6 +913,8 @@ XPUOpMap& get_kl2_ops() { phi::DataType::INT16, phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + {"spatial_transformer_resblock_xpu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"split", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, @@ -1210,7 +1211,9 @@ XPUOpMap& get_kl2_ops() { {"fused_feedforward_grad", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"qkv_attention_xpu", - XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + XPUKernelSet({phi::DataType::FLOAT32, + phi::DataType::FLOAT16, + phi::DataType::INT8})}, {"lod_reset", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16, @@ -1225,6 +1228,8 @@ XPUOpMap& get_kl2_ops() { XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, {"roformer_relative_embedding_xpu", XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, + {"cross_attention_xpu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"variable_length_memory_efficient_attention", XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, {"flash_attn_unpadded", diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc index 48dc5d8334193..779f35a483bc7 100644
--- a/paddle/phi/backends/xpu/xpu3_op_list.cc +++ b/paddle/phi/backends/xpu/xpu3_op_list.cc @@ -125,6 +125,7 @@ XPUOpMap& get_kl3_ops() { phi::DataType::INT64})}, {"c_concat", XPUKernelSet({phi::DataType::FLOAT16, + phi::DataType::BFLOAT16, phi::DataType::FLOAT32, phi::DataType::INT32, phi::DataType::INT64})}, @@ -523,6 +524,8 @@ XPUOpMap& get_kl3_ops() { phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, {"grid_sampler_grad", XPUKernelSet({phi::DataType::FLOAT32})}, + {"group_norm_silu_xpu", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"hard_sigmoid_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_sigmoid", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, @@ -807,8 +810,6 @@ XPUOpMap& get_kl3_ops() { phi::DataType::FLOAT16, phi::DataType::INT32, phi::DataType::INT64})}, - {"sampling_id", - XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT64})}, {"set_value", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT32, diff --git a/paddle/phi/common/data_type.h b/paddle/phi/common/data_type.h index f28dd7e1c6ef1..e7a07b7a3e525 100644 --- a/paddle/phi/common/data_type.h +++ b/paddle/phi/common/data_type.h @@ -253,6 +253,46 @@ inline std::string DataTypeToString(const DataType& dtype) { } } +inline DataType StringToDataType(const std::string& dtype) { + if (dtype == "Undefined(ALL_DTYPE)") { + return DataType::UNDEFINED; + } else if (dtype == "bool") { + return DataType::BOOL; + } else if (dtype == "int8") { + return DataType::INT8; + } else if (dtype == "uint8") { + return DataType::UINT8; + } else if (dtype == "int16") { + return DataType::INT16; + } else if (dtype == "uint16") { + return DataType::UINT16; + } else if (dtype == "int32") { + return DataType::INT32; + } else if (dtype == "uint32") { + return DataType::UINT32; + } else if (dtype == "int64") { + return DataType::INT64; + } else if (dtype == "uint64") { + return DataType::UINT64; + } else if (dtype == "bfloat16") { + return DataType::BFLOAT16; + } else if (dtype == "float16") { + return DataType::FLOAT16; + } else if (dtype == "float32") { + return DataType::FLOAT32; + } else if (dtype == "float64") { + return DataType::FLOAT64; + } else if (dtype == "complex64") { + return DataType::COMPLEX64; + } else if (dtype == "complex128") { + return DataType::COMPLEX128; + } else if (dtype == "pstring") { + return DataType::PSTRING; + } else { + PD_THROW("Invalid enum data type `", dtype, "`."); + } +} + } // namespace phi namespace paddle { diff --git a/paddle/phi/config.h.in b/paddle/phi/config.h.in index cb3d7eadc7f04..38cac639437b7 100644 --- a/paddle/phi/config.h.in +++ b/paddle/phi/config.h.in @@ -12,8 +12,8 @@ #define ON 1 #define OFF 0 -// WITH_MKLDNN -#if @WITH_MKLDNN@ +// WITH_ONEDNN +#if @WITH_ONEDNN@ #undef PADDLE_WITH_DNNL #define PADDLE_WITH_DNNL #endif diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index b78cec1483272..c2d804199d2c7 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -203,7 +203,7 @@ class TEST_API DenseTensor : public TensorBase, * * 1. 
Some hardware or third-party libraries add some additional storage * properties on top of the description of the basic DenseTensor, such as - * memory desc of MKLDNN, storage_format and storage_layout of NPU, + * memory desc of OneDNN, storage_format and storage_layout of NPU, * these members are necessary for optimal performance, but if the properties * of each device are added to the DenseTensor with different macro isolation, * the memory layout of the DenseTensor will become more fragmented. diff --git a/paddle/phi/core/tensor_meta.h b/paddle/phi/core/tensor_meta.h index f493e0249d7bf..613ba5f1f7f1f 100644 --- a/paddle/phi/core/tensor_meta.h +++ b/paddle/phi/core/tensor_meta.h @@ -75,7 +75,7 @@ struct TEST_API DenseTensorMeta { bool is_scalar{false}; /// \brief Determine whether using gpudnn speed-up library in the new dygraph. - /// It maybe also support MKLDNN library in the near future. + /// It may also support the OneDNN library in the near future. bool use_gpudnn{true}; DDim dims; DataType dtype{DataType::UNDEFINED}; diff --git a/paddle/phi/core/visit_type.h b/paddle/phi/core/visit_type.h index ad30da4ddcd6f..03da054450092 100644 --- a/paddle/phi/core/visit_type.h +++ b/paddle/phi/core/visit_type.h @@ -355,7 +355,7 @@ namespace phi { "`"); \ } \ }() -#if defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_HIP) +#if defined(PADDLE_WITH_XPU) #define PD_VISIT_ALL_TYPES(TYPE, NAME, ...) \ [&] { \ const auto& __dtype__ = TYPE; \ diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc index 9ba70ce824b39..261b99512a0ff 100644 --- a/paddle/phi/infermeta/backward.cc +++ b/paddle/phi/infermeta/backward.cc @@ -1405,6 +1405,7 @@ void FusedRopeGradInferMeta(const MetaTensor& sin, const MetaTensor& dout_v, bool use_neox_rotary_style, bool time_major, + float rotary_emb_base, MetaTensor* dq, MetaTensor* dk, MetaTensor* dv) { diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h index 278b4ba970ff1..88aea8f18181b 100644 --- a/paddle/phi/infermeta/backward.h +++ b/paddle/phi/infermeta/backward.h @@ -210,6 +210,7 @@ void FusedRopeGradInferMeta(const MetaTensor& sin, const MetaTensor& dout_v, bool use_neox_rotary_style, bool time_major, + float rotary_emb_base, MetaTensor* dq, MetaTensor* dk, MetaTensor* dv); diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index 97edce9ad7953..63d1d1c9b32d0 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -1532,7 +1532,7 @@ void ExpandAsInferMeta(const MetaTensor& x, const MetaTensor& y, const std::vector<int>& target_shape, MetaTensor* out) { -#define MAX_RANK_SUPPORTED 6 +#define MAX_RANK_SUPPORTED 8 auto x_dims = x.dims(); PADDLE_ENFORCE_GE( target_shape.size(), diff --git a/paddle/phi/infermeta/fusion.cc b/paddle/phi/infermeta/fusion.cc index b56e7fab0bfe6..e8eb740e453ff 100644 --- a/paddle/phi/infermeta/fusion.cc +++ b/paddle/phi/infermeta/fusion.cc @@ -116,6 +116,20 @@ void AddLayernormXPUInferMeta(const MetaTensor& x, out->share_lod(x); } +void GroupNormalizeSiluXPUInferMeta(const MetaTensor& x, + const MetaTensor& scale, + const MetaTensor& bias, + int groups, + float epsilon, + MetaTensor* out) { + auto x_dims = x.dims(); + auto out_dims = x_dims; + out->set_dims(out_dims); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); + out->share_lod(x); +} + void FusedMultiTransformerInferMeta( const MetaTensor& x, const std::vector<const MetaTensor*>& ln_scales, @@ -568,6 +582,36 @@ void Conv2dXPUInferMeta(const MetaTensor& x, out->set_dtype(out_dtype); } +void
SpatialTransformerResblockXPUInferMeta( + const MetaTensor& x, + const std::vector<const MetaTensor*>& x_max, + const std::vector<const MetaTensor*>& conv_bias, + const std::vector<const MetaTensor*>& conv_filter, + const std::vector<const MetaTensor*>& conv_filter_max, + const std::vector<const MetaTensor*>& gn_bias, + const std::vector<const MetaTensor*>& gn_scale, + const std::vector<int>& dilations, + const std::vector<int>& paddings, + const std::vector<int>& strides, + const std::vector<float>& gn_eps, + const std::vector<int>& gn_groups, + const std::vector<int>& groups, + bool conv_fix, + bool has_silu_fc_input, + bool include_silu, + MetaTensor* out, + MetaTensor* out_max) { + auto input_shape = x.dims(); + auto batch_size = input_shape[0]; + auto channel_out = conv_filter[0]->dims()[0]; + auto h = input_shape[2]; + auto w = input_shape[3]; + out->set_dims(common::make_ddim({batch_size, channel_out, h, w})); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); + out->share_lod(x); +} + void EmbeddingWithEltwiseAddXPUInferMeta( const std::vector<const MetaTensor*>& ids, const std::vector<const MetaTensor*>& tables, @@ -3032,7 +3076,7 @@ void FusedConv2dAddActInferMeta(const MetaTensor& input, MetaTensor* output, std::vector outputs, MetaConfig config) { - // TODO(liuyuanle): mkldnn seems only support nchw. + // TODO(liuyuanle): onednn seems to only support nchw. const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); std::vector out_shape = ComputeOutputShape(input, filter, @@ -3731,13 +3775,14 @@ void QKVAttentionXPUInferMeta(const MetaTensor& q, const MetaTensor& q_max, const MetaTensor& k_max, const MetaTensor& v_max, + const MetaTensor& qk_max, + const MetaTensor& qkv_max, float alpha, int head_num, int head_dim, bool qkv_fc_fusion, DataType out_dtype, - MetaTensor* qkv, - MetaTensor* qkv_max) { + MetaTensor* qkv) { auto q_dims = q.dims(); auto k_dims = k.dims(); auto v_dims = v.dims(); @@ -3781,9 +3826,6 @@ void QKVAttentionXPUInferMeta(const MetaTensor& q, qkv->set_dims(phi::make_ddim({q_dims[0], q_dims[1], head_num * head_dim})); qkv->set_dtype(out_dtype); qkv->set_layout(q.layout()); - qkv_max->set_dims(phi::make_ddim({6})); - qkv_max->set_dtype(out_dtype); - qkv_max->set_layout(q.layout()); } void SinePosXPUInferMeta(const MetaTensor& x, const MetaTensor& y, @@ -3816,6 +3858,95 @@ void SinePosXPUInferMeta(const MetaTensor& x, out->set_dtype(x.dtype()); } +void CrossAttentionXPUInferMeta( + const MetaTensor& input_q, + const MetaTensor& input_kv, + const std::vector<const MetaTensor*>& fc_weight, + const std::vector<const MetaTensor*>& fc_weight_max, + const std::vector<const MetaTensor*>& fc_bias, + const MetaTensor& mask, + int head_num, + int head_dim, + float alpha, + DataType out_dtype, + MetaTensor* qkv, + MetaTensor* qkv_max) { + auto input_q_dims = input_q.dims(); + auto input_kv_dims = input_kv.dims(); + auto mask_dims = mask.dims(); + // input shape : {B, L, H*D} + PADDLE_ENFORCE_EQ(input_q_dims.size(), + 3, + phi::errors::InvalidArgument( + "The dim of input_q should be 3! But received ", + input_q_dims.size())); + PADDLE_ENFORCE_EQ(input_kv_dims.size(), + 3, + phi::errors::InvalidArgument( + "The dim of input_kv should be 3! But received ", + input_kv_dims.size())); + // the sequence lengths of q and k/v are not required to be equal, + // but batch size and dim should be the same + PADDLE_ENFORCE_EQ( + input_q_dims[0], + input_kv_dims[0], + phi::errors::InvalidArgument("The batch size of input_q and input_kv " + "should be the same! Received ", + input_q_dims[0], + " vs ", + input_kv_dims[0])); + PADDLE_ENFORCE_EQ( + input_q_dims[2], + input_kv_dims[2], + phi::errors::InvalidArgument("The hidden_dim of input_q and input_kv " + "should be the same!
Received ", + input_q_dims[2], + " vs ", + input_kv_dims[2])); + int hidden_dim = head_num * head_dim; + PADDLE_ENFORCE_EQ( + input_q_dims[2], + hidden_dim, + phi::errors::InvalidArgument( + "The last dimension of input_q should be [H*D]! Received ", + input_q_dims[2], + " != expected ", + hidden_dim)); + PADDLE_ENFORCE_EQ(fc_weight.size(), + 3, + phi::errors::InvalidArgument( + "The size of fc_weight should be 3! But received ", + fc_weight.size())); + PADDLE_ENFORCE_EQ(fc_weight_max.size(), + 3, + phi::errors::InvalidArgument( + "The size of fc_weight_max should be 3! But received ", + fc_weight_max.size())); + PADDLE_ENFORCE_EQ( + fc_bias.size(), + 3, + phi::errors::InvalidArgument( + "The size of fc_bias should be 3! But received ", fc_bias.size())); + PADDLE_ENFORCE_LE( + mask_dims.size(), + 4, + phi::errors::InvalidArgument( + "The dim of mask should be not greater than 4!", mask_dims.size())); + + // output shape: {B, qL, H*D} + qkv->set_dims( + phi::make_ddim({input_q_dims[0], input_q_dims[1], head_num * head_dim})); + qkv->set_dtype(out_dtype); + qkv->set_layout(input_q.layout()); + // TODO(Terry) optmize the max value num + // unable to pass few PR-CIs, so just use a constant value + // int xpu2_max_value_num = phi::backends::xpu::get_xpu_max_ptr_size(-1); + const int xpu2_max_value_num = 6; + qkv_max->set_dims(phi::make_ddim({xpu2_max_value_num})); + qkv_max->set_dtype(out_dtype); + qkv_max->set_layout(input_q.layout()); +} + void MultiGruInferMeta( const MetaTensor& x, const std::vector& weight_x, diff --git a/paddle/phi/infermeta/fusion.h b/paddle/phi/infermeta/fusion.h index 0a7224e39f73b..632a656414b4f 100644 --- a/paddle/phi/infermeta/fusion.h +++ b/paddle/phi/infermeta/fusion.h @@ -70,6 +70,13 @@ void AddLayernormXPUInferMeta(const MetaTensor& x, float epsilon, MetaTensor* out); +void GroupNormalizeSiluXPUInferMeta(const MetaTensor& x, + const MetaTensor& scale, + const MetaTensor& bias, + int groups, + float epsilon, + MetaTensor* out); + void BlockMultiheadAttentionInferMeta(const MetaTensor& qkv, const MetaTensor& key_cache, const MetaTensor& value_cache, @@ -145,6 +152,26 @@ void Conv2dXPUInferMeta(const MetaTensor& x, MetaTensor* out, MetaTensor* out_max); +void SpatialTransformerResblockXPUInferMeta( + const MetaTensor& x, + const std::vector& x_max, + const std::vector& conv_bias, + const std::vector& conv_filter, + const std::vector& conv_filter_max, + const std::vector& gn_bias, + const std::vector& gn_scale, + const std::vector& dilations, + const std::vector& paddings, + const std::vector& strides, + const std::vector& gn_eps, + const std::vector& gn_groups, + const std::vector& groups, + bool conv_fix, + bool has_silu_fc_input, + bool include_silu, + MetaTensor* out, + MetaTensor* out_max); + void EmbeddingWithEltwiseAddXPUInferMeta( const std::vector& ids, const std::vector& tables, @@ -862,13 +889,14 @@ void QKVAttentionXPUInferMeta(const MetaTensor& q, const MetaTensor& q_max, const MetaTensor& k_max, const MetaTensor& v_max, + const MetaTensor& qk_max, + const MetaTensor& qkv_max, float alpha, int head_num, int head_dim, bool qkv_fc_fusion, DataType out_dtype, - MetaTensor* qkv, - MetaTensor* qkv_max); + MetaTensor* qkv); void SinePosXPUInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out); @@ -877,6 +905,19 @@ void RoformerRelativePosXPUInferMeta(const MetaTensor& x, const MetaTensor& cos_emb, int max_pos_len, MetaTensor* out); +void CrossAttentionXPUInferMeta( + const MetaTensor& input_q, + const MetaTensor& input_kv, + const std::vector& 
fc_weight, + const std::vector<const MetaTensor*>& fc_weight_max, + const std::vector<const MetaTensor*>& fc_bias, + const MetaTensor& mask, + int head_num, + int head_dim, + float alpha, + DataType out_dtype, + MetaTensor* qkv, + MetaTensor* qkv_max); void MultiGruInferMeta( const MetaTensor& x, diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index a71f0b37437ab..ceebbdb5b2d74 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -4273,6 +4273,15 @@ void WeightOnlyLinearInferMeta(const MetaTensor& x, "But received Input(X) dim[-1](%s) != Input(Weight) dim[1](%s)", x_dims[x_dims.size() - 1], w_dims[1])); + if (bias.initialized()) { + auto bias_dims = bias.dims(); + PADDLE_ENFORCE_EQ( + bias_dims.size(), + 1UL, + errors::InvalidArgument( + "The size of Input(Bias)'s dimension should be equal to 1UL.", + bias_dims.size())); + } // per-channel dequantization if (group_size == -1) { @@ -4554,6 +4563,7 @@ void FusedRopeInferMeta(const MetaTensor& q, const MetaTensor& position_ids, bool use_neox_rotary_style, bool time_major, + float rotary_emb_base, MetaTensor* out_q, MetaTensor* out_k, MetaTensor* out_v) { @@ -4911,10 +4921,10 @@ void MaskedMultiheadAttentionInferMeta(const MetaTensor& x, } } -void FullWithTensorInferMeta(const MetaTensor& shape, +void FullWithTensorInferMeta(const IntArray& shape, DataType dtype, MetaTensor* out) { - out->set_dims(common::make_ddim(std::vector<int64_t>(shape.numel(), -1))); + out->set_dims(common::make_ddim(shape.GetData())); out->set_dtype(dtype); } diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 3722a0d5844ba..8d6a366fdbb24 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -904,6 +904,7 @@ void FusedRopeInferMeta(const MetaTensor& q, const MetaTensor& position_ids, bool use_neox_rotary_style, bool time_major, + float rotary_emb_base, MetaTensor* out_q, MetaTensor* out_k, MetaTensor* out_v); @@ -951,7 +952,7 @@ void MaskedMultiheadAttentionInferMeta(const MetaTensor& x, MetaTensor* cache_kv_out, MetaTensor* beam_cache_offset_out); -void FullWithTensorInferMeta(const MetaTensor& shape, +void FullWithTensorInferMeta(const IntArray& shape, DataType dtype, MetaTensor* out); diff --git a/paddle/phi/infermeta/spmd_rules/fused_rope.cc b/paddle/phi/infermeta/spmd_rules/fused_rope.cc index e58b987fb3499..9b2be06066799 100644 --- a/paddle/phi/infermeta/spmd_rules/fused_rope.cc +++ b/paddle/phi/infermeta/spmd_rules/fused_rope.cc @@ -241,7 +241,8 @@ SpmdInfo FusedRopeInferSpmd(const DistMetaTensor& q, const DistMetaTensor& cos, const DistMetaTensor& position_ids, bool use_neox_rotary_style, - bool time_major) { + bool time_major, + float rotary_emb_base) { check_q(q); std::vector>> @@ -392,7 +393,8 @@ SpmdInfo FusedRopeInferSpmdReverse(const DistMetaTensor& q, const DistMetaTensor& out_k, const DistMetaTensor& out_v, bool use_neox_rotary_style, - bool time_major) { + bool time_major, + float rotary_emb_base) { check_q(out_q); std::vector>> outputs_sharding_info; @@ -548,7 +550,8 @@ SpmdInfo FusedRopeGradInferSpmd(const DistMetaTensor& sin, const DistMetaTensor& out_k_grad, const DistMetaTensor& out_v_grad, bool use_neox_rotary_style, - bool time_major) { + bool time_major, + float rotary_emb_base) { // NOTE(zhonghui): The forward and backward kernels of fuse rope are the same, // so the spmd rules can be shared.
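// In practice the gradient tensors are simply fed through the forward rule
// below, so each out_*_grad inherits the dims mapping its forward counterpart
// would receive; rotary_emb_base is presumably threaded through only to keep
// the kernel and rule signatures aligned, since a scalar attribute cannot
// affect the sharding decision.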
SpmdInfo spmd_info = FusedRopeInferSpmd(out_q_grad, diff --git a/paddle/phi/infermeta/spmd_rules/fused_rope.h b/paddle/phi/infermeta/spmd_rules/fused_rope.h index 3a5c331098ad1..63eba399a3bbf 100644 --- a/paddle/phi/infermeta/spmd_rules/fused_rope.h +++ b/paddle/phi/infermeta/spmd_rules/fused_rope.h @@ -30,7 +30,8 @@ SpmdInfo FusedRopeInferSpmd(const DistMetaTensor& q, const DistMetaTensor& cos, const DistMetaTensor& position_ids, bool use_neox_rotary_style = true, - bool time_major = false); + bool time_major = false, + float rotary_emb_base = 10000.f); SpmdInfo FusedRopeInferSpmdReverse(const DistMetaTensor& q, const DistMetaTensor& k, @@ -42,7 +43,8 @@ SpmdInfo FusedRopeInferSpmdReverse(const DistMetaTensor& q, const DistMetaTensor& out_k, const DistMetaTensor& out_v, bool use_neox_rotary_style = true, - bool time_major = false); + bool time_major = false, + float rotary_emb_base = 10000.f); SpmdInfo FusedRopeGradInferSpmd(const DistMetaTensor& sin, const DistMetaTensor& cos, @@ -51,7 +53,8 @@ SpmdInfo FusedRopeGradInferSpmd(const DistMetaTensor& sin, const DistMetaTensor& out_k_grad, const DistMetaTensor& out_v_grad, bool use_neox_rotary_style = true, - bool time_major = false); + bool time_major = false, + float rotary_emb_base = 10000.f); } // namespace distributed } // namespace phi diff --git a/paddle/phi/infermeta/spmd_rules/rules.cc b/paddle/phi/infermeta/spmd_rules/rules.cc index 9c6492ee75913..d74beb98de74e 100644 --- a/paddle/phi/infermeta/spmd_rules/rules.cc +++ b/paddle/phi/infermeta/spmd_rules/rules.cc @@ -502,6 +502,16 @@ PD_REGISTER_SPMD_RULE( PD_INFER_SPMD(phi::distributed::LayerNormInferSpmd), PD_INFER_SPMD(phi::distributed::LayerNormInferSpmdReverse)); +// fused_rms_norm +// NOTE(ZHIQIU): Temporarily register fused_rms_norm rule, +// this is not for the rms_norm kernel, but for the custom kernel +// 'fused_rms_norm' in PaddleNLP. +// It will no longer be needed when the PIR-AutoParallel project +// is finished.
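+// The registration below reuses the existing rms_norm rules verbatim; the
+// macro arguments are (op_name, forward rule, reverse rule), and a custom
+// kernel registered under the name `fused_rms_norm` is assumed to pick the
+// rule up by that name with no further glue.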
+PD_REGISTER_SPMD_RULE(fused_rms_norm, + PD_INFER_SPMD(phi::distributed::RmsNormInferSpmd), + PD_INFER_SPMD(phi::distributed::RmsNormInferSpmdReverse)); + PD_REGISTER_SPMD_RULE( flash_attention, PD_INFER_SPMD(phi::distributed::FlashAttInferSpmdStatic), diff --git a/paddle/phi/infermeta/spmd_rules/swiglu.cc b/paddle/phi/infermeta/spmd_rules/swiglu.cc index 924a80c2e39a0..040b8100d8042 100644 --- a/paddle/phi/infermeta/spmd_rules/swiglu.cc +++ b/paddle/phi/infermeta/spmd_rules/swiglu.cc @@ -27,8 +27,14 @@ namespace distributed { SpmdInfo SwiGLUInferSpmd(const DistMetaTensor& x, const DistMetaTensor& y) { // y.dist_attr() is empty means y is None if (y.dist_attr() == TensorDistAttr()) { - PADDLE_THROW( - phi::errors::Unimplemented("The input y is not allowed to be None")); + auto x_dims_mapping = x.dist_attr().dims_mapping(); + if (x_dims_mapping.back() != -1) { + PADDLE_THROW( + phi::errors::Unimplemented("The case where input y is None and " + "input x's last dim is sharded is not supported")); + } + auto res = ElementwiseUnaryInferSpmd(x); + return {{res.first[0], y.dist_attr()}, {res.second[0]}}; } else { return ElementwiseBinaryInferSpmd(x, y); } @@ -38,8 +44,14 @@ SpmdInfo SwiGLUInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& y, const DistMetaTensor& out) { if (y.dist_attr() == TensorDistAttr()) { - PADDLE_THROW( - phi::errors::Unimplemented("The input y is not allowed to be None")); + auto x_dims_mapping = x.dist_attr().dims_mapping(); + if (x_dims_mapping.back() != -1) { + PADDLE_THROW( + phi::errors::Unimplemented("The case where input y is None and " + "input x's last dim is sharded is not supported")); + } + auto res = ElementwiseUnaryInferSpmdReverse(x, out); + return {{res.first[0], y.dist_attr()}, {res.second[0]}}; } else { return ElementwiseBinaryInferSpmdReverse(x, y, out); } @@ -49,8 +61,15 @@ SpmdInfo SwiGLUGradInferSpmd(const DistMetaTensor& x, const DistMetaTensor& y, const DistMetaTensor& out_grad) { if (y.dist_attr() == TensorDistAttr()) { - PADDLE_THROW( - phi::errors::Unimplemented("The input y is not allowed to be None")); + auto x_dims_mapping = x.dist_attr().dims_mapping(); + if (x_dims_mapping.back() != -1) { + PADDLE_THROW( + phi::errors::Unimplemented("The case where input y is None and " + "input x's last dim is sharded is not supported")); + } + auto res = ElementwiseUnaryGradInferSpmd(x, out_grad); + return {{res.first[0], y.dist_attr(), res.first[1]}, + {res.second[0], y.dist_attr()}}; } else { return ElementwiseBinaryGradInferSpmd(x, y, out_grad); } diff --git a/paddle/phi/infermeta/spmd_rules/tile.cc b/paddle/phi/infermeta/spmd_rules/tile.cc index 76eb0dd95f632..e6d98a1b28303 100644 --- a/paddle/phi/infermeta/spmd_rules/tile.cc +++ b/paddle/phi/infermeta/spmd_rules/tile.cc @@ -151,7 +151,7 @@ SpmdInfo TileInferSpmdReverse(const DistMetaTensor& x, auto x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src); x_dist_attr_dst.set_dims_mapping(x_dims_mapping_dst); - VLOG(4) << "TriuInferSpmdReverse:"; + VLOG(4) << "TileInferSpmdReverse:"; VLOG(4) << "out shape: [" << str_join(out_shape) << "]" << "src_dims_mapping: [" << str_join(out_dist_attr_src.dims_mapping()) diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc index f10a86b33836a..beba7457039cc 100644 --- a/paddle/phi/infermeta/ternary.cc +++ b/paddle/phi/infermeta/ternary.cc @@ -146,6 +146,25 @@ void AddmmInferMeta(const MetaTensor& input, out->set_dtype(input.dtype()); } +void AssignPosInferMeta(const MetaTensor& x, + const MetaTensor& cum_count, + const MetaTensor& eff_num_len, +
MetaTensor* out) { + phi::DataType X_dtype = x.dtype(); + phi::DataType cum_count_dtype = cum_count.dtype(); + + PADDLE_ENFORCE_EQ(cum_count_dtype, + X_dtype, + phi::errors::InvalidArgument( + "The dtype of cum_count and X should be the same")); + PADDLE_ENFORCE_EQ(cum_count_dtype, + phi::DataType::INT64, + phi::errors::InvalidArgument( + "The dtype of cum_count, eff_num_len and " + "X should be int64")); + out->set_dtype(X_dtype); +} + void BatchFCInferMeta(const MetaTensor& input, const MetaTensor& w, const MetaTensor& bias, @@ -1429,12 +1448,19 @@ void ScatterNdAddInferMeta(const MetaTensor& x, // update.shape = index.shape[:-1] + output.shape[index.shape[-1]:] std::vector<int64_t> r_updates_dims; + bool without_dynamic_shape = true; for (int i = 0; i < index_dims_size - 1; ++i) { + if (index_dims[i] == -1) { + without_dynamic_shape = false; + } r_updates_dims.emplace_back(index_dims[i]); } for (int i = static_cast<int>(index_dims[index_dims_size - 1]); i < ref_dims_size; ++i) { + if (ref_dims[i] == -1) { + without_dynamic_shape = false; + } r_updates_dims.emplace_back(ref_dims[i]); } // check for non-0d updates @@ -1442,25 +1468,27 @@ void ScatterNdAddInferMeta(const MetaTensor& x, r_updates_dims.size(), updates_dims_size, phi::errors::InvalidArgument( - "Updates has wrong shape. The shape of Updates and Input(Updates) " + "Updates has wrong shape. The shape of Updates and " + "Input(Updates) " "should be same, but received the shape of Updates is %d, " "the shape of Input(Updates) is %d.", r_updates_dims.size(), updates_dims_size)); - - for (int64_t i = 0; i < updates_dims_size; ++i) { - PADDLE_ENFORCE_EQ( - r_updates_dims[i], - updates_dims[i], - phi::errors::InvalidArgument( - "Updates has wrong shape. The dimensions of Updates and " - "Input(Updates) should match, but received Updates's" - "%d-th dimension is %d, Input(Updates)'s %d-th " - "dimension is %d.", - i, - r_updates_dims[i], - i, - updates_dims[i])); + if (without_dynamic_shape) { + for (int64_t i = 0; i < updates_dims_size; ++i) { + PADDLE_ENFORCE_EQ( + r_updates_dims[i], + updates_dims[i], + phi::errors::InvalidArgument( + "Updates has wrong shape.
The dimensions of Updates and " + "Input(Updates) should match, but received Updates's" + "%d-th dimension is %d, Input(Updates)'s %d-th " + "dimension is %d.", + i, + r_updates_dims[i], + i, + updates_dims[i])); + } } } out->set_dims(ref_dims); diff --git a/paddle/phi/infermeta/ternary.h b/paddle/phi/infermeta/ternary.h index c1c1af6f08218..c7c31e767f40f 100644 --- a/paddle/phi/infermeta/ternary.h +++ b/paddle/phi/infermeta/ternary.h @@ -53,6 +53,11 @@ void ArangeTensorInferMeta(const MetaTensor& start, const MetaTensor& step, MetaTensor* out); +void AssignPosInferMeta(const MetaTensor& x, + const MetaTensor& cum_count, + const MetaTensor& eff_num_len, + MetaTensor* out); + void BatchFCInferMeta(const MetaTensor& input, const MetaTensor& w, const MetaTensor& bias, diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 74d04da5de8f2..a152bc152ae6b 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -1219,7 +1219,7 @@ void EinsumRawInferMeta(const std::vector& inputs, void ExpandInferMeta(const MetaTensor& x, const IntArray& shape, MetaTensor* out) { -#define MAX_RANK_SUPPORTED 6 +#define EXPAND_MAX_RANK_SUPPORTED 8 auto x_dims = x.dims(); auto expand_shape = shape.GetData(); @@ -1238,11 +1238,11 @@ void ExpandInferMeta(const MetaTensor& x, static_cast(x_dims.size()))); PADDLE_ENFORCE_LE( expand_shape.size(), - MAX_RANK_SUPPORTED, + EXPAND_MAX_RANK_SUPPORTED, phi::errors::InvalidArgument("The number of elements (%d) of 'shape' for " "must not be greater than %d.", expand_shape.size(), - MAX_RANK_SUPPORTED)); + EXPAND_MAX_RANK_SUPPORTED)); PADDLE_ENFORCE_GE( expand_shape.size(), 0, @@ -1283,6 +1283,7 @@ void ExpandInferMeta(const MetaTensor& x, if (out_rank > 0 && out_shape[0] == x_dims[0]) { out->share_lod(x); } +#undef EXPAND_MAX_RANK_SUPPORTED } void FillAnyLikeInferMeta(const MetaTensor& x, @@ -4722,7 +4723,7 @@ void TileInferMeta(const MetaTensor& x, const IntArray& repeat_times, MetaTensor* out, MetaConfig config) { -#define MAX_RANK_SUPPORTED 6 +#define TILE_MAX_RANK_SUPPORTED 6 auto repeat_times_data = repeat_times.GetData(); auto x_dims = x.dims(); @@ -4732,19 +4733,19 @@ void TileInferMeta(const MetaTensor& x, PADDLE_ENFORCE_LE( x_dims.size(), - MAX_RANK_SUPPORTED, + TILE_MAX_RANK_SUPPORTED, errors::InvalidArgument( "The rank of the input 'x' for tile op " "must not be greater than %d, but the value received is %d.", - MAX_RANK_SUPPORTED, + TILE_MAX_RANK_SUPPORTED, x_dims.size())); PADDLE_ENFORCE_LE( repeat_times_data.size(), - MAX_RANK_SUPPORTED, + TILE_MAX_RANK_SUPPORTED, errors::InvalidArgument( "The size of the shape of input 'repeat_times' for tile op " "must not be greater than %d, but the value received is %d.", - MAX_RANK_SUPPORTED, + TILE_MAX_RANK_SUPPORTED, repeat_times_data.size())); PADDLE_ENFORCE_GE( repeat_times_data.size(), @@ -4785,6 +4786,7 @@ void TileInferMeta(const MetaTensor& x, out->share_lod(x); } out->set_dtype(x.dtype()); +#undef TILE_MAX_RANK_SUPPORTED } void TopKInferMeta(const MetaTensor& x, diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 304fd3cef793a..31de8c3e244be 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -209,11 +209,9 @@ if(WITH_ROCM) "gpu/lu_kernel.cu" "gpu/matrix_rank_kernel.cu" "gpu/matrix_rank_tol_kernel.cu" - "gpu/multiclass_nms3_kernel.cu" "gpu/put_along_axis_grad_kernel.cu" "gpu/put_along_axis_kernel.cu" "gpu/qr_kernel.cu" - "gpu/rms_norm_grad_kernel.cu" "gpu/svd_kernel.cu" 
"gpudnn/mha_cudnn_frontend.cu" "fusion/gpu/block_multi_head_attention_kernel.cu" @@ -239,7 +237,7 @@ set(cc_search_pattern "stride/*.cc" "fusion/cpu/*.cc") -if(WITH_MKLDNN) +if(WITH_ONEDNN) set(cc_search_pattern ${cc_search_pattern} "legacy/onednn/*.cc" "onednn/*.cc" "fusion/onednn/*.cc") endif() @@ -262,7 +260,9 @@ if(NOT AND AVX512F_FOUND AND AVX512F_FLAG AND WITH_MKL)) + list(REMOVE_ITEM kernel_cc "fusion/cpu/fused_layer_norm_avx_kernel.cc") list(REMOVE_ITEM kernel_cc "fusion/cpu/self_dp_attention_kernel.cc") + list(REMOVE_ITEM kernel_cc "fusion/cpu/rms_norm_avx_kernel.cc") endif() file( diff --git a/paddle/phi/kernels/cpu/cross_grad_kernel.cc b/paddle/phi/kernels/cpu/cross_grad_kernel.cc index 882c3dd9ee512..4c41107ba0199 100644 --- a/paddle/phi/kernels/cpu/cross_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_grad_kernel.cc @@ -18,6 +18,8 @@ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" +#include "paddle/phi/kernels/funcs/complex_functors.h" +#include "paddle/phi/kernels/funcs/for_range.h" namespace phi { @@ -81,9 +83,27 @@ void CrossGradKernel(const Context &dev_ctx, slice_size *= static_cast(input_x_dims[i]); } + int64_t numel = x.numel(); + DenseTensor x_conj, y_conj; + DenseTensorMeta meta_xy(x.dtype(), x.dims()); + x_conj.set_meta(meta_xy); + y_conj.set_meta(meta_xy); + + auto *input_x_conj_data = dev_ctx.template Alloc(&x_conj); + + auto *input_y_conj_data = dev_ctx.template Alloc(&y_conj); + + phi::funcs::ForRange for_range(dev_ctx, numel); + phi::funcs::ConjFunctor functor_x( + input_x.data(), numel, input_x_conj_data); + phi::funcs::ConjFunctor functor_y( + input_y.data(), numel, input_y_conj_data); + for_range(functor_x); + for_range(functor_y); + std::vector input_x_vec, input_y_vec, input_dout_vec; - phi::TensorToVector(input_x, dev_ctx, &input_x_vec); - phi::TensorToVector(input_y, dev_ctx, &input_y_vec); + phi::TensorToVector(x_conj, dev_ctx, &input_x_vec); + phi::TensorToVector(y_conj, dev_ctx, &input_y_vec); phi::TensorToVector(input_out_grad, dev_ctx, &input_dout_vec); std::vector out_dx_vec(output_x_grad->numel()); std::vector out_dy_vec(output_y_grad->numel()); @@ -120,4 +140,6 @@ PD_REGISTER_KERNEL(cross_grad, float, double, int, - int64_t) {} + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/cross_kernel.cc b/paddle/phi/kernels/cpu/cross_kernel.cc index 0f45b7c304e31..95f826cfe9132 100644 --- a/paddle/phi/kernels/cpu/cross_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_kernel.cc @@ -105,5 +105,13 @@ void CrossKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL( - cross, CPU, ALL_LAYOUT, phi::CrossKernel, float, double, int, int64_t) {} +PD_REGISTER_KERNEL(cross, + CPU, + ALL_LAYOUT, + phi::CrossKernel, + float, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/full_kernel.cc b/paddle/phi/kernels/cpu/full_kernel.cc index b1a6ceda3647d..278b3bea324f1 100644 --- a/paddle/phi/kernels/cpu/full_kernel.cc +++ b/paddle/phi/kernels/cpu/full_kernel.cc @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" -#include "paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h" +#include "paddle/phi/kernels/impl/full_with_tensor_kernel_impl.h" namespace phi { @@ -156,5 +156,4 @@ PD_REGISTER_KERNEL(full_with_tensor, phi::dtype::complex, phi::dtype::complex) { kernel->InputAt(0).SetBackend(phi::Backend::CPU); - kernel->InputAt(1).SetBackend(phi::Backend::CPU); } diff --git a/paddle/phi/kernels/cpu/isfinite_kernel.cc b/paddle/phi/kernels/cpu/isfinite_kernel.cc index c9f69c5f7e4f5..2fa44670c15c2 100644 --- a/paddle/phi/kernels/cpu/isfinite_kernel.cc +++ b/paddle/phi/kernels/cpu/isfinite_kernel.cc @@ -27,7 +27,10 @@ PD_REGISTER_KERNEL(isinf, phi::dtype::float16, phi::dtype::bfloat16, int, - int64_t) { + int64_t, + int16_t, + int8_t, + uint8_t) { kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); } diff --git a/paddle/phi/kernels/cpu/log_softmax_kernel.cc b/paddle/phi/kernels/cpu/log_softmax_kernel.cc index a57ab908d24ca..26e894945284c 100644 --- a/paddle/phi/kernels/cpu/log_softmax_kernel.cc +++ b/paddle/phi/kernels/cpu/log_softmax_kernel.cc @@ -122,7 +122,7 @@ void LogSoftmaxKernel(const Context& dev_ctx, } // namespace phi -// TODO(YuanRisheng): The layout of mkldnn kernel should be MKLDNN, we should +// TODO(YuanRisheng): The layout of onednn kernel should be OneDNN, we should // support specifying the exact layout when the kernel is registered PD_REGISTER_KERNEL( log_softmax, CPU, ALL_LAYOUT, phi::LogSoftmaxKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/nanmedian_kernel.cc b/paddle/phi/kernels/cpu/nanmedian_kernel.cc index 2911d5c0fcec5..c38cb831d379b 100644 --- a/paddle/phi/kernels/cpu/nanmedian_kernel.cc +++ b/paddle/phi/kernels/cpu/nanmedian_kernel.cc @@ -103,8 +103,12 @@ void CalcMedianFunc(const Context& dev_ctx, offset = i * sort_k; int64_t pos = offset + sort_k - 1; o_ptr[i] = sort_out_ptr[pos]; - m_ptr[2 * i] = sort_indices_ptr[pos]; - m_ptr[2 * i + 1] = sort_indices_ptr[pos]; + if (mode == "avg") { + m_ptr[2 * i] = sort_indices_ptr[pos]; + m_ptr[2 * i + 1] = sort_indices_ptr[pos]; + } else { + m_ptr[i] = sort_indices_ptr[pos]; + } } } else { for (i = 0; i < pre_dim; i++) { diff --git a/paddle/phi/kernels/cpu/shape_broadcast_kernel.cc b/paddle/phi/kernels/cpu/shape_broadcast_kernel.cc new file mode 100644 index 0000000000000..3157be039255c --- /dev/null +++ b/paddle/phi/kernels/cpu/shape_broadcast_kernel.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/core/dense_tensor.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +#include "paddle/utils/array_ref.h" + +namespace phi { + +template <typename T> +std::vector<T> ComputeBroadcastShape(const paddle::array_ref<T>& large_shape, + const paddle::array_ref<T>& small_shape) { + PADDLE_ENFORCE_GE( + large_shape.size(), + small_shape.size(), + phi::errors::PreconditionNotMet( + "The size of large_shape is expected to be greater than or equal to " + "the size of small_shape, but got [%d] vs [%d].", + large_shape.size(), + small_shape.size())); + std::vector<T> output_data; + output_data.reserve(large_shape.size()); + auto rank_gap = large_shape.size() - small_shape.size(); + for (size_t i = 0; i < rank_gap; ++i) { + output_data.push_back(large_shape[i]); + } + for (size_t i = 0; i < small_shape.size(); ++i) { + output_data.push_back(std::max(large_shape[i + rank_gap], small_shape[i])); + } + return output_data; +} + +template <typename T, typename Context> +void ShapeBroadcastKernel(const Context& ctx, + const DenseTensor& x_shape, + const DenseTensor& y_shape, + DenseTensor* out) { + PADDLE_ENFORCE_EQ( + x_shape.dims().size(), + 1, + phi::errors::InvalidArgument("Invalid input tensor. The rank of x_shape " + "should be equal to 1, but now received [%d].", + x_shape.dims().size())); + PADDLE_ENFORCE_EQ( + y_shape.dims().size(), + 1, + phi::errors::InvalidArgument("Invalid input tensor. The rank of y_shape " + "should be equal to 1, but now received [%d].", + y_shape.dims().size())); + paddle::array_ref<T> x_shape_data(x_shape.data<T>(), x_shape.numel()); + paddle::array_ref<T> y_shape_data(y_shape.data<T>(), y_shape.numel()); + const auto& output_data = + x_shape_data.size() > y_shape_data.size() + ? ComputeBroadcastShape(x_shape_data, y_shape_data) + : ComputeBroadcastShape(y_shape_data, x_shape_data); + T* out_data = ctx.template HostAlloc<T>(out); + int64_t out_numel = out->numel(); + for (int i = 0; i < out_numel; ++i) { + out_data[i] = output_data[i]; + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(shape_broadcast, + CPU, + ALL_LAYOUT, + phi::ShapeBroadcastKernel, + int32_t, + int64_t) {} diff --git a/paddle/phi/kernels/elementwise_divide_grad_kernel.h b/paddle/phi/kernels/elementwise_divide_grad_kernel.h index c764f05c3983f..15b1e65a9cfdf 100644 --- a/paddle/phi/kernels/elementwise_divide_grad_kernel.h +++ b/paddle/phi/kernels/elementwise_divide_grad_kernel.h @@ -33,7 +33,8 @@ template <typename T, typename Context> void DivideDoubleGradKernel(const Context& dev_ctx, const DenseTensor& y, const DenseTensor& out, - const DenseTensor& dx, + const DenseTensor& grad_out, + const paddle::optional<DenseTensor>& dx, const paddle::optional<DenseTensor>& ddx, const paddle::optional<DenseTensor>& ddy, int axis, diff --git a/paddle/phi/kernels/flatten_kernel.h b/paddle/phi/kernels/flatten_kernel.h index b941a1fbb9691..ac53c5b82c6cb 100644 --- a/paddle/phi/kernels/flatten_kernel.h +++ b/paddle/phi/kernels/flatten_kernel.h @@ -40,7 +40,8 @@ void FlattenInferStridedKernel(const Context& dev_ctx, const DenseTensor& x, int start_axis, int stop_axis, - DenseTensor* out); + DenseTensor* out, + DenseTensor* xshape); template <typename T, typename Context> void FlattenStridedKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/full_kernel.h b/paddle/phi/kernels/full_kernel.h index b10e02658fe75..e6d80ed43dff4 100644 --- a/paddle/phi/kernels/full_kernel.h +++ b/paddle/phi/kernels/full_kernel.h @@ -33,8 +33,8 @@ void FullKernel(const Context& dev_ctx, template <typename T, typename Context> void FullWithTensorKernel(const Context& dev_ctx, - const DenseTensor& shape, const DenseTensor& value, + const IntArray& shape, DataType dtype,
DenseTensor* out); diff --git a/paddle/phi/kernels/funcs/common_shape.h b/paddle/phi/kernels/funcs/common_shape.h index 19f2fa1f2fac4..45a1024339ba3 100644 --- a/paddle/phi/kernels/funcs/common_shape.h +++ b/paddle/phi/kernels/funcs/common_shape.h @@ -52,7 +52,6 @@ inline void GetBroadcastDimsArrays(const DDim &x_dims, "Axis should be less than or equal to %d, but received axis is %d.", max_dim, axis)); - if (x_dims.size() > y_dims.size()) { std::fill(y_dims_array, y_dims_array + axis, 1); if (axis + y_dims.size() < max_dim) { @@ -68,7 +67,6 @@ inline void GetBroadcastDimsArrays(const DDim &x_dims, std::copy(x_dims.Get(), x_dims.Get() + x_dims.size(), x_dims_array + axis); std::copy(y_dims.Get(), y_dims.Get() + y_dims.size(), y_dims_array); } - for (int i = 0; i < max_dim; ++i) { PADDLE_ENFORCE_EQ( x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 || diff --git a/paddle/phi/kernels/funcs/dropout_impl.cu.h b/paddle/phi/kernels/funcs/dropout_impl.cu.h index 463272a37c00d..855b6fe6c8e15 100644 --- a/paddle/phi/kernels/funcs/dropout_impl.cu.h +++ b/paddle/phi/kernels/funcs/dropout_impl.cu.h @@ -349,19 +349,6 @@ void DropoutFwGPUKernelDriver( } else { bool copy_in_kernel = GetSeedDataAndIncrement( dev_ctx, seed, is_fix_seed, seed_val, offset, &seed_data, &increment); -#ifdef PADDLE_WITH_HIP - VectorizedRandomGenerator - <<>>(0, - size, - seed_data, - dropout_prob, - x_data, - mask_data, - y_data, - upscale_in_train, - increment, - main_offset); -#else const phi::GPUContext* dev_ctx_p = &dev_ctx; auto gen_cuda = dev_ctx.GetGenerator(); auto state_index = gen_cuda->GetStateIndex(); @@ -370,10 +357,11 @@ void DropoutFwGPUKernelDriver( parameterSetter = [offset, dev_ctx_p, state_index, is_fix_seed]( phi::backends::gpu::gpuKernelParams& params) { if (!is_fix_seed) { - // we assume seed is null pointer - // seed copy to cpu is meaningless here + // we assume seed is null pointer + // seed copy to cpu is meaningless here +#ifndef PADDLE_WITH_HIP assert(seed_tensor_ptr == nullptr); - +#endif auto gen_cuda = dev_ctx_p->GetGenerator(); // ensure the generator use correct state index gen_cuda->SetStateIndex(state_index); @@ -393,9 +381,14 @@ void DropoutFwGPUKernelDriver( cudaKernelCallback = [=](unsigned int id) { void* functionPtr = reinterpret_cast(&(VectorizedRandomGenerator)); +#ifdef PADDLE_WITH_HIP + hipFunction_t cudaFunc = + reinterpret_cast(functionPtr); +#else cudaFunction_t cudaFunc; PADDLE_ENFORCE_GPU_SUCCESS( cudaGetFuncBySymbol(&cudaFunc, functionPtr)); +#endif VLOG(10) << "[cudaKernelCallback] cudaFunc = " << cudaFunc << " functionPtr = " << functionPtr; @@ -417,7 +410,6 @@ void DropoutFwGPUKernelDriver( VLOG(10) << "NON_CUDA_GRAPH seed = " << seed_data << ", increment = " << increment; -#endif } } else { if (upscale_in_train) { diff --git a/paddle/phi/kernels/funcs/eigen/broadcast.cc b/paddle/phi/kernels/funcs/eigen/broadcast.cc index 04e13a6799931..0bf9d37d60e4a 100644 --- a/paddle/phi/kernels/funcs/eigen/broadcast.cc +++ b/paddle/phi/kernels/funcs/eigen/broadcast.cc @@ -73,7 +73,9 @@ struct EigenBroadcastGrad { template struct FUNCTOR; \ template struct FUNCTOR; \ template struct FUNCTOR; \ - template struct FUNCTOR + template struct FUNCTOR; \ + template struct FUNCTOR; \ + template struct FUNCTOR INSTANTIATION(EigenBroadcast, bool); INSTANTIATION(EigenBroadcast, dtype::float16); INSTANTIATION(EigenBroadcast, dtype::bfloat16); diff --git a/paddle/phi/kernels/funcs/eigen/broadcast.cu b/paddle/phi/kernels/funcs/eigen/broadcast.cu index 0c5a3408872c4..fe16588c9bce6 100644 --- 
a/paddle/phi/kernels/funcs/eigen/broadcast.cu +++ b/paddle/phi/kernels/funcs/eigen/broadcast.cu @@ -72,7 +72,9 @@ struct EigenBroadcastGrad { template struct FUNCTOR; \ template struct FUNCTOR; \ template struct FUNCTOR; \ - template struct FUNCTOR + template struct FUNCTOR; \ + template struct FUNCTOR; \ + template struct FUNCTOR INSTANTIATION(EigenBroadcast, bool); INSTANTIATION(EigenBroadcast, dtype::float16); INSTANTIATION(EigenBroadcast, dtype::bfloat16); diff --git a/paddle/phi/kernels/funcs/jit/README.en.md b/paddle/phi/kernels/funcs/jit/README.en.md index 0e1958a5c1415..cf661d5468a6c 100644 --- a/paddle/phi/kernels/funcs/jit/README.en.md +++ b/paddle/phi/kernels/funcs/jit/README.en.md @@ -100,4 +100,4 @@ Add more implementations of `your_key` for performance enhancement. 1. Add functions based on generated code in `gen`. It should be derived from `JitCode` and should have corresponding creator from `JitCodeCreator` which will be registered on the `your_key`. 2. If new attribute type is added, you should specialize `JitCodeKey` of this type. -3. Add more functions in `more`,you can use any third party you wish, like mkl, mkldnn or intrinsic code to reach the best performance. +3. Add more functions in `more`,you can use any third party you wish, like mkl, onednn or intrinsic code to reach the best performance. diff --git a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h index 6a82875819161..3eee52efcbebe 100644 --- a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h +++ b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h @@ -166,14 +166,14 @@ __inline__ __device__ double rsqrt_(const double val) { return ::rsqrt(val); } -#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) +#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) || defined(PADDLE_WITH_HIP) template <> __inline__ __device__ half rsqrt_(const half val) { return hrsqrt(val); } #endif -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) template 1) { if (lane == 0) { @@ -290,7 +294,11 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fast_ln_fwd_kernel( #pragma unroll for (int it = 1; it < THREADS_PER_WARP; it *= 2) { +#ifdef PADDLE_WITH_HIP + var_local += __shfl_xor(var_local, it); +#else var_local += __shfl_xor_sync(uint32_t(-1), var_local, it); +#endif } if (WARPS_N > 1) { @@ -546,7 +554,7 @@ __inline__ __device__ void cuLoadAddStridedInputs(const int64_t i1_block, } } -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) template 0; it /= 2) { +#ifdef PADDLE_WITH_HIP + sum_loss1 += __shfl_down(sum_loss1, it); + sum_loss2 += __shfl_down(sum_loss2, it); +#else sum_loss1 += __shfl_down_sync(uint32_t(-1), sum_loss1, it); sum_loss2 += __shfl_down_sync(uint32_t(-1), sum_loss2, it); +#endif } if (lane == 0) { diff --git a/paddle/phi/kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc b/paddle/phi/kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc new file mode 100644 index 0000000000000..62944d7ea3b09 --- /dev/null +++ b/paddle/phi/kernels/fusion/cpu/fused_layer_norm_avx_kernel.cc @@ -0,0 +1,244 @@ +// Copyright (c) 2024 PaddlePaddle Authors All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" + +namespace phi { +namespace fusion { + +template +void ResidualBiasSumFunc(const T* x_data, + const T* residual_data, + const T* bias_data, + const float residual_alpha, + const int rows, + const int cols, + const int iStride, + const int oStride, + T* out_data) { + __m512 vresidual_alpha = _mm512_set1_ps(residual_alpha); + const T* pb = bias_data; +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int r = 0; r < rows; ++r) { + const T* px = x_data + r * iStride; + const T* pr = residual_data ? residual_data + r * iStride : nullptr; + T* py = out_data + r * oStride; + for (int col = 0; col < cols; col += 16) { + int remain = cols - col; + __mmask16 mask = (remain >= 16 ? 0xffff : (1 << remain) - 1); + + // residual*alpha + bias + x + __m512 vx = _mm512_maskz_loadu_ps(mask, px + col); + if (residual_data) { + __m512 residual_vx = _mm512_maskz_loadu_ps(mask, pr + col); + residual_vx = _mm512_mul_ps(residual_vx, vresidual_alpha); + vx = _mm512_mask_add_ps(vx, mask, vx, residual_vx); + } + if (bias_data) { + __m512 vb = _mm512_maskz_loadu_ps(mask, pb + col); + vx = _mm512_mask_add_ps(vx, mask, vx, vb); + } + _mm512_mask_storeu_ps(py + col, mask, vx); + } + } +} + +template +void LayerNormFunc(const T* x_data, + const T* residual_data, + const T* bias_data, + const T* norm_weight_data, + const T* norm_bias_data, + const float epsilon, + const float residual_alpha, + const int rows, + const int cols, + const int iStride, + const int oStride, + T* out_data, + T* residual_out_data, + T* mean_out, + T* var_out) { + auto size = cols; + __m512 vresidual_alpha = _mm512_set1_ps(residual_alpha); + __m512 vgamma = _mm512_set1_ps(1); + __m512 vbeta = _mm512_set1_ps(0); + const T* pb = bias_data; +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int r = 0; r < rows; ++r) { + const T* px = x_data + r * iStride; + const T* pr = residual_data ? residual_data + r * iStride : nullptr; + T* pr_out = residual_out_data ? residual_out_data + r * oStride : nullptr; + T* py = out_data + r * oStride; + + T sum = 0; + T squareSum = 0; + + __m512 vsum = _mm512_set1_ps(0); + __m512 vsqare = _mm512_set1_ps(0); + for (int col = 0; col < size; col += 16) { + int remain = size - col; + __mmask16 mask = (remain >= 16 ? 
0xffff : (1 << remain) - 1); + + // SUM(x) + __m512 vx = _mm512_maskz_loadu_ps(mask, px + col); + if (residual_data) { + __m512 residual_vx = _mm512_maskz_loadu_ps(mask, pr + col); + residual_vx = _mm512_mul_ps(residual_vx, vresidual_alpha); + vx = _mm512_mask_add_ps(vx, mask, vx, residual_vx); + if (bias_data) { + __m512 vb = _mm512_maskz_loadu_ps(mask, pb + col); + vx = _mm512_mask_add_ps(vx, mask, vx, vb); + } + _mm512_mask_storeu_ps(pr_out + col, mask, vx); + } + vsum = _mm512_add_ps(vsum, vx); + + // SUM(x*x) + __m512 tmp = _mm512_mul_ps(vx, vx); + vsqare = _mm512_add_ps(vsqare, tmp); + } + + sum = _mm512_reduce_add_ps(vsum); + squareSum = _mm512_reduce_add_ps(vsqare); + + // Mean + T mean = sum / size; + mean_out[r] = mean; + __m512 vmean = _mm512_set1_ps(mean); + + // Variance + T var = 1 / sqrt(squareSum / size - mean * mean + epsilon); + var_out[r] = var; + __m512 vvar = _mm512_set1_ps(var); + + for (int col = 0; col < size; col += 16) { + int remain = size - col; + __mmask16 mask = (remain >= 16 ? 0xffff : (1 << remain) - 1); + + __m512 vx = _mm512_maskz_loadu_ps(mask, px + col); + if (residual_data) { + __m512 residual_vx = _mm512_maskz_loadu_ps(mask, pr + col); + residual_vx = _mm512_mul_ps(residual_vx, vresidual_alpha); + vx = _mm512_mask_add_ps(vx, mask, vx, residual_vx); + if (bias_data) { + __m512 vb = _mm512_maskz_loadu_ps(mask, pb + col); + vx = _mm512_mask_add_ps(vx, mask, vx, vb); + } + } + if (norm_weight_data) { + vgamma = _mm512_maskz_loadu_ps(mask, norm_weight_data + col); + } + if (norm_bias_data) { + vbeta = _mm512_maskz_loadu_ps(mask, norm_bias_data + col); + } + // (vx - vmean) * vgamma * vvar + vbeta + vx = _mm512_mask_sub_ps(vx, mask, vx, vmean); + vx = _mm512_mask_mul_ps(vx, mask, vx, vgamma); + vx = _mm512_mask_mul_ps(vx, mask, vx, vvar); + __m512 vy = _mm512_mask_add_ps(vx, mask, vx, vbeta); + _mm512_mask_storeu_ps(py + col, mask, vy); + } + } +} + +template +void FusedLayerNormAvxKernel(const Context& dev_ctx, + const DenseTensor& x, + const paddle::optional& bias, + const paddle::optional& residual, + const paddle::optional& norm_weight, + const paddle::optional& norm_bias, + const float epsilon, + const float residual_alpha, + const int begin_norm_axis, + const float quant_scale, + const int quant_round_type, + const float quant_max_bound, + const float quant_min_bound, + DenseTensor* out, + DenseTensor* residual_out, + DenseTensor* mean, + DenseTensor* variance) { + if (quant_scale > 0.0f) { + PD_THROW("NOT supported quant int8. "); + } + const auto x_dims = x.dims(); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); + T* out_data = dev_ctx.template Alloc(out); + T* mean_out = dev_ctx.template Alloc(mean); + T* var_out = dev_ctx.template Alloc(variance); + + const T* x_data = x.data(); + const T* bias_data = bias ? bias.get().data() : nullptr; + const T* residual_data = residual ? residual.get().data() : nullptr; + const T* norm_weight_data = + norm_weight ? norm_weight.get().data() : nullptr; + const T* norm_bias_data = norm_bias ? norm_bias.get().data() : nullptr; + T* residual_out_data = + residual ? 
+ +template <typename T, typename Context> +void FusedLayerNormAvxKernel(const Context& dev_ctx, + const DenseTensor& x, + const paddle::optional<DenseTensor>& bias, + const paddle::optional<DenseTensor>& residual, + const paddle::optional<DenseTensor>& norm_weight, + const paddle::optional<DenseTensor>& norm_bias, + const float epsilon, + const float residual_alpha, + const int begin_norm_axis, + const float quant_scale, + const int quant_round_type, + const float quant_max_bound, + const float quant_min_bound, + DenseTensor* out, + DenseTensor* residual_out, + DenseTensor* mean, + DenseTensor* variance) { + if (quant_scale > 0.0f) { + PD_THROW("NOT supported quant int8. "); + } + const auto x_dims = x.dims(); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); + T* out_data = dev_ctx.template Alloc<T>(out); + T* mean_out = dev_ctx.template Alloc<T>(mean); + T* var_out = dev_ctx.template Alloc<T>(variance); + + const T* x_data = x.data<T>(); + const T* bias_data = bias ? bias.get().data<T>() : nullptr; + const T* residual_data = residual ? residual.get().data<T>() : nullptr; + const T* norm_weight_data = + norm_weight ? norm_weight.get().data<T>() : nullptr; + const T* norm_bias_data = norm_bias ? norm_bias.get().data<T>() : nullptr; + T* residual_out_data = + residual ? dev_ctx.template Alloc<T>(residual_out) : nullptr; + + int32_t rows = static_cast<int32_t>(matrix_dim[0]); + int32_t cols = static_cast<int32_t>(matrix_dim[1]); + + auto iStride = cols; + auto oStride = cols; + if (!norm_weight && !norm_bias_data) { + ResidualBiasSumFunc(x_data, + residual_data, + bias_data, + residual_alpha, + rows, + cols, + iStride, + oStride, + out_data); + } else { + LayerNormFunc(x_data, + residual_data, + bias_data, + norm_weight_data, + norm_bias_data, + epsilon, + residual_alpha, + rows, + cols, + iStride, + oStride, + out_data, + residual_out_data, + mean_out, + var_out); + } +} +} // namespace fusion +} // namespace phi + +PD_REGISTER_KERNEL(fused_bias_residual_layernorm, + CPU, + ALL_LAYOUT, + phi::fusion::FusedLayerNormAvxKernel, + float) {}
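Both this kernel and the RMS norm kernel that follows treat the input as a 2-D [rows, cols] matrix: every axis before `begin_norm_axis` is folded into `rows`, the remaining axes into `cols`, and normalization runs over `cols`. A small sketch of that flattening (our own illustration of what `common::flatten_to_2d` is used for here):

#include <cstdint>
#include <vector>

// Fold an N-D shape into [rows, cols] at begin_norm_axis.
void flatten_to_2d(const std::vector<int64_t>& dims, int begin_norm_axis,
                   int64_t* rows, int64_t* cols) {
  *rows = 1;
  *cols = 1;
  for (int i = 0; i < static_cast<int>(dims.size()); ++i) {
    if (i < begin_norm_axis) *rows *= dims[i];
    else *cols *= dims[i];
  }
}
// e.g. dims = {8, 128, 768}, begin_norm_axis = 2  ->  rows = 1024, cols = 768.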
diff --git a/paddle/phi/kernels/fusion/cpu/rms_norm_avx_kernel.cc b/paddle/phi/kernels/fusion/cpu/rms_norm_avx_kernel.cc new file mode 100644 index 0000000000000..a46b91b3c7330 --- /dev/null +++ b/paddle/phi/kernels/fusion/cpu/rms_norm_avx_kernel.cc @@ -0,0 +1,167 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <immintrin.h> +#include <cmath> +#include <cstdint> +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" + +namespace phi { +namespace fusion { + +template <typename T, typename Context> +void RmsNormAvxKernel(const Context& dev_ctx, + const DenseTensor& x, + const paddle::optional<DenseTensor>& bias, + const paddle::optional<DenseTensor>& residual, + const DenseTensor& norm_weight, + const paddle::optional<DenseTensor>& norm_bias, + const float epsilon, + const int begin_norm_axis, + const float quant_scale, + const int quant_round_type, + const float quant_max_bound, + const float quant_min_bound, + DenseTensor* out, + DenseTensor* residual_out, + DenseTensor* inv_var) { + if (quant_scale > 0.0f) { + PD_THROW("NOT supported quant int8. "); + } + + const T* x_data = x.data<T>(); + int32_t rows = 1; + int32_t cols = 1; + for (int i = 0; i < begin_norm_axis; i++) { + rows *= x.dims()[i]; + } + for (int i = begin_norm_axis; i < x.dims().size(); i++) { + cols *= x.dims()[i]; + } + + int size = cols; + auto istride = cols; + auto ostride = cols; + const T* norm_weight_data = norm_weight.data<T>(); + const T* norm_bias_data = norm_bias ? norm_bias.get().data<T>() : nullptr; + const T* residual_data = residual ? residual.get().data<T>() : nullptr; + const T* bias_data = bias ? bias.get().data<T>() : nullptr; + T* out_data = dev_ctx.template Alloc<T>(out); + T* residual_out_data = + residual ? dev_ctx.template Alloc<T>(residual_out) : nullptr; + + __m512 vb = _mm512_setzero_ps(); + const T* pb = bias_data; +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif + for (int r = 0; r < rows; ++r) { + const T* px = x_data + r * istride; + const T* pr = residual ? residual_data + r * istride : nullptr; + T* pr_out = residual ? residual_out_data + r * ostride : nullptr; + T* py = out_data + r * ostride; + + T squareSum = 0; + + __m512 vsqare = _mm512_set1_ps(0); + + int col = 0; + for (; col + 15 < size; col += 16) { + // SUM(x*x) + __m512 vx = _mm512_loadu_ps(px + col); + if (residual) { + __m512 residual_vx = _mm512_loadu_ps(pr + col); + vx = _mm512_add_ps(vx, residual_vx); + if (bias) { + __m512 vb = _mm512_loadu_ps(pb + col); + vx = _mm512_add_ps(vx, vb); + } + _mm512_storeu_ps(pr_out + col, vx); + } + __m512 tmp = _mm512_mul_ps(vx, vx); + vsqare = _mm512_add_ps(vsqare, tmp); + } + if (col < size) { + __mmask16 mask = (1 << (size - col)) - 1; + __m512 vx = _mm512_maskz_loadu_ps(mask, px + col); + if (residual) { + __m512 residual_vx = _mm512_maskz_loadu_ps(mask, pr + col); + vx = _mm512_mask_add_ps(vx, mask, vx, residual_vx); + if (bias) { + __m512 vb = _mm512_maskz_loadu_ps(mask, pb + col); + vx = _mm512_mask_add_ps(vx, mask, vx, vb); + } + _mm512_mask_storeu_ps(pr_out + col, mask, vx); + } + __m512 tmp = _mm512_mul_ps(vx, vx); + vsqare = _mm512_add_ps(vsqare, tmp); + } + + squareSum = _mm512_reduce_add_ps(vsqare); + + // Variance (stored as the inverse RMS) + T var = 1 / sqrt(squareSum / size + epsilon); + __m512 vvar = _mm512_set1_ps(var); + + for (col = 0; col + 15 < size; col += 16) { + __m512 vx = _mm512_loadu_ps(px + col); + if (residual) { + __m512 residual_vx = _mm512_loadu_ps(pr + col); + vx = _mm512_add_ps(vx, residual_vx); + if (bias) { + __m512 vb = _mm512_loadu_ps(pb + col); + vx = _mm512_add_ps(vx, vb); + } + } + __m512 vw = _mm512_loadu_ps(norm_weight_data + col); + if (norm_bias_data) { + vb = _mm512_loadu_ps(norm_bias_data + col); + } + + // vy = vx * vvar * vw + vb + vx = _mm512_mul_ps(vx, vvar); + vx = _mm512_mul_ps(vx, vw); + __m512 vy = _mm512_add_ps(vx, vb); + _mm512_storeu_ps(py + col, vy); + } + if (col < size) { + __mmask16 mask = (1 << (size - col)) - 1; + __m512 vx = _mm512_maskz_loadu_ps(mask, px + col); + if (residual) { + __m512 residual_vx = _mm512_maskz_loadu_ps(mask, pr + col); + vx = _mm512_mask_add_ps(vx, mask, vx, residual_vx); + if (bias) { + __m512 vb = _mm512_maskz_loadu_ps(mask, pb + col); + vx = _mm512_mask_add_ps(vx, mask, vx, vb); + } + } + __m512 vw = _mm512_maskz_loadu_ps(mask, norm_weight_data + col); + if (norm_bias_data) { + vb = _mm512_maskz_loadu_ps(mask, norm_bias_data + col); + } + // vx * vvar * vw + vb + vx = _mm512_mask_mul_ps(vx, mask, vx, vvar); + vx = _mm512_mask_mul_ps(vx, mask, vx, vw); + __m512 vy = _mm512_mask_add_ps(vx, mask, vx, vb); + _mm512_mask_storeu_ps(py + col, mask, vy); + } + } // end for rows +} +} // namespace fusion +} // namespace phi + +PD_REGISTER_KERNEL( + rms_norm, CPU, ALL_LAYOUT, phi::fusion::RmsNormAvxKernel, float) {}
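The RMS norm kernel uses the same vector layout but skips mean subtraction entirely: each row is scaled by `1 / sqrt(mean(x^2) + epsilon)`, then by the norm weight, plus an optional norm bias. A scalar reference for one row (illustrative only):

#include <cmath>

// out[c] = x[c] * inv_rms * w[c] + b[c],  inv_rms = 1 / sqrt(mean(x^2) + epsilon).
// A single pass over the squares suffices because no mean is needed.
void rms_norm_row(const float* x, const float* w, const float* b,
                  float epsilon, int cols, float* out) {
  float sq = 0.f;
  for (int c = 0; c < cols; ++c) sq += x[c] * x[c];
  float inv_rms = 1.f / std::sqrt(sq / cols + epsilon);
  for (int c = 0; c < cols; ++c) {
    out[c] = x[c] * inv_rms * w[c] + (b ? b[c] : 0.f);
  }
}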
diff --git a/paddle/phi/kernels/fusion/cpu/self_dp_attention_kernel.cc b/paddle/phi/kernels/fusion/cpu/self_dp_attention_kernel.cc index 0d3189187351c..dff41e6d4250c 100644 --- a/paddle/phi/kernels/fusion/cpu/self_dp_attention_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/self_dp_attention_kernel.cc @@ -257,7 +257,9 @@ void softmax_sum_max(float* AB, __mmask16 mask = (remain >= 16 ? 0xffff : (1 << remain) - 1); __m512 vx = _mm512_maskz_loadu_ps(mask, buf + off); - vx = vexp(vx * vrefac - vmax); + vx = _mm512_mask_mul_ps(vx, mask, vx, vrefac); + vx = _mm512_mask_sub_ps(vx, mask, vx, vmax); + vx = vexp(vx); _mm512_mask_storeu_ps(buf + off, mask, vx); @@ -275,8 +277,7 @@ __mmask16 mask = (remain >= 16 ? 0xffff : (1 << remain) - 1); __m512 vx = _mm512_maskz_loadu_ps(mask, buf + off); - vx = vx * vrsum; - + vx = _mm512_mask_mul_ps(vx, mask, vx, vrsum); _mm512_mask_storeu_ps(buf + off, mask, vx); } } @@ -301,7 +302,10 @@ void update_out_blk(float* output, __mmask16 mask = (remain >= 16 ? 0xffff : (1 << remain) - 1); __m512 vout = _mm512_maskz_loadu_ps(mask, outbuf + off); __m512 vabc = _mm512_maskz_loadu_ps(mask, buf + off); - __m512 vupt = vout * merr * vfac + vabc; + vout = _mm512_mask_mul_ps(vout, mask, vout, merr); + vout = _mm512_mask_mul_ps(vout, mask, vout, vfac); + __m512 vupt = _mm512_set1_ps(0.0f); + vupt = _mm512_mask_add_ps(vupt, mask, vout, vabc); _mm512_mask_storeu_ps(outbuf + off, mask, vupt); } pre_sum[i] = sum[i]; diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_bias_act.py b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_bias_act.py index 2104c676c9b82..9dd7e98a4109b 100644 --- a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_bias_act.py +++ b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_bias_act.py @@ -360,6 +360,117 @@ def generate_sm80_16816(cutlass_dtype="cutlass::half_t"): return sm80_code +# here is sm80 tf32. +def generate_sm80_1688(cutlass_dtype="cutlass::tfloat32_t"): + kernel_dict = { + "element_a": cutlass_dtype, + "layout_a": "cutlass::layout::TensorNHWC", + "element_b": cutlass_dtype, + "layout_b": "cutlass::layout::TensorNHWC", + "element_c": cutlass_dtype, + "layout_c": "cutlass::layout::TensorNHWC", + "opcode_class": "cutlass::arch::OpClassTensorOp", + "arch": "cutlass::arch::Sm80", + "swizzling_functor": "cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<4>", + # alpha is always float! + "element_epilogue": "float", + "math_operator": "cutlass::arch::OpMultiplyAdd", + } + + kernel_dict["stride_support"] = "cutlass::conv::StrideSupport::kStrided" + + # iterate over the supported iterator algorithms + iterator_algorithms = [ + "cutlass::conv::IteratorAlgorithm::kOptimized", + ] + + math_instructions = [ + ( + "16,8,8", + cutlass_dtype, + cutlass_dtype, + "float", + ), + ] + + alignments = [4] + + kernel_dict["align_a"] = "4" + kernel_dict["align_b"] = "4" + # this should evenly divide oc + kernel_dict["epilogue_vector_length"] = "4" + kernel_dict["split_k_slices"] = "1" + + sm80_code = "" + for epi_func in SupportedAct: + op_dict = {} + op_dict["func_name"] = UnderScoreName[epi_func].lower() + "_sm80_fp32" + op_dict["enum_op_name"] = UnderScoreName[epi_func].upper() + # For a function, we record all its kernels into a std::vector in C++ code + all_kernel_names = "" + all_kernel_declares = "" + kernel_dict["epi_func"] = ActTag[epi_func] + suffix = 0 + for iterator_algorithm in iterator_algorithms: + for alignment in alignments: + for math_inst in math_instructions: + tiles = [ + TileDesc("128, 128, 16", 4, "32, 64, 16", math_inst), + TileDesc("128, 128, 16", 3, "32, 64, 16", math_inst), + TileDesc("256, 64, 16", 3, "64, 32, 16", math_inst), + TileDesc("64, 256, 16", 3, "32, 64, 16", math_inst), + TileDesc("128, 64, 16", 4, "64, 32, 16", math_inst), + TileDesc("64, 128, 16", 4, "32, 64, 16", math_inst), + TileDesc("64, 64, 16", 3, "32, 32, 16", math_inst), + TileDesc("128, 128, 32", 3, "32, 64, 32", math_inst), + TileDesc("256, 64, 32", 3, "64, 32, 32", math_inst), + TileDesc("64, 256, 32", 3, "32, 64, 32", math_inst), + TileDesc("128, 64, 32", 3, "64, 32, 32", math_inst), + TileDesc("64, 128, 32", 3, "32, 64, 32", math_inst), + TileDesc("64, 64, 32", 3, "32, 32, 32", math_inst), + ] + for tile in tiles: + kernel_dict["iterator_algorithm"] = iterator_algorithm +
kernel_dict["Tshape"] = tile.Tshape + kernel_dict["Wshape"] = tile.Wshape + kernel_dict["Ishape"] = tile.math_inst[0] + kernel_dict["stages"] = str(tile.stages) + kernel_dict["element_accum"] = tile.math_inst[3] + kernel_dict["kernel_func_name"] = op_dict[ + "func_name" + ] + str(suffix) + suffix += 1 + cba_kernel = cba_kernel_no_alpha + if epi_func in [CbaAct.LeakyRelu]: + cba_kernel = cba_kernel_alpha + kernel_str = ( + cba_header + + SubstituteTemplate(cba_kernel, kernel_dict) + + CommonTail + ) + file_name = ( + "generated_tmp/" + + kernel_dict["kernel_func_name"] + + ".cu" + ) + write_kernel_to_file(kernel_str, file_name) + all_kernel_names += ( + kernel_dict["kernel_func_name"] + ", \n" + ) + all_kernel_declares += ( + "cutlass::Status " + + kernel_dict["kernel_func_name"] + + "(const ConvAllParams& params);" + ) + + # Generate op code + op_dict["kernel_func_declare"] = all_kernel_declares + op_dict["all_kernel_func_name"] = all_kernel_names + sm80_code += SubstituteTemplate(CommonConvFunction, op_dict) + + return sm80_code + + if __name__ == "__main__": sm_versions_and_types = [] args = parse_args() @@ -371,8 +482,10 @@ def generate_sm80_16816(cutlass_dtype="cutlass::half_t"): if args.cuda_arch in ["80", "86", "89"]: sm_versions_and_types.append(["80", "fp16"]) sm_versions_and_types.append(["80", "bf16"]) + sm_versions_and_types.append(["80", "fp32"]) all_code += generate_sm80_16816() all_code += generate_sm80_16816(cutlass_dtype="cutlass::bfloat16_t") + all_code += generate_sm80_1688(cutlass_dtype="cutlass::tfloat32_t") all_code += GenerateFunctionForPhi( sm_versions_and_types, SupportedAct, UnderScoreName, CamelName diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_bias_residual.py b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_bias_residual.py index 629ffc12415e9..e243a64e1548d 100644 --- a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_bias_residual.py +++ b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_bias_residual.py @@ -350,6 +350,121 @@ def generate_sm80_16816(cutlass_dtype="cutlass::half_t"): return sm80_code +def generate_sm80_1688(cutlass_dtype="cutlass::tfloat32_t"): + kernel_dict = { + "conv_kind_name": "Fprop", + "element_a": cutlass_dtype, + "layout_a": "cutlass::layout::TensorNHWC", + "element_b": cutlass_dtype, + "layout_b": "cutlass::layout::TensorNHWC", + "element_c": cutlass_dtype, + "layout_c": "cutlass::layout::TensorNHWC", + "opcode_class": "cutlass::arch::OpClassTensorOp", + "arch": "cutlass::arch::Sm80", + "swizzling_functor": "cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<4>", + # alpha is always float! 
+ "element_epilogue": "float", + "math_operator": "cutlass::arch::OpMultiplyAdd", + "element_residul": cutlass_dtype, + } + + kernel_dict["stride_support"] = "cutlass::conv::StrideSupport::kStrided" + + # iterate over this loop + iterator_algorithms = [ + "cutlass::conv::IteratorAlgorithm::kOptimized", + ] + + math_instructions = [ + ( + "16,8,8", + cutlass_dtype, + cutlass_dtype, + "float", + ), + ] + + alignments = [4] + + kernel_dict["align_a"] = "4" + kernel_dict["align_b"] = "4" + kernel_dict["epilogue_vector_length"] = "4" + kernel_dict["split_k_slices"] = "1" + + sm80_code = "" + for epi_res_block in SupportedEpilogue: + op_dict = {} + op_dict["func_name"] = ( + UnderScoreName[epi_res_block].lower() + "_sm80_fp32" + ) + op_dict["enum_op_name"] = UnderScoreName[epi_res_block].upper() + # for a op, we record all its kernels into a std::vector in C++ code + all_kernel_names = "" + all_kernel_declares = "" + suffix = 0 + for iterator_algorithm in iterator_algorithms: + for alignment in alignments: + for math_inst in math_instructions: + tiles = [ + TileDesc("128, 128, 16", 4, "32, 64, 16", math_inst), + TileDesc("128, 128, 16", 3, "32, 64, 16", math_inst), + TileDesc("256, 64, 16", 3, "64, 32, 16", math_inst), + TileDesc("64, 256, 16", 3, "32, 64, 16", math_inst), + TileDesc("128, 64, 16", 4, "64, 32, 16", math_inst), + TileDesc("64, 128, 16", 4, "32, 64, 16", math_inst), + TileDesc("64, 64, 16", 3, "32, 32, 16", math_inst), + TileDesc("128, 128, 32", 3, "32, 64, 32", math_inst), + TileDesc("256, 64, 32", 3, "64, 32, 32", math_inst), + TileDesc("64, 256, 32", 3, "32, 64, 32", math_inst), + TileDesc("128, 64, 32", 3, "64, 32, 32", math_inst), + TileDesc("64, 128, 32", 3, "32, 64, 32", math_inst), + TileDesc("64, 64, 32", 3, "32, 32, 32", math_inst), + ] + + for tile in tiles: + kernel_dict["iterator_algorithm"] = iterator_algorithm + kernel_dict["Tshape"] = tile.Tshape + kernel_dict["Wshape"] = tile.Wshape + kernel_dict["Ishape"] = tile.math_inst[0] + kernel_dict["stages"] = str(tile.stages) + kernel_dict["element_accum"] = tile.math_inst[3] + kernel_dict["kernel_func_name"] = op_dict[ + "func_name" + ] + str(suffix) + suffix += 1 + kernel_dict["act1"] = ActTag[epi_res_block[0]] + kernel_dict["binary"] = epi_res_block[1] + kernel_dict["act2"] = ActTag[epi_res_block[2]] + + # sm80_code += SubstituteTemplate(cbr_kernel, kernel_dict) + kernel_str = ( + cbr_header + + SubstituteTemplate(cbr_kernel, kernel_dict) + + CommonTail + ) + file_name = ( + "generated_tmp/" + + kernel_dict["kernel_func_name"] + + ".cu" + ) + write_kernel_to_file(kernel_str, file_name) + + all_kernel_names += ( + kernel_dict["kernel_func_name"] + ", \n" + ) + all_kernel_declares += ( + "cutlass::Status " + + kernel_dict["kernel_func_name"] + + "(const ConvAllParams& params);" + ) + + # Generate op code + op_dict["kernel_func_declare"] = all_kernel_declares + op_dict["all_kernel_func_name"] = all_kernel_names + sm80_code += SubstituteTemplate(CommonConvFunction, op_dict) + return sm80_code + + if __name__ == "__main__": sm_versions_and_types = [] args = parse_args() @@ -361,8 +476,10 @@ def generate_sm80_16816(cutlass_dtype="cutlass::half_t"): if args.cuda_arch in ["80", "86", "89"]: sm_versions_and_types.append(["80", "fp16"]) sm_versions_and_types.append(["80", "bf16"]) + sm_versions_and_types.append(["80", "fp32"]) all_code += generate_sm80_16816() all_code += generate_sm80_16816(cutlass_dtype="cutlass::bfloat16_t") + all_code += generate_sm80_1688(cutlass_dtype="cutlass::tfloat32_t") all_code += 
GenerateFunctionForPhi( sm_versions_and_types, SupportedEpilogue, UnderScoreName, CamelName diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_common.py b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_common.py index 6dbf6bcbbb82a..5d2425fe4059b 100644 --- a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_common.py +++ b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_common.py @@ -57,7 +57,7 @@ ${element_c} *bias = (${element_c} *)(params.bias); ${element_c} *output = (${element_c} *)(params.output); // only used by conv2d_bias_residual - auto residual = (${element_c} *)(params.residual); + auto residual = (${element_c} *)(params.residual); int batch = params.batch; int ic = params.ic; @@ -96,8 +96,8 @@ ImplicitGemm implicit_gemm_op; size_t bytes = implicit_gemm_op.get_workspace_size(arguments); -auto stream = params.stream; -void *workspace = params.workspace; + auto stream = params.stream; + void *workspace = params.workspace; cutlass::Status status = implicit_gemm_op.can_implement(arguments); CUTLASS_CHECK(status); @@ -125,7 +125,7 @@ std::map<std::vector<int>, int> map_problem_${func_name}; std::mutex ${func_name}_mutex; -void ${func_name}(ConvAllParams params) { +bool ${func_name}(ConvAllParams params) { int batch = params.batch; int ic = params.ic; int ih = params.ih; @@ -145,7 +145,7 @@ if (map_problem_${func_name}.count(problem_size)) { ${func_name}_all_func[map_problem_${func_name}.at(problem_size)]( params); - return; + return true; } int best_config_index = ProfileToGetBestConfig( @@ -155,6 +155,7 @@ map_problem_${func_name}[problem_size] = best_config_index; ${func_name}_all_func[best_config_index](params); + return true; } """ @@ -164,8 +165,8 @@ # this function is invoked by phi kernel CommonWrapperForPhi = """ -void ${op_name}(ConvAllParams params) { - ${dispatch_body} +bool ${op_name}(ConvAllParams params) { + ${dispatch_body} } """ @@ -173,14 +174,18 @@ def convert_c_data_type(dtype): if dtype == "fp16": return "Conv2dDataType::fp16" - if dtype == "bf16": + elif dtype == "bf16": return "Conv2dDataType::bf16" + elif dtype == "fp32": + return "Conv2dDataType::fp32" + else: + return None CommonDispatchTemp = ''' if (params.sm_version == ${sm_code} && params.data_type == ${data_type}) { - ${op_name_with_sm}(params); + return ${op_name_with_sm}(params); } ''' @@ -213,6 +218,7 @@ def GenerateFunctionForPhi( + data_type ) dispatch_body += SubstituteTemplate(CommonDispatchTemp, sm_dicts) + dispatch_body += ''' return false;''' op_dicts = {} op_dicts["dispatch_body"] = dispatch_body op_dicts["op_name"] = camel_names[epi_func]
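With these template changes, every generated entry point reports through its `bool` return whether any specialization matched `sm_version`/`data_type`, and `GenerateFunctionForPhi` appends a final `return false;`. Roughly, the emitted C++ now has this shape (a reconstruction for illustration only; the concrete names below are hypothetical and filled in by SubstituteTemplate in reality):

// Illustrative reconstruction of one generated dispatcher after this change.
enum class Conv2dDataType { fp16, bf16, fp32 };
struct ConvAllParams { int sm_version; Conv2dDataType data_type; /* ... */ };

bool conv2d_bias_relu_sm75_fp16(ConvAllParams params);  // hypothetical name
bool conv2d_bias_relu_sm80_fp32(ConvAllParams params);  // hypothetical name

bool Conv2dBiasRelu(ConvAllParams params) {
  if (params.sm_version == 75 && params.data_type == Conv2dDataType::fp16) {
    return conv2d_bias_relu_sm75_fp16(params);
  }
  if (params.sm_version == 80 && params.data_type == Conv2dDataType::fp32) {
    return conv2d_bias_relu_sm80_fp32(params);
  }
  return false;  // appended by the new `return false;` in dispatch_body
}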
-extern "C" void Conv2dBiasAddRelu(ConvAllParams params); -extern "C" void Conv2dBiasRelu(ConvAllParams params); -extern "C" void Conv2dBiasLeakyRelu(ConvAllParams params); -extern "C" void Conv2dBiasSilu(ConvAllParams params); -extern "C" void Conv2dBias(ConvAllParams params); -extern "C" void Conv2dBiasSigmoid(ConvAllParams params); +extern "C" bool Conv2dBiasAddRelu(ConvAllParams params); +extern "C" bool Conv2dBiasRelu(ConvAllParams params); +extern "C" bool Conv2dBiasLeakyRelu(ConvAllParams params); +extern "C" bool Conv2dBiasSilu(ConvAllParams params); +extern "C" bool Conv2dBias(ConvAllParams params); +extern "C" bool Conv2dBiasSigmoid(ConvAllParams params); -extern "C" void Conv2dDepthwiseBias(ConvAllParams params); -extern "C" void Conv2dDepthwiseBiasRelu(ConvAllParams params); -extern "C" void Conv2dDepthwiseBiasSigmoid(ConvAllParams params); -extern "C" void Conv2dDepthwiseBiasSilu(ConvAllParams params); - -extern "C" int HelloFromCutlassConv2d(int a, int b); +extern "C" bool Conv2dDepthwiseBias(ConvAllParams params); +extern "C" bool Conv2dDepthwiseBiasRelu(ConvAllParams params); +extern "C" bool Conv2dDepthwiseBiasSigmoid(ConvAllParams params); +extern "C" bool Conv2dDepthwiseBiasSilu(ConvAllParams params); } // namespace cutlass_internal } // namespace fusion diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_util.cu b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_util.cu index 0a08cd165519d..6aed60cf1c23b 100644 --- a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_util.cu +++ b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_util.cu @@ -325,6 +325,14 @@ int ProfileToGetBestConfig( params, op_type, static_cast(1.0)) << " compared with baseline," << "cost_time: " << elapsed_time << "ms." << std::endl; + } else if (params.data_type == Conv2dDataType::fp32) { + // debug code + std::cout << OpType2String(op_type) << ": tactic " << i + << " has max diff " + << conv2d_diff_gpu( + params, op_type, static_cast(1.0)) + << " compared with baseline," + << "cost_time: " << elapsed_time << "ms." 
diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/generic_mixed_gemm_kernelLauncher.py b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/generic_mixed_gemm_kernelLauncher.py index 5847956020ceb..17911e4898220 100644 --- a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/generic_mixed_gemm_kernelLauncher.py +++ b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/generic_mixed_gemm_kernelLauncher.py @@ -234,9 +234,7 @@ def generate_source_cu( for arch in archs: for epilogue_tag in EpilogueTags.keys(): for stages in StagesList[arch]: - file_name = "autogen_tmp/generic_mixed_gemm_kernelLauncher_{}_sm{}_stages{}_{}.cu".format( - element_type, arch, stages, epilogue_tag - ) + file_name = f"autogen_tmp/generic_mixed_gemm_kernelLauncher_{element_type}_sm{arch}_stages{stages}_{epilogue_tag}.cu" all_code = generate_source_cu( element_type, arch, diff --git a/paddle/phi/kernels/fusion/cutlass/fused_conv2d_add_act_kernel.cu b/paddle/phi/kernels/fusion/cutlass/fused_conv2d_add_act_kernel.cu index 79057bee76219..9bf22b8ff84a1 100644 --- a/paddle/phi/kernels/fusion/cutlass/fused_conv2d_add_act_kernel.cu +++ b/paddle/phi/kernels/fusion/cutlass/fused_conv2d_add_act_kernel.cu @@ -26,7 +26,7 @@ namespace phi { namespace fusion { namespace cutlass_internal { -typedef void (*func)(phi::fusion::cutlass_internal::ConvAllParams); +typedef bool (*func)(phi::fusion::cutlass_internal::ConvAllParams); template <typename T, typename Context> void FusedConv2dAddActKernel(const Context& ctx, @@ -230,7 +230,12 @@ void FusedConv2dAddActKernel(const Context& ctx, "Cutlass conv2d_depthwise does not support this activation: %s.", activation.c_str())); } - conv_func(params); + + if (!conv_func(params)) { + PADDLE_THROW( + phi::errors::Fatal("no fused_conv2d_add_act cutlass kernel ")); + } + output->set_layout(DataLayout::NHWC); return; } @@ -265,7 +270,11 @@ void FusedConv2dAddActKernel(const Context& ctx, PADDLE_THROW(phi::errors::InvalidArgument( "Cutlass does not support this activation: %s.", activation.c_str())); } - conv_func(params); + + if (!conv_func(params)) { + PADDLE_THROW(phi::errors::Fatal("no fused_conv2d_add_act cutlass kernel ")); + } + output->set_layout(DataLayout::NHWC); } } // namespace cutlass_internal
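On the phi side the contract is now explicit: a `false` return means no CUTLASS kernel matched, and the caller fails loudly instead of silently skipping the convolution. Schematically (a simplified sketch; `ConvAllParams` is treated as opaque and `PADDLE_THROW` replaced by a plain exception):

#include <stdexcept>

struct ConvAllParams;  // opaque here; the real struct carries all conv arguments
typedef bool (*func)(ConvAllParams*);

void RunFusedConv(func conv_func, ConvAllParams* params) {
  // Previously the void-returning dispatcher could fall through silently;
  // now an unmatched sm_version/data_type combination is a hard error.
  if (!conv_func(params)) {
    throw std::runtime_error("no fused_conv2d_add_act cutlass kernel");
  }
}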
diff --git a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu index 60a82cfe7c198..48819c12a8dc0 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu @@ -11,7 +11,12 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#ifndef PADDLE_WITH_HIP +#ifdef PADDLE_WITH_HIP +#include <hip/hip_runtime.h> +#include <hip/hip_fp16.h> +#include <hipcub/hipcub.hpp> +namespace cub = hipcub; +#else #include <cuda_fp16.h> #include <cub/cub.cuh> #endif @@ -21,9 +26,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" -#ifndef PADDLE_WITH_HIP #include "paddle/phi/kernels/fusion/gpu/fused_dropout_helper.h" -#endif namespace phi { namespace fusion { @@ -51,7 +54,6 @@ void FusedBiasDropoutResidualLnGradKernel( DenseTensor* bias_grad, DenseTensor* ln_scale_grad, DenseTensor* ln_bias_grad) { -#ifndef PADDLE_WITH_HIP using U = LayerNormParamType<T>; auto* d_y_data = y_grad.data<T>(); auto* ln_scale_data = @@ -114,15 +116,19 @@ d_x_data, d_bias_data, d_residual_data); -#else - PADDLE_THROW(phi::errors::Unimplemented( - "FusedBiasDropoutResidualLnGradKernel not surpport for rocm")); -#endif } } // namespace fusion } // namespace phi +#ifdef PADDLE_WITH_HIP +PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm_grad, + GPU, + ALL_LAYOUT, + phi::fusion::FusedBiasDropoutResidualLnGradKernel, + float, + phi::dtype::float16) {} +#else PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm_grad, GPU, ALL_LAYOUT, @@ -130,3 +136,4 @@ PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm_grad, float, double, phi::dtype::float16) {} +#endif
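The ROCm port keeps one kernel body for both backends: HIP builds alias `hipcub` to `cub`, so the CUB-based reductions compile unchanged, and only the registration blocks differ (the HIP build drops the `double` instantiation). A trimmed sketch of the aliasing trick (illustrative; `BlockSum` is our own example, not patch code):

#ifdef PADDLE_WITH_HIP
#include <hipcub/hipcub.hpp>
namespace cub = hipcub;  // cub:: now resolves to hipcub under HIP
#else
#include <cub/cub.cuh>
#endif

template <typename T, int BlockSize>
__device__ T BlockSum(T val) {
  // Identical source for CUDA and ROCm thanks to the namespace alias.
  typedef cub::BlockReduce<T, BlockSize> BlockReduce;
  __shared__ typename BlockReduce::TempStorage temp;
  return BlockReduce(temp).Sum(val);
}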
diff --git a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu index 37450d3a4e178..ca0bcbe7f2466 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu @@ -17,9 +17,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" -#ifndef PADDLE_WITH_HIP #include "paddle/phi/kernels/fusion/gpu/fused_dropout_helper.h" -#endif namespace phi { namespace fusion { @@ -42,7 +40,6 @@ void FusedBiasDropoutResidualLnKernel( DenseTensor* dropout_mask_out, DenseTensor* ln_mean, DenseTensor* ln_variance) { -#ifndef PADDLE_WITH_HIP using U = phi::funcs::LayerNormParamType<T>; auto* x_data = x.data<T>(); auto* bias_data = (bias.get_ptr() == nullptr) ? nullptr : bias->data<T>(); @@ -95,14 +92,20 @@ y_data, ln_mean_data, ln_var_data); -#else - PADDLE_THROW(phi::errors::Unimplemented( - "FusedBiasDropoutResidualLnKernel not support for rocm")); -#endif } } // namespace fusion } // namespace phi +#ifdef PADDLE_WITH_HIP +PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm, + GPU, + ALL_LAYOUT, + phi::fusion::FusedBiasDropoutResidualLnKernel, + float, + phi::dtype::float16) { + kernel->OutputAt(1).SetDataType(phi::DataType::UINT8); +} +#else PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm, GPU, ALL_LAYOUT, @@ -112,3 +115,4 @@ PD_REGISTER_KERNEL(fused_bias_dropout_residual_layer_norm, float, phi::dtype::float16) { kernel->OutputAt(1).SetDataType(phi::DataType::UINT8); } +#endif diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_act_bias.h b/paddle/phi/kernels/fusion/gpu/fused_dropout_act_bias.h index e5f5c9ba50ba4..d2cd2f1b545a7 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_act_bias.h +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_act_bias.h @@ -35,7 +35,11 @@ struct GeluFunctor { template <typename T> struct FastGeluFunctor { inline __device__ T operator()(const T x) const { +#ifdef PADDLE_WITH_HIP + assert(0 && "ROCM does not support FastGelu"); +#else return phi::GeluFwd<T, true>(x); +#endif } }; @@ -92,8 +96,8 @@ __global__ void FusedDropoutActBias( int row_id = blockIdx.y; int idx = row_id * cols + col_id; - curandStatePhilox4_32_10_t state; - curand_init(seed, idx, increment, &state); + GPURAND(StatePhilox4_32_10_t) state; + GPURAND(_init)(seed, idx, increment, &state); const T factor = phi::fusion::GetFactor<T>(dropout_prob, is_upscale_in_train, is_test);
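`GPURAND` is the token-pasting macro introduced in the fused_dropout_common.h hunk further below: `GPURAND(StatePhilox4_32_10_t)` expands to `curandStatePhilox4_32_10_t` or `hiprandStatePhilox4_32_10_t`, and `GPURAND(_init)` to `curand_init`/`hiprand_init`, so one kernel body drives both RNG APIs. A self-contained sketch (illustrative; the HIP include path may vary by ROCm version):

#ifdef PADDLE_WITH_HIP
#include <hiprand_kernel.h>
#define GPURAND(str) hiprand##str
#else
#include <curand_kernel.h>
#define GPURAND(str) curand##str
#endif

__device__ float UniformSample(unsigned long long seed, int idx,
                               unsigned long long offset) {
  // Expands to curandStatePhilox4_32_10_t / hiprandStatePhilox4_32_10_t.
  GPURAND(StatePhilox4_32_10_t) state;
  GPURAND(_init)(seed, idx, offset, &state);
  return GPURAND(_uniform)(&state);  // uniform float in (0, 1] on both backends
}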
diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu index 801f070251fb2..8994d52138233 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_grad_kernel.cu @@ -202,18 +202,6 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx, ? NoMaskBwFunctor<T, float>(1.0f - dropout_rate) : NoMaskBwFunctor<T, float>(1.0f - dropout_rate, 1.0f); -#ifdef PADDLE_WITH_HIP - VectorizedDropoutBackward<T, NoMaskBwFunctor<T, float>> - <<>>(0, - numel, - seed_data, // idx: 2 need save - x_grad_data, - y_grad_data, - out_grad_data, - increment, // idx: 6 need save - main_offset, - functor); -#else // we assume seed/offset is same across iterations // seed_offset_data should preserved by cudaGraph pool const phi::GPUContext* dev_ctx_p = &dev_ctx; @@ -233,9 +221,13 @@ cudaKernelCallback = [=](unsigned int id) { void* functionPtr = reinterpret_cast<void*>( &(VectorizedDropoutBackward<T, NoMaskBwFunctor<T, float>>)); +#ifdef PADDLE_WITH_HIP + hipFunction_t cudaFunc = reinterpret_cast<hipFunction_t>(functionPtr); +#else cudaFunction_t cudaFunc; PADDLE_ENFORCE_GPU_SUCCESS( cudaGetFuncBySymbol(&cudaFunc, functionPtr)); +#endif VLOG(10) << "[cudaKernelCallback] cudaFunc = " << cudaFunc << " functionPtr = " << functionPtr; @@ -257,7 +249,6 @@ VLOG(10) << "NON_CUDA_GRAPH seed = " << seed_data << ", increment = " << increment; -#endif } } diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu index c95c5fbf0ca3d..54ec3604bbee9 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_add_kernel.cu @@ -186,18 +186,6 @@ void FusedDropoutAddKernel(const Context& dev_ctx, auto dst_functor = NoMaskFwFunctor<T, float>(1.0f - dropout_rate, upscale_in_train); -#ifdef PADDLE_WITH_HIP - VectorizedDropoutForward<T, NoMaskFwFunctor<T, float>> - <<>>(0, - numel, - seed_data, // need save - x_data, - y_data, - out_data, - increment, // need save - main_offset, - dst_functor); -#else // we assume seed/offset is same across iterations // seed_offset_data should preserved by cudaGraph pool const phi::GPUContext* dev_ctx_p = &dev_ctx; @@ -237,9 +225,13 @@ cudaKernelCallback = [=](unsigned int id) { void* functionPtr = reinterpret_cast<void*>( &(VectorizedDropoutForward<T, NoMaskFwFunctor<T, float>>)); +#ifdef PADDLE_WITH_HIP + hipFunction_t cudaFunc = reinterpret_cast<hipFunction_t>(functionPtr); +#else cudaFunction_t cudaFunc; PADDLE_ENFORCE_GPU_SUCCESS( cudaGetFuncBySymbol(&cudaFunc, functionPtr)); +#endif VLOG(10) << "[cudaKernelCallback] cudaFunc = " << cudaFunc << " functionPtr = " << functionPtr; @@ -260,7 +252,6 @@ VLOG(10) << "NON_CUDA_GRAPH seed = " << seed_data << ", increment = " << increment; -#endif } else { using MT = typename phi::dtype::MPTypeTrait<T>::Type; MT factor = static_cast<MT>(1.0f - dropout_rate);
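Both dropout-add kernels need a kernel handle for the CUDA-graph capture path. CUDA obtains a `cudaFunction_t` through `cudaGetFuncBySymbol`, while the HIP branch simply reinterprets the host function pointer as a `hipFunction_t`. Schematically (our own stand-in kernel; the patch wraps the CUDA call in `PADDLE_ENFORCE_GPU_SUCCESS`):

#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#else
#include <cuda_runtime.h>
#endif

__global__ void SomeKernel(int n) {}  // illustrative stand-in

bool LookupKernelHandle() {
  void* functionPtr = reinterpret_cast<void*>(&SomeKernel);
#ifdef PADDLE_WITH_HIP
  // HIP path: no symbol lookup; the host stub is cast directly.
  hipFunction_t gpuFunc = reinterpret_cast<hipFunction_t>(functionPtr);
  return gpuFunc != nullptr;
#else
  cudaFunction_t gpuFunc;
  return cudaGetFuncBySymbol(&gpuFunc, functionPtr) == cudaSuccess;
#endif
}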
diff --git a/paddle/phi/kernels/fusion/gpu/fused_dropout_common.h b/paddle/phi/kernels/fusion/gpu/fused_dropout_common.h index 2ef46378b1b9b..ef9ecbb435fdb 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_dropout_common.h +++ b/paddle/phi/kernels/fusion/gpu/fused_dropout_common.h @@ -20,10 +20,25 @@ limitations under the License. */ #include <cooperative_groups.h> #endif +#ifdef PADDLE_WITH_HIP +#include <hip/hip_runtime.h> +#include <hip/hip_fp16.h> +#include <hiprand.h> +#include <hiprand_kernel.h> +#endif + #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" #include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h" +#ifdef PADDLE_WITH_HIP +#define GPU(str) hip##str +#define GPURAND(str) hiprand##str +#else +#define GPU(str) cuda##str +#define GPURAND(str) curand##str +#endif + namespace phi { namespace fusion { @@ -63,26 +78,29 @@ inline phi::backends::gpu::GpuLaunchConfig Get1DBlocksAnd2DGrids( } template <int VecSize> -__forceinline__ __device__ void RandVec(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec(GPURAND(StatePhilox4_32_10_t) * state, float *data); template <> -__forceinline__ __device__ void RandVec<1>(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec<1>(GPURAND(StatePhilox4_32_10_t) * + state, float *data) { - data[0] = curand_uniform(state); + data[0] = GPURAND(_uniform)(state); } template <> -__forceinline__ __device__ void RandVec<2>(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec<2>(GPURAND(StatePhilox4_32_10_t) * + state, float *data) { - data[0] = curand_uniform(state); - data[1] = curand_uniform(state); + data[0] = GPURAND(_uniform)(state); + data[1] = GPURAND(_uniform)(state); } template <> -__forceinline__ __device__ void RandVec<4>(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec<4>(GPURAND(StatePhilox4_32_10_t) * + state, float *data) { - float4 rand4 = curand_uniform4(state); + float4 rand4 = GPURAND(_uniform4)(state); data[0] = rand4.x; data[1] = rand4.y; data[2] = rand4.w; @@ -90,7 +108,8 @@ __forceinline__ __device__ void RandVec<4>(curandStatePhilox4_32_10_t *state, } template <> -__forceinline__ __device__ void RandVec<8>(curandStatePhilox4_32_10_t *state, +__forceinline__ __device__ void RandVec<8>(GPURAND(StatePhilox4_32_10_t) * + state, float *data) { RandVec<4>(state, data); RandVec<4>(state, data + 4); @@ -99,7 +118,7 @@ __forceinline__ __device__ void RandVec<8>(curandStatePhilox4_32_10_t *state, template <typename T> inline void SetZero(const phi::GPUContext &ctx, T *ptr, const size_t size) { PADDLE_ENFORCE_GPU_SUCCESS( - cudaMemsetAsync(ptr, 0, size * sizeof(T), ctx.stream())); + GPU(MemsetAsync)(ptr, 0, size * sizeof(T), ctx.stream())); } /** diff --git a/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu index e31b24e7e105e..221019531a548 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu @@ -38,10 +38,19 @@ limitations under the License. #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" -#ifndef PADDLE_WITH_HIP -#include <cub/cub.cuh> #include "paddle/phi/kernels/fusion/gpu/attention_layer.norm.h" #include "paddle/phi/kernels/fusion/gpu/fused_dropout_helper.h" +#ifdef PADDLE_WITH_HIP +#include <hip/hip_runtime.h> +#include <hip/hip_fp16.h> +#include <hipcub/hipcub.hpp> +namespace cub = hipcub; +#define GPU(str) hip##str +#define GPUMultiProcessorCount hipDeviceAttributeMultiprocessorCount +#else +#include <cub/cub.cuh> +#define GPU(str) cuda##str +#define GPUMultiProcessorCount cudaDevAttrMultiProcessorCount #endif namespace phi { @@ -50,9 +59,11 @@ namespace fusion { namespace { -#ifndef PADDLE_WITH_HIP - +#ifdef PADDLE_WITH_HIP +constexpr int kWarpSize = 64; +#else constexpr int kWarpSize = 32; +#endif template <typename T> struct SumOp { @@ -74,7 +85,11 @@ template