# Merge pull request PaddlePaddle#89 from Shixiaowei02/dev/custom-op

add the custom operator tutorial

7 changed files with 584 additions and 0 deletions.
## CMakeLists.txt

```cmake
cmake_minimum_required(VERSION 3.0)
project(cpp_inference_demo CXX C)
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON)
option(USE_TENSORRT "Compile demo with TensorRT." ON)
# option() stores booleans only; the list of custom-operator sources belongs
# in a string cache variable.
set(CUSTOM_OPERATOR_FILES "" CACHE STRING "List of file names for custom operators")

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(external/boost)

if(WITH_GPU)
  find_package(CUDA REQUIRED)
  add_definitions("-DPADDLE_WITH_CUDA")
endif()

if(NOT WITH_STATIC_LIB)
  add_definitions("-DPADDLE_WITH_SHARED_LIB")
else()
  # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode.
  # Set it to empty in static library mode to avoid compilation issues.
  add_definitions("/DPD_INFER_DECL=")
endif()

# On MSVC, switch every configuration from the DLL runtime (/MD) to the
# static runtime (/MT).
macro(safe_set_static_flag)
  foreach(flag_var
      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
    if(${flag_var} MATCHES "/MD")
      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
    endif()
  endforeach()
endmacro()

if(NOT DEFINED PADDLE_LIB)
  message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
endif()
if(NOT DEFINED DEMO_NAME)
  message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
endif()

include_directories("${PADDLE_LIB}/")
set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")

link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
link_directories("${PADDLE_LIB}/paddle/lib")

if(WIN32)
  add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
  option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
  if(MSVC_STATIC_CRT)
    if(WITH_MKL)
      set(FLAG_OPENMP "/openmp")
    endif()
    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}")
    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}")
    safe_set_static_flag()
    if(WITH_STATIC_LIB)
      add_definitions(-DSTATIC_LIB)
    endif()
  endif()
else()
  if(WITH_MKL)
    set(FLAG_OPENMP "-fopenmp")
  endif()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}")
endif()

if(WITH_GPU)
  if(NOT WIN32)
    set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
  else()
    if(CUDA_LIB STREQUAL "")
      set(CUDA_LIB "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64")
    endif()
  endif()
endif()

if(USE_TENSORRT AND WITH_GPU)
  set(TENSORRT_ROOT "" CACHE STRING "The root directory of TensorRT library")
  if("${TENSORRT_ROOT}" STREQUAL "")
    message(FATAL_ERROR "TENSORRT_ROOT is empty; set it on the CMake command line, e.g. -DTENSORRT_ROOT=TENSORRT_ROOT_PATH")
  endif()
  set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT}/include)
  set(TENSORRT_LIB_DIR ${TENSORRT_ROOT}/lib)
endif()

if(NOT WIN32)
  if(USE_TENSORRT AND WITH_GPU)
    include_directories("${TENSORRT_INCLUDE_DIR}")
    link_directories("${TENSORRT_LIB_DIR}")
  endif()
endif()

if(WITH_MKL)
  set(MATH_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mklml")
  include_directories("${MATH_LIB_PATH}/include")
  if(WIN32)
    set(MATH_LIB ${MATH_LIB_PATH}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX}
                 ${MATH_LIB_PATH}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX})
  else()
    set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
                 ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
  set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn")
  if(EXISTS ${MKLDNN_PATH})
    include_directories("${MKLDNN_PATH}/include")
    if(WIN32)
      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
    else()
      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
    endif()
  endif()
else()
  set(OPENBLAS_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}openblas")
  include_directories("${OPENBLAS_LIB_PATH}/include/openblas")
  if(WIN32)
    set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX})
  else()
    set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
  endif()
endif()

if(WITH_STATIC_LIB)
  set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  if(WIN32)
    set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX})
  else()
    set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
endif()

if(NOT WIN32)
  set(EXTERNAL_LIB "-lrt -ldl -lpthread")
  set(DEPS ${DEPS}
      ${MATH_LIB} ${MKLDNN_LIB}
      glog gflags protobuf xxhash
      ${EXTERNAL_LIB})
else()
  set(DEPS ${DEPS}
      ${MATH_LIB} ${MKLDNN_LIB}
      glog gflags_static libprotobuf xxhash ${EXTERNAL_LIB})
  set(DEPS ${DEPS} shlwapi.lib)
endif()

if(WITH_GPU)
  if(NOT WIN32)
    if(USE_TENSORRT)
      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
    endif()
    set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
  else()
    if(USE_TENSORRT)
      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
      set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
    endif()
    set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX})
    set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX})
    set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
  endif()
endif()

# Build the custom operator sources into a shared library and link it,
# together with boost, into the demo executable.
cuda_add_library(pd_infer_custom_op ${CUSTOM_OPERATOR_FILES} SHARED)
add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
set(DEPS ${DEPS} boost pd_infer_custom_op)

if(WIN32)
  if(USE_TENSORRT)
    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}
        ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
      COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}
        ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
    )
  endif()
  if(WITH_MKL)
    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release
      COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release
      COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll ${CMAKE_BINARY_DIR}/Release
    )
  else()
    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${CMAKE_BINARY_DIR}/Release
    )
  endif()
  if(NOT WITH_STATIC_LIB)
    add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_fluid.dll" ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
    )
  endif()
endif()

target_link_libraries(${DEMO_NAME} ${DEPS})
```
## README.md
## Building and Running a Custom-Operator Model

### 1. Get the custom operator model used in this example

Download: https://paddle-inference-dist.bj.bcebos.com/inference_demo/custom_operator/custom_relu_infer_model.tgz
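A typical way to fetch and unpack it with standard `wget`/`tar`; the extracted directory name `custom_relu_infer_model/` is assumed from the archive name and from the model paths used in `custom_op_test.cc`:

```shell
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/custom_operator/custom_relu_infer_model.tgz
tar xzf custom_relu_infer_model.tgz  # unpacks to custom_relu_infer_model/
```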
### 2. Build the example

The files `custom_relu_op.cc` and `custom_relu_op.cu` contain the custom operator sources; see the [PaddlePaddle documentation](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html) for how to write a custom operator.
Note: custom operators are currently an experimental feature. They depend on boost and must be built together with the Paddle Inference library `libpaddle_inference.so`.

`custom_op_test.cc` is the sample inference program.
`CMakeLists.txt` is the build script.
`run_impl.sh` configures the third-party and prebuilt libraries.

First, adjust the configuration in `run_impl.sh`.

1) **Edit `run_impl.sh`**

Open `run_impl.sh` and modify the following entries:

```shell
# Consult version.txt in the prebuilt library to decide whether to enable these three flags
WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=OFF

# Root directory of the inference library
LIB_DIR=${YOUR_LIB_DIR}/paddle_inference_install_dir

# If WITH_GPU or USE_TENSORRT is ON above, set the corresponding CUDA, CUDNN, and TensorRT paths.
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.5_cuda10.1/lib64
CUDA_LIB=/paddle/nvidia-downloads/cuda-10.1/lib64
# TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5
```

Run `sh run_impl.sh`; this creates a `build` directory.
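`run_impl.sh` itself is not reproduced above. As a minimal sketch, assuming the script simply forwards the variables from the previous block to the `CMakeLists.txt` shown earlier and then builds, it plausibly looks like this (the exact contents may differ):

```shell
#!/bin/bash
# Hypothetical reconstruction of run_impl.sh -- the shipped script may differ.
set -e

WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=OFF

LIB_DIR=${YOUR_LIB_DIR}/paddle_inference_install_dir
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.5_cuda10.1/lib64
CUDA_LIB=/paddle/nvidia-downloads/cuda-10.1/lib64
# TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5

mkdir -p build
cd build

# Forward the configuration to CMake; variable names match the CMakeLists above.
cmake .. -DPADDLE_LIB=${LIB_DIR} \
  -DWITH_MKL=${WITH_MKL} \
  -DWITH_GPU=${WITH_GPU} \
  -DUSE_TENSORRT=${USE_TENSORRT} \
  -DCUDA_LIB=${CUDA_LIB} \
  -DCUDNN_LIB=${CUDNN_LIB} \
  -DDEMO_NAME=custom_op_test \
  -DCUSTOM_OPERATOR_FILES="custom_relu_op.cc;custom_relu_op.cu"
make -j
```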
2) **Run the example**

```shell
# enter the build directory
cd build
# run the sample program
./custom_op_test
```

When the run finishes, the program prints the model output to the screen, indicating success.

### More links
- [Paddle Inference Quick Start](https://paddle-inference.readthedocs.io/en/latest/introduction/quick_start.html)
- [Using the Paddle Inference C++ API](https://paddle-inference.readthedocs.io/en/latest/api_reference/cxx_api_index.html)
- [Using the Paddle Inference Python API](https://paddle-inference.readthedocs.io/en/latest/api_reference/python_api_index.html)
## cmake/external/boost.cmake

```cmake
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(ExternalProject)

set(BOOST_PROJECT "extern_boost")
# To release PaddlePaddle as a pip package, we have to follow the
# manylinux1 standard, which features as old Linux kernels and
# compilers as possible and recommends CentOS 5. Indeed, the earliest
# CentOS version that works with NVIDIA CUDA is CentOS 6. And a new
# version of boost, say, 1.66.0, doesn't build on CentOS 6. We
# checked that the devtools package of CentOS 6 installs boost 1.41.0.
# So we use 1.41.0 here.
set(BOOST_VER "1.41.0")
set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)

message(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")

set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")

set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}" CACHE PATH "boost include directory." FORCE)
set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
include_directories(${BOOST_INCLUDE_DIR})

# boost is used header-only here: download and unpack, with no
# configure/build/install steps.
ExternalProject_Add(
  ${BOOST_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  DOWNLOAD_DIR         ${BOOST_DOWNLOAD_DIR}
  URL                  ${BOOST_URL}
  DOWNLOAD_NO_PROGRESS 1
  PREFIX               ${BOOST_SOURCES_DIR}
  CONFIGURE_COMMAND    ""
  BUILD_COMMAND        ""
  INSTALL_COMMAND      ""
  UPDATE_COMMAND       ""
)

# Older CMake lacks INTERFACE libraries, so fall back to a static library
# built from a generated dummy source; either way, other targets can link
# against `boost` to pick up the dependency.
if(${CMAKE_VERSION} VERSION_LESS "3.3.0" OR NOT WIN32)
  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c)
  file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
  add_library(boost STATIC ${dummyfile})
else()
  add_library(boost INTERFACE)
endif()

add_dependencies(boost ${BOOST_PROJECT})
set(Boost_INCLUDE_DIR ${BOOST_INCLUDE_DIR})
```
## custom_op_test.cc

```cpp
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <functional>  // std::multiplies
#include <numeric>     // std::accumulate
#include <vector>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"

using paddle_infer::Config;
using paddle_infer::CreatePredictor;
using paddle_infer::Predictor;

// Feed `input` (with shape `input_shape`) to the predictor's first input,
// run inference, and copy the first output back into `out_data`.
void run(Predictor *predictor, const std::vector<float> &input,
         const std::vector<int> &input_shape, std::vector<float> *out_data) {
  auto input_names = predictor->GetInputNames();
  auto input_t = predictor->GetInputHandle(input_names[0]);
  input_t->Reshape(input_shape);
  input_t->CopyFromCpu(input.data());

  CHECK(predictor->Run());

  auto output_names = predictor->GetOutputNames();
  auto output_t = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());

  out_data->resize(out_num);
  output_t->CopyToCpu(out_data->data());
}

int main() {
  Config config;
  config.EnableUseGpu(100, 0);
  // Model and parameter files from the downloaded archive, relative to the
  // working directory.
  config.SetModel("custom_relu_infer_model/custom_relu.pdmodel",
                  "custom_relu_infer_model/custom_relu.pdiparams");
  auto predictor = CreatePredictor(config);
  std::vector<int> input_shape = {1, 1, 28, 28};
  std::vector<float> input_data(1 * 1 * 28 * 28, 1);
  std::vector<float> out_data;
  run(predictor.get(), input_data, input_shape, &out_data);
  for (auto e : out_data) {
    LOG(INFO) << e;
  }
  return 0;
}
```
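The operator sources `custom_relu_op.cc` and `custom_relu_op.cu` are among the changed files but are not reproduced above. As a rough, hypothetical sketch only: a CPU-side custom ReLU written against the public `paddle/extension.h` custom-operator API of this period might look like the following; the function name, macros, and signatures here are assumptions and may not match the tutorial's actual files.

```cpp
// Hypothetical sketch of a CPU custom ReLU; not the tutorial's actual source.
#include <algorithm>
#include <vector>

#include "paddle/extension.h"  // Paddle custom-operator extension API (assumed)

std::vector<paddle::Tensor> ReluCpuForward(const paddle::Tensor& x) {
  // Allocate an output tensor on CPU with the same shape as the input.
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());

  const float* x_data = x.data<float>();
  float* out_data = out.mutable_data<float>(x.place());
  for (int64_t i = 0; i < x.size(); ++i) {
    out_data[i] = std::max(x_data[i], 0.0f);  // ReLU: max(x, 0)
  }
  return {out};
}

// Register the operator so inference can resolve it as `custom_relu`.
PD_BUILD_OP(custom_relu)
    .Inputs({"X"})
    .Outputs({"Out"})
    .SetKernelFn(PD_KERNEL(ReluCpuForward));
```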