Commit
Optimize CopyFromCpu and CopyToCpu of the ONNX Runtime backend (PaddlePaddle#40561)

* add onnxruntime predictor

* Add code comments

* support linking paddle2onnx and onnxruntime

* support onnxruntime with python

* support onnxruntime with python

* support onnxruntime with windows

* paddle2onnx compile with windows

* support windows compile

* support windows compile with onnxruntime

* support windows compile with paddle2onnx

* support mac compile

* compile with mac

* compile with mac

* add code comments

* fix reminder wording

* code optimization

* add test case

* add test case

* add inference demo_ci test case

* fix compile paddle2onnx with no python

* add inference demo_ci test case

* add inference demo_ci test case

* add inference infer_ut test case

* support c go api and test cases

* add coverage test case

* add coverage test case

* add capi test case

* add capi test case

* fix onnxruntime CopyFromCpu and CopyToCpu

* fix goapi

* modify code
heliqi authored and liqitong-a committed Mar 17, 2022
1 parent 456e0bb commit 347d9f8
Showing 6 changed files with 237 additions and 132 deletions.
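For orientation before the diffs: the flow these changes serve is the standard paddle_infer predictor loop, with CopyFromCpu and CopyToCpu now dispatching to the ONNX Runtime backend when it is enabled. Below is a minimal sketch, assuming a build with WITH_ONNXRUNTIME=ON; the model directory, input shape, and the use of the first input/output name are placeholders, not part of this commit:

#include <functional>
#include <numeric>
#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle_infer::Config config("model_dir");  // placeholder model directory
  config.EnableONNXRuntime();  // route execution through the ORT backend
  auto predictor = paddle_infer::CreatePredictor(config);

  // With the ORT backend, CopyFromCpu binds this host buffer to an
  // Ort::IoBinding instead of copying it into a LoDTensor.
  auto input = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  std::vector<float> data(1 * 3 * 224 * 224, 0.f);
  input->Reshape({1, 3, 224, 224});
  input->CopyFromCpu(data.data());

  predictor->Run();

  // CopyToCpu reads the result back from the bound output value.
  auto output = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  auto out_shape = output->shape();
  int out_num = std::accumulate(out_shape.begin(), out_shape.end(), 1,
                                std::multiplies<int>());
  std::vector<float> out_data(out_num);
  output->CopyToCpu(out_data.data());
  return 0;
}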
1 change: 1 addition & 0 deletions cmake/external/paddle2onnx.cmake
@@ -61,6 +61,7 @@ set(PADDLE2ONNX_OPTIONAL_ARGS
   -DONNX_CUSTOM_PROTOC_PATH=${PROTOC_BIN_PATH}
   -DWITH_STATIC=OFF
   -DCMAKE_INSTALL_PREFIX=${PADDLE2ONNX_INSTALL_DIR}
+  -DCMAKE_INSTALL_LIBDIR=${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}
   -DCMAKE_POSITION_INDEPENDENT_CODE=ON
   -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
   ${EXTERNAL_OPTIONAL_ARGS}
6 changes: 5 additions & 1 deletion paddle/fluid/inference/api/details/CMakeLists.txt
@@ -14,7 +14,11 @@
 #
 
 cc_library(reset_tensor_array SRCS reset_tensor_array.cc DEPS lod_tensor scope)
-cc_library(zero_copy_tensor SRCS zero_copy_tensor.cc DEPS scope lod_tensor enforce)
+if (WITH_ONNXRUNTIME)
+  cc_library(zero_copy_tensor SRCS zero_copy_tensor.cc DEPS scope lod_tensor enforce onnxruntime)
+else (WITH_ONNXRUNTIME)
+  cc_library(zero_copy_tensor SRCS zero_copy_tensor.cc DEPS scope lod_tensor enforce)
+endif (WITH_ONNXRUNTIME)
 cc_library(zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc)
 
 cc_test(zero_copy_tensor_test SRCS zero_copy_tensor_test.cc DEPS paddle_inference_api)
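The zero_copy_tensor.cc changes below read several ORT-specific members of Tensor whose declarations live in the public header (paddle_tensor.h, one of the three changed files not expanded on this page). The following is a hypothetical reconstruction of that state, inferred from how the .cc code uses it — the names mirror that usage, but the struct itself is an illustration, not the header's actual text:

#include <memory>
#include <vector>

#include "onnxruntime_cxx_api.h"  // Ort::IoBinding

// Illustrative only: the ORT-related state that zero_copy_tensor.cc assumes
// exists on paddle_infer::Tensor (authoritative declarations are in
// paddle/fluid/inference/api/paddle_tensor.h).
struct OrtTensorState {
  bool is_ort_tensor = false;             // set via Tensor::SetOrtMark()
  std::vector<int64_t> shape;             // recorded by Reshape() for inputs
  std::weak_ptr<Ort::IoBinding> binding;  // IoBinding owned by the predictor
  int idx = -1;                           // < 0: input handle; >= 0: index
                                          // into binding->GetOutputValues()
};

Holding only a weak_ptr to the IoBinding is what makes the PADDLE_ENFORCE_NOT_NULL(binding, ...) checks in the new code meaningful: lock() yields a null pointer once the predictor that owns the binding has been destroyed, rather than a dangling one.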
151 changes: 145 additions & 6 deletions paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -22,12 +22,22 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/core/allocator.h"
+#ifdef PADDLE_WITH_ONNXRUNTIME
+#include "paddle/fluid/inference/api/onnxruntime_predictor.h"
+#endif
 
 namespace paddle_infer {
 
 using float16 = paddle::platform::float16;
 
 void Tensor::Reshape(const std::vector<int> &shape) {
+#ifdef PADDLE_WITH_ONNXRUNTIME
+  if (is_ort_tensor_) {
+    shape_.assign(shape.begin(), shape.end());
+    return;
+  }
+#endif
+
   PADDLE_ENFORCE_EQ(
       name_.empty(), false,
       paddle::platform::errors::PreconditionNotMet(
@@ -123,6 +133,11 @@ T *Tensor::data(PlaceType *place, int *size) const {
 }
 
 DataType Tensor::type() const {
+#ifdef PADDLE_WITH_ONNXRUNTIME
+  if (is_ort_tensor_) {
+    return dtype_;
+  }
+#endif
   EAGER_GET_TENSOR(paddle::framework::LoDTensor);
   auto type = paddle::framework::TransToProtoVarType(tensor->dtype());
   if (type == paddle::framework::proto::VarType::FP32) {
@@ -145,6 +160,13 @@ PlaceType Tensor::place() const { return place_; }
 
 template <typename T>
 void Tensor::CopyFromCpu(const T *data) {
+#ifdef PADDLE_WITH_ONNXRUNTIME
+  if (is_ort_tensor_) {
+    ORTCopyFromCpu<T>(data);
+    return;
+  }
+#endif
+
   EAGER_GET_TENSOR(paddle::framework::LoDTensor);
   PADDLE_ENFORCE_GE(tensor->numel(), 0,
                     paddle::platform::errors::PreconditionNotMet(
@@ -382,6 +404,13 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
 
 template <typename T>
 void Tensor::CopyToCpu(T *data) const {
+#ifdef PADDLE_WITH_ONNXRUNTIME
+  if (is_ort_tensor_) {
+    ORTCopyToCpu<T>(data);
+    return;
+  }
+#endif
+
   CopyToCpuImpl<T>(data, nullptr, nullptr, nullptr);
 }
 
@@ -489,12 +518,7 @@ template PD_INFER_DECL uint8_t *Tensor::mutable_data<uint8_t>(PlaceType place);
 template PD_INFER_DECL int8_t *Tensor::mutable_data<int8_t>(PlaceType place);
 template PD_INFER_DECL float16 *Tensor::mutable_data<float16>(PlaceType place);
 
-Tensor::Tensor(void *scope) : scope_{scope} {
-  PADDLE_ENFORCE_NOT_NULL(scope_,
-                          paddle::platform::errors::PreconditionNotMet(
-                              "The `scope` can not be nullptr. It should be "
-                              "set to the pointer of scope."));
-}
+Tensor::Tensor(void *scope) : scope_{scope} {}
 
 template <typename T>
 void *Tensor::FindTensor() const {
@@ -513,6 +537,26 @@ void *Tensor::FindTensor() const {
 }
 
 std::vector<int> Tensor::shape() const {
+#ifdef PADDLE_WITH_ONNXRUNTIME
+  if (is_ort_tensor_) {
+    std::vector<int> shape;
+    // input handle
+    if (idx_ < 0) {
+      shape.assign(shape_.begin(), shape_.end());
+    } else {  // output handle
+      auto binding = binding_.lock();
+      PADDLE_ENFORCE_NOT_NULL(binding,
+                              paddle::platform::errors::PreconditionNotMet(
+                                  "output tensor [%s] no binding ptr", name_));
+      std::vector<Ort::Value> outputs = binding->GetOutputValues();
+      Ort::Value &value = outputs[idx_];
+      auto info = value.GetTensorTypeAndShapeInfo();
+      auto ort_shape = info.GetShape();
+      shape.assign(ort_shape.begin(), ort_shape.end());
+    }
+    return shape;
+  }
+#endif
   EAGER_GET_TENSOR(paddle::framework::LoDTensor);
   PADDLE_ENFORCE_NOT_NULL(
       tensor_, paddle::platform::errors::PreconditionNotMet(
@@ -573,4 +617,99 @@ void Tensor::SetPlace(PlaceType place, int device) {
   device_ = device;
 }
 
+#ifdef PADDLE_WITH_ONNXRUNTIME
+void Tensor::SetOrtMark(bool is_ort_tensor) { is_ort_tensor_ = is_ort_tensor; }
+
+void Tensor::SetOrtBinding(const std::shared_ptr<Ort::IoBinding> binding) {
+  binding_ = binding;
+}
+
+Ort::Value GetOrtValue(const Ort::MemoryInfo &memory_info, float *data,
+                       size_t size, const int64_t *shape, size_t shape_len) {
+  return Ort::Value::CreateTensor<float>(memory_info, data, size, shape,
+                                         shape_len);
+}
+
+Ort::Value GetOrtValue(const Ort::MemoryInfo &memory_info, int64_t *data,
+                       size_t size, const int64_t *shape, size_t shape_len) {
+  return Ort::Value::CreateTensor<int64_t>(memory_info, data, size, shape,
+                                           shape_len);
+}
+
+Ort::Value GetOrtValue(const Ort::MemoryInfo &memory_info, int32_t *data,
+                       size_t size, const int64_t *shape, size_t shape_len) {
+  return Ort::Value::CreateTensor<int32_t>(memory_info, data, size, shape,
+                                           shape_len);
+}
+
+Ort::Value GetOrtValue(const Ort::MemoryInfo &memory_info, uint8_t *data,
+                       size_t size, const int64_t *shape, size_t shape_len) {
+  return Ort::Value::CreateTensor<uint8_t>(memory_info, data, size, shape,
+                                           shape_len);
+}
+
+Ort::Value GetOrtValue(const Ort::MemoryInfo &memory_info, int8_t *data,
+                       size_t size, const int64_t *shape, size_t shape_len) {
+  return Ort::Value::CreateTensor<int8_t>(memory_info, data, size, shape,
+                                          shape_len);
+}
+
+Ort::Value GetOrtValue(const Ort::MemoryInfo &memory_info, float16 *data,
+                       size_t size, const int64_t *shape, size_t shape_len) {
+  return Ort::Value::CreateTensor(memory_info, static_cast<void *>(data),
+                                  size * sizeof(float16), shape, shape_len,
+                                  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
+}
+
+template <typename T>
+void Tensor::ORTCopyFromCpu(const T *data) {
+  auto binding = binding_.lock();
+  PADDLE_ENFORCE_NOT_NULL(binding,
+                          paddle::platform::errors::PreconditionNotMet(
+                              "input tensor [%s] no binding ptr", name_));
+  const char *device_name = place_ == PlaceType::kCPU ? "Cpu" : "Cuda";
+  Ort::MemoryInfo memory_info(device_name, OrtDeviceAllocator, device_,
+                              OrtMemTypeDefault);
+  size_t size = std::accumulate(begin(shape_), end(shape_), 1UL,
+                                std::multiplies<size_t>());
+  auto ort_value = GetOrtValue(memory_info, const_cast<T *>(data), size,
+                               shape_.data(), shape_.size());
+  binding->BindInput(name_.c_str(), ort_value);
+}
+
+template <typename T>
+void Tensor::ORTCopyToCpu(T *data) const {
+  auto binding = binding_.lock();
+  PADDLE_ENFORCE_NOT_NULL(binding,
+                          paddle::platform::errors::PreconditionNotMet(
+                              "output tensor [%s] no binding ptr", name_));
+  std::vector<Ort::Value> outputs = binding->GetOutputValues();
+  Ort::Value &value = outputs[idx_];
+  auto info = value.GetTensorTypeAndShapeInfo();
+  size_t size = info.GetElementCount() * sizeof(T);
+
+  if (place_ == PlaceType::kCPU) {
+    std::memcpy(static_cast<void *>(data), value.GetTensorData<void>(), size);
+  } else {
+    paddle::memory::Copy(paddle::platform::CPUPlace(),
+                         static_cast<void *>(data),
+                         paddle::platform::CUDAPlace(device_),
+                         value.GetTensorData<void>(), size, nullptr);
+  }
+}
+
+template void Tensor::ORTCopyFromCpu<float>(const float *data);
+template void Tensor::ORTCopyFromCpu<int64_t>(const int64_t *data);
+template void Tensor::ORTCopyFromCpu<int32_t>(const int32_t *data);
+template void Tensor::ORTCopyFromCpu<uint8_t>(const uint8_t *data);
+template void Tensor::ORTCopyFromCpu<int8_t>(const int8_t *data);
+template void Tensor::ORTCopyFromCpu<float16>(const float16 *data);
+
+template void Tensor::ORTCopyToCpu<float>(float *data) const;
+template void Tensor::ORTCopyToCpu<int32_t>(int32_t *data) const;
+template void Tensor::ORTCopyToCpu<uint8_t>(uint8_t *data) const;
+template void Tensor::ORTCopyToCpu<int8_t>(int8_t *data) const;
+template void Tensor::ORTCopyToCpu<float16>(float16 *data) const;
+#endif
+
 } // namespace paddle_infer
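Stripped of the Paddle plumbing, the pattern ORTCopyFromCpu and ORTCopyToCpu implement is ordinary Ort::IoBinding usage: wrap a caller-owned buffer in an Ort::Value without copying, bind it by name, run, and read results back from GetOutputValues(). A self-contained sketch against the ONNX Runtime C++ API — "model.onnx" and the tensor names "x" and "y" are placeholders:

#include <cstring>
#include <vector>

#include "onnxruntime_cxx_api.h"

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "iobinding-demo");
  Ort::SessionOptions opts;
  Ort::Session session(env, "model.onnx", opts);  // placeholder model path
  Ort::IoBinding binding(session);

  // Bind a caller-owned host buffer as the input: CreateTensor wraps the
  // memory without copying, which is the zero-copy step CopyFromCpu relies on.
  std::vector<float> input{1.f, 2.f, 3.f};
  std::vector<int64_t> shape{1, 3};
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
  Ort::Value value = Ort::Value::CreateTensor<float>(
      memory_info, input.data(), input.size(), shape.data(), shape.size());
  binding.BindInput("x", value);

  // Let ORT allocate the output on the CPU, then run with the binding.
  binding.BindOutput("y", memory_info);
  session.Run(Ort::RunOptions{}, binding);

  // Read the result back, mirroring ORTCopyToCpu's CPU branch.
  std::vector<Ort::Value> outputs = binding.GetOutputValues();
  auto info = outputs[0].GetTensorTypeAndShapeInfo();
  std::vector<float> result(info.GetElementCount());
  std::memcpy(result.data(), outputs[0].GetTensorData<float>(),
              result.size() * sizeof(float));
  return 0;
}

On the GPU path the commit performs the same binding against a "Cuda" Ort::MemoryInfo and copies the output back with paddle::memory::Copy instead of std::memcpy.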