Merge pull request PaddlePaddle#52 from cryoco/trt-dynamic-shape-demo
add trt dynamic shape demo
cryoco authored Dec 10, 2020
2 parents e6a611d + fe9489d commit e867abe
Showing 2 changed files with 110 additions and 0 deletions.
22 changes: 22 additions & 0 deletions c++/paddle-trt/README.md
@@ -203,3 +203,25 @@ cd build
- [Paddle Inference Quick Start!](https://paddle-inference.readthedocs.io/en/latest/introduction/quick_start.html)
- [Paddle Inference C++ API usage](https://paddle-inference.readthedocs.io/en/latest/user_guides/cxx_api.html)
- [Paddle Inference Python API usage](https://paddle-inference.readthedocs.io/en/latest/user_guides/inference_python_api.html)

### 4. Using TRT dynamic shape for variable-length inputs

TRT's default mode requires fixed-size inputs: every input must have the same shape, and that shape must match the model's input shape. To run inference on images of different sizes, the variable-length (dynamic shape) input feature has to be enabled. The relevant API is:

```c++
std::map<std::string, std::vector<int>> min_input_shape = {
    {"image", {FLAGS_batch_size, 3, 112, 112}}};
std::map<std::string, std::vector<int>> max_input_shape = {
    {"image", {FLAGS_batch_size, 3, 448, 448}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
    {"image", {FLAGS_batch_size, 3, 224, 224}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                              opt_input_shape);
```
`min_input_shape`, `max_input_shape`, and `opt_input_shape` are maps from `std::string` to `std::vector<int>` describing the minimum, maximum, and optimal shape of each input variable. "Optimal" means that TRT optimizes the engine for that shape, so in theory inputs close to the optimal shape achieve the best performance.
The keys of these maps are the names of the TRT subgraph inputs; note that they do not necessarily match the model's input names. If there are multiple TRT subgraphs, the minimum, maximum, and optimal shapes must be configured for every input of every subgraph.
The sample is run the same way as the TRT FP32 inference sample, except that the binary is `trt_dynamic_shape_test`.
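
The following is a minimal sketch (not part of the committed demo) of how a predictor configured with the dynamic shape range above can be fed inputs of different spatial sizes; it assumes a single input named `image` and reuses the `paddle_infer` C++ API shown in `trt_dynamic_shape_test.cc` below:

```c++
// Sketch only: any height/width between the configured minimum (112) and
// maximum (448) is accepted by the same predictor once dynamic shape is set.
#include <vector>

#include "paddle/include/paddle_inference_api.h"

void RunWithSize(paddle_infer::Predictor *predictor, int height, int width) {
  std::vector<int> shape = {1, 3, height, width};
  std::vector<float> input(1 * 3 * height * width, 1.0f);  // dummy data

  auto input_names = predictor->GetInputNames();
  auto input_t = predictor->GetInputHandle(input_names[0]);
  input_t->Reshape(shape);             // per-request input size
  input_t->CopyFromCpu(input.data());
  predictor->Run();
}

// e.g. RunWithSize(predictor.get(), 224, 224);
//      RunWithSize(predictor.get(), 448, 448);
```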
88 changes: 88 additions & 0 deletions c++/paddle-trt/trt_dynamic_shape_test.cc
@@ -0,0 +1,88 @@
#include <chrono>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"

using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::CreatePredictor;
using paddle_infer::PrecisionType;

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(batch_size, 1, "Batch size.");

using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); }
double time_diff(Time t1, Time t2) {
  typedef std::chrono::microseconds ms;
  auto diff = t2 - t1;
  ms counter = std::chrono::duration_cast<ms>(diff);
  return counter.count() / 1000.0;
}

std::shared_ptr<Predictor> InitPredictor() {
  Config config;
  if (FLAGS_model_dir != "") {
    config.SetModel(FLAGS_model_dir);
  } else {
    config.SetModel(FLAGS_model_file, FLAGS_params_file);
  }
  // Use the GPU: initialize a 500 MB memory pool on device 0.
  config.EnableUseGpu(500, 0);
  // Enable TensorRT with a 1 GB workspace, max batch size FLAGS_batch_size,
  // minimum subgraph size 5, FP32 precision, and both the static engine and
  // INT8 calibration disabled.
  config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5,
                              PrecisionType::kFloat32, false, false);
  // Dynamic shape: declare the minimum, maximum, and optimal shapes of the
  // TRT subgraph input "image" so the engine accepts variable-sized inputs.
  std::map<std::string, std::vector<int>> min_input_shape = {
      {"image", {FLAGS_batch_size, 3, 112, 112}}};
  std::map<std::string, std::vector<int>> max_input_shape = {
      {"image", {FLAGS_batch_size, 3, 448, 448}}};
  std::map<std::string, std::vector<int>> opt_input_shape = {
      {"image", {FLAGS_batch_size, 3, 224, 224}}};
  config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                opt_input_shape);
  return CreatePredictor(config);
}

void run(Predictor *predictor, const std::vector<float> &input,
         const std::vector<int> &input_shape, std::vector<float> *out_data) {
  int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1,
                                  std::multiplies<int>());
  CHECK_EQ(static_cast<size_t>(input_num), input.size());

  auto input_names = predictor->GetInputNames();
  auto input_t = predictor->GetInputHandle(input_names[0]);
  input_t->Reshape(input_shape);
  input_t->CopyFromCpu(input.data());

  CHECK(predictor->Run());

  auto output_names = predictor->GetOutputNames();
  // there is only one output of Resnet50
  auto output_t = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());

  out_data->resize(out_num);
  output_t->CopyToCpu(out_data->data());
}

int main(int argc, char *argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto predictor = InitPredictor();
  std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
  // Init input as 1.0 here for example. You can also load preprocessed real
  // pictures to vectors as input.
  std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
  std::vector<float> out_data;
  run(predictor.get(), input_data, input_shape, &out_data);
  // Print the first 20 outputs.
  for (int i = 0; i < 20; i++) {
    LOG(INFO) << out_data[i];
  }
  return 0;
}
