Merge pull request PaddlePaddle#165 from jiweibo/tuned_dynamic_shape
Add tuned_dynamic_shape demo
jiweibo authored Aug 31, 2021
2 parents 2aa854f + e2b32ef commit e2b5e26
Showing 9 changed files with 1,651 additions and 0 deletions.
370 changes: 370 additions & 0 deletions c++/paddle-trt/tuned_dynamic_shape/README.md

Large diffs are not rendered by default.

149 changes: 149 additions & 0 deletions c++/paddle-trt/tuned_dynamic_shape/bert.cc
@@ -0,0 +1,149 @@
#include "paddle/include/paddle_inference_api.h"

#include <functional>
#include <gflags/gflags.h>
#include <glog/logging.h>

#include <chrono>
#include <iostream>
#include <memory>
#include <numeric>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(max_batch_size, 1, "max batch size");
DEFINE_bool(use_gpu, true, "use gpu.");
DEFINE_bool(use_trt, true, "use trt.");
DEFINE_string(trt_precision, "trt_fp32", "trt_fp32, trt_fp16, etc.");
DEFINE_bool(serialize, false, "serialize");
DEFINE_bool(tuned_dynamic_shape, false, "use tuned dynamic shape");
DEFINE_bool(tune, false, "tune to get shape range.");
DEFINE_bool(allow_build_at_runtime, true, "allow rebuilding the TensorRT engine at runtime");

using Predictor = paddle_infer::Predictor;
using Config = paddle_infer::Config;

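// File that CollectShapeRangeInfo writes during the --tune pass and that
// EnableTunedTensorRtDynamicShape reads back afterwards.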
const std::string shape_range_info = "shape_range_info.pbtxt";

paddle_infer::PrecisionType GetPrecisionType(const std::string& ptype) {
if (ptype == "trt_fp32")
return paddle_infer::PrecisionType::kFloat32;
if (ptype == "trt_fp16")
return paddle_infer::PrecisionType::kHalf;
return paddle_infer::PrecisionType::kFloat32;
}

std::vector<int> GetInputShape(const std::string& s, const std::string delimiter=":") {
std::vector<int> res;
size_t start = 0;
size_t end = s.find(delimiter);
while (end != std::string::npos) {
std::string val = s.substr(start, end - start);
res.push_back(std::stoi(val));
start = end + delimiter.length();
end = s.find(delimiter, start);
}
if (!s.substr(start, end).empty())
res.push_back(std::stoi(s.substr(start, end)));
return res;
}

void PrepareConfig(Config *config) {
if (FLAGS_model_dir != "") {
config->SetModel(FLAGS_model_dir);
} else {
config->SetModel(FLAGS_model_file, FLAGS_params_file);
}

if (FLAGS_use_gpu) {
config->EnableUseGpu(500, 0);
if (FLAGS_use_trt) {
config->EnableTensorRtEngine(1 << 30, FLAGS_max_batch_size, 3,
GetPrecisionType(FLAGS_trt_precision), FLAGS_serialize, false);
if (FLAGS_tuned_dynamic_shape) {
config->EnableTunedTensorRtDynamicShape(shape_range_info, FLAGS_allow_build_at_runtime);
}
}
}

if (FLAGS_tune) {
config->CollectShapeRangeInfo(shape_range_info);
}

LOG(INFO) << config->Summary();
}

void SingleThreadRun(std::shared_ptr<Predictor> predictor, const std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<int64_t>>>& input_info,
std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>>* output_info, int thread_id) {
auto in_names = predictor->GetInputNames();
for (auto& name : in_names) {
auto in_handle = predictor->GetInputHandle(name);
in_handle->Reshape(input_info.at(name).first);
in_handle->CopyFromCpu(input_info.at(name).second.data());
}

CHECK(predictor->Run());

output_info->clear();
auto out_names = predictor->GetOutputNames();
for (auto& name : out_names) {
auto out_handle = predictor->GetOutputHandle(name);
std::vector<int> shape = out_handle->shape();
int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
std::vector<float> out_data;
if (out_handle->type() == paddle_infer::DataType::FLOAT32) {
std::vector<float> tmp_out_data(num);
out_handle->CopyToCpu(tmp_out_data.data());
out_data.insert(out_data.begin(), tmp_out_data.begin(), tmp_out_data.end());
} else if (out_handle->type() == paddle_infer::DataType::INT32) {
std::vector<int32_t> tmp_out_data(num);
out_handle->CopyToCpu(tmp_out_data.data());
out_data.insert(out_data.begin(), tmp_out_data.begin(), tmp_out_data.end());
} else {
LOG(FATAL) << "Unsupported output data type.";
}
output_info->insert(std::make_pair(name, std::make_pair(shape, out_data)));
}
VLOG(1) << thread_id << " run done.";
}

int main(int argc, char **argv) {
google::ParseCommandLineFlags(&argc, &argv, true);

Config config;
PrepareConfig(&config);

auto predictor = paddle_infer::CreatePredictor(config);
auto in_names = predictor->GetInputNames();
auto out_name = predictor->GetOutputNames()[0]; // "save_infer_model/scale_0.tmp_1"

std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<int64_t>>> input_infos;
std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>> output_infos;

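// Sweep batch sizes (1..max_batch_size) and sequence lengths so the collected
// shape ranges (or the tuned TRT engine) cover a spread of dynamic input shapes.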
std::vector<int32_t> features{32, 64, 128, 256};
for (int b = 1; b <= FLAGS_max_batch_size; b++) {
for (auto f : features) {
std::vector<int> shape{b, f};
int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
std::vector<int64_t> in_data(num);
for (int i = 0; i < num; ++i) {
in_data[i] = i % 50006;
}
input_infos[in_names[0]] = std::make_pair(shape, in_data);

std::vector<int64_t> token(num);
for (int i = 0; i < num; ++i) {
token[i] = i % 2;
}
input_infos[in_names[1]] = std::make_pair(shape, token);
SingleThreadRun(predictor, input_infos, &output_infos, 0);
}
}

LOG(INFO) << "Run done";
}
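
One plausible two-pass invocation of this bert demo (a sketch: the model directory is illustrative, and the binary name assumes compile.sh builds ./build/bert):

# Pass 1: collect shape ranges into shape_range_info.pbtxt
./build/bert --model_dir=./ernie_model --use_trt=false --tune=true
# Pass 2: run with TensorRT using the tuned dynamic shapes
./build/bert --model_dir=./ernie_model --use_trt=true --tuned_dynamic_shape=true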
167 changes: 167 additions & 0 deletions c++/paddle-trt/tuned_dynamic_shape/clas.cc
@@ -0,0 +1,167 @@
#include "paddle/include/paddle_inference_api.h"

#include <functional>
#include <gflags/gflags.h>
#include <glog/logging.h>

#include <chrono>
#include <iostream>
#include <memory>
#include <numeric>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(max_batch_size, 1, "max batch size");
DEFINE_bool(use_gpu, true, "use gpu.");
DEFINE_bool(use_trt, true, "use trt.");
DEFINE_string(trt_precision, "trt_fp32", "trt_fp32, trt_fp16, etc.");
DEFINE_bool(serialize, false, "serialize");
DEFINE_bool(tuned_dynamic_shape, false, "use tuned dynamic shape");
DEFINE_bool(tune, false, "tune to get shape range.");
DEFINE_bool(allow_build_at_runtime, true, "allow rebuilding the TensorRT engine at runtime");
DEFINE_string(hs, "224", "input heights, separated by ':'");
DEFINE_string(ws, "224", "input widths, separated by ':'");
DEFINE_string(no_seen_hs, "224", "unseen input heights, separated by ':'");
DEFINE_string(no_seen_ws, "224", "unseen input widths, separated by ':'");

using Predictor = paddle_infer::Predictor;
using Config = paddle_infer::Config;

const std::string shape_range_info = "shape_range_info.pbtxt";

paddle_infer::PrecisionType GetPrecisionType(const std::string& ptype) {
if (ptype == "trt_fp32")
return paddle_infer::PrecisionType::kFloat32;
if (ptype == "trt_fp16")
return paddle_infer::PrecisionType::kHalf;
return paddle_infer::PrecisionType::kFloat32;
}

std::vector<int> GetInputShape(const std::string& s, const std::string delimiter=":") {
std::vector<int> res;
size_t start = 0;
size_t end = s.find(delimiter);
while (end != std::string::npos) {
std::string val = s.substr(start, end - start);
res.push_back(std::stoi(val));
start = end + delimiter.length();
end = s.find(delimiter, start);
}
if (!s.substr(start, end).empty())
res.push_back(std::stoi(s.substr(start, end)));
return res;
}

void PrepareConfig(Config *config) {
if (FLAGS_model_dir != "") {
config->SetModel(FLAGS_model_dir);
} else {
config->SetModel(FLAGS_model_file, FLAGS_params_file);
}

if (FLAGS_use_gpu) {
config->EnableUseGpu(500, 0);
if (FLAGS_use_trt) {
config->EnableTensorRtEngine(1 << 30, FLAGS_max_batch_size, 3,
GetPrecisionType(FLAGS_trt_precision), FLAGS_serialize, false);
if (FLAGS_tuned_dynamic_shape) {
// config->Exp_DisableTensorRtOPs({"elementwise_add"});
config->EnableTunedTensorRtDynamicShape(shape_range_info, FLAGS_allow_build_at_runtime);
}
}
}

if (FLAGS_tune) {
config->CollectShapeRangeInfo(shape_range_info);
}

LOG(INFO) << config->Summary();
}

void SingleThreadRun(std::shared_ptr<Predictor> predictor, const std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>>& input_info,
std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>>* output_info, int thread_id) {
auto in_names = predictor->GetInputNames();
for (auto& name : in_names) {
auto in_handle = predictor->GetInputHandle(name);
in_handle->Reshape(input_info.at(name).first);
in_handle->CopyFromCpu(input_info.at(name).second.data());
}

CHECK(predictor->Run());

output_info->clear();
auto out_names = predictor->GetOutputNames();
for (auto& name : out_names) {
auto out_handle = predictor->GetOutputHandle(name);
std::vector<int> shape = out_handle->shape();
int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
std::vector<float> out_data(num);
out_handle->CopyToCpu(out_data.data());
output_info->insert(std::make_pair(name, std::make_pair(shape, out_data)));
}
VLOG(1) << thread_id << " run done.";
}

int main(int argc, char **argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
std::vector<int> hs = GetInputShape(FLAGS_hs);
std::vector<int> ws = GetInputShape(FLAGS_ws);
CHECK_EQ(hs.size(), ws.size()) << "The number of input heights and widths should be the same";
std::vector<int> no_seen_hs = GetInputShape(FLAGS_no_seen_hs);
std::vector<int> no_seen_ws = GetInputShape(FLAGS_no_seen_ws);
CHECK_EQ(no_seen_hs.size(), no_seen_ws.size()) << "The number of input heights and widths should be the same";

Config config;
PrepareConfig(&config);

auto predictor = paddle_infer::CreatePredictor(config);
auto in_name = predictor->GetInputNames()[0]; // "x"
auto out_name = predictor->GetOutputNames()[0]; // "save_infer_model/scale_0.tmp_1"

std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>> input_infos;
std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>> output_infos;
constexpr int channel = 3;

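// Run every (batch_size, height, width) combination listed via --hs/--ws; with
// --tune this records the shape ranges that the later TensorRT pass will use.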
for (int b = 1; b <= FLAGS_max_batch_size; b++) {
for (size_t i = 0; i < hs.size(); ++i) {
int h = hs[i];
int w = ws[i];
std::vector<int> shape{b, channel, h, w};
int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
std::vector<float> in_data(num);
for (int j = 0; j < num; ++j) {
in_data[j] = j % 255 * 0.13f;
}
input_infos[in_name] = std::make_pair(shape, in_data);
SingleThreadRun(predictor, input_infos, &output_infos, 0);
LOG(INFO) << "Run input shape{" << b << ", " << channel << ", " << h << ", " << w << "} done.";
}
}

// If allow_build_at_runtime is enabled, feed shapes that were never tuned so the TRT engine is rebuilt at runtime.
if (!FLAGS_tune && FLAGS_allow_build_at_runtime) {
LOG(INFO) << "Run unseen shapes and rebuild the TRT engine at runtime";
int b = FLAGS_max_batch_size;
for (size_t i = 0; i < no_seen_hs.size(); ++i) {
int h = no_seen_hs[i];
int w = no_seen_ws[i];
std::vector<int> shape{b, channel, h, w};
int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
std::vector<float> in_data(num);
for (int j = 0; j < num; ++j) {
in_data[j] = j % 255 * 0.13f;
}
input_infos[in_name] = std::make_pair(shape, in_data);
SingleThreadRun(predictor, input_infos, &output_infos, 0);
LOG(INFO) << "Run input shape{" << b << ", " << channel << ", " << h << ", " << w << "} done.";
}
}

LOG(INFO) << "Run done";
}
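
A sketch of how the clas demo's flags might be exercised (model paths and sizes are illustrative; the binary name assumes compile.sh's default DEMO_NAME). --hs/--ws list the colon-separated heights and widths covered during tuning, while --no_seen_hs/--no_seen_ws give sizes outside the tuned set, exercised only when --allow_build_at_runtime is on:

# Pass 1: tune over several input sizes (writes shape_range_info.pbtxt)
./build/clas --model_file=resnet50/inference.pdmodel --params_file=resnet50/inference.pdiparams --use_trt=false --tune=true --hs=112:224 --ws=112:224
# Pass 2: TensorRT with the tuned shapes; 448x448 was never tuned, so the engine is rebuilt at runtime
./build/clas --model_file=resnet50/inference.pdmodel --params_file=resnet50/inference.pdiparams --use_trt=true --tuned_dynamic_shape=true --allow_build_at_runtime=true --hs=112:224 --ws=112:224 --no_seen_hs=448 --no_seen_ws=448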
48 changes: 48 additions & 0 deletions c++/paddle-trt/tuned_dynamic_shape/compile.sh
@@ -0,0 +1,48 @@
#!/bin/bash
set +x
set -e

work_path=$(dirname "$(readlink -f "$0")")

# 1. check paddle_inference exists
if [ ! -d "${work_path}/../../lib/paddle_inference" ]; then
echo "Please download the paddle_inference lib and move it to Paddle-Inference-Demo/lib"
exit 1
fi

# 2. check CMakeLists exists
if [ ! -f "${work_path}/CMakeLists.txt" ]; then
cp -a "${work_path}/../../lib/CMakeLists.txt" "${work_path}/"
fi

# 3. compile
mkdir -p build
cd build
rm -rf *

if [ -z "$1" ]; then
DEMO_NAME=clas
else
DEMO_NAME=$1
fi

WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

LIB_DIR=${work_path}/../../lib/paddle_inference
CUDNN_LIB=/usr/lib/x86_64-linux-gnu/
CUDA_LIB=/usr/local/cuda/lib64
TENSORRT_ROOT=/usr/local/TensorRT-7.2.3.4

cmake .. -DPADDLE_LIB=${LIB_DIR} \
-DWITH_MKL=${WITH_MKL} \
-DDEMO_NAME=${DEMO_NAME} \
-DWITH_GPU=${WITH_GPU} \
-DWITH_STATIC_LIB=OFF \
-DUSE_TENSORRT=${USE_TENSORRT} \
-DCUDNN_LIB=${CUDNN_LIB} \
-DCUDA_LIB=${CUDA_LIB} \
-DTENSORRT_ROOT=${TENSORRT_ROOT}

make -j
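
Typical invocations of this script (a sketch; it assumes the shared CMakeLists.txt names the executable after DEMO_NAME, so the binaries land in build/):

bash compile.sh          # builds the default demo, clas
bash compile.sh bert     # builds the bert demo instead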