Merge pull request PaddlePaddle#165 from jiweibo/tuned_dynamic_shape
Add tuned_dynamic_shape demo
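The new demos exercise Paddle Inference's tuned dynamic shape workflow in two steps: a tuning run with --tune, where CollectShapeRangeInfo records the tensor shape ranges observed at runtime into shape_range_info.pbtxt, and an inference run with --tuned_dynamic_shape, where EnableTunedTensorRtDynamicShape hands those ranges to the TensorRT engine (with --allow_build_at_runtime permitting a rebuild when an unseen shape appears). Usage sketches follow each demo below.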
Showing 9 changed files with 1,651 additions and 0 deletions.
@@ -0,0 +1,149 @@
#include "paddle/include/paddle_inference_api.h"

#include <gflags/gflags.h>
#include <glog/logging.h>

#include <chrono>
#include <functional>
#include <iostream>
#include <numeric>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(max_batch_size, 1, "max batch size");
DEFINE_bool(use_gpu, true, "use gpu.");
DEFINE_bool(use_trt, true, "use trt.");
DEFINE_string(trt_precision, "trt_fp32", "trt_fp32, trt_fp16, etc.");
DEFINE_bool(serialize, false, "serialize");
DEFINE_bool(tuned_dynamic_shape, false, "use tuned dynamic shape");
DEFINE_bool(tune, false, "tune to get shape range.");
DEFINE_bool(allow_build_at_runtime, true, "allow rebuild trt engine at runtime");

using Predictor = paddle_infer::Predictor;
using Config = paddle_infer::Config;

// File that stores the collected shape ranges.
const std::string shape_range_info = "shape_range_info.pbtxt";

paddle_infer::PrecisionType GetPrecisionType(const std::string& ptype) {
  if (ptype == "trt_fp32")
    return paddle_infer::PrecisionType::kFloat32;
  if (ptype == "trt_fp16")
    return paddle_infer::PrecisionType::kHalf;
  return paddle_infer::PrecisionType::kFloat32;
}

// Parse a delimiter-separated list of integers, e.g. "224:448" -> {224, 448}.
std::vector<int> GetInputShape(const std::string& s, const std::string& delimiter = ":") {
  std::vector<int> res;
  size_t start = 0;
  size_t end = s.find(delimiter);
  while (end != std::string::npos) {
    std::string val = s.substr(start, end - start);
    res.push_back(std::stoi(val));
    start = end + delimiter.length();
    end = s.find(delimiter, start);
  }
  if (!s.substr(start, end).empty())
    res.push_back(std::stoi(s.substr(start, end)));
  return res;
}

void PrepareConfig(Config *config) {
  if (FLAGS_model_dir != "") {
    config->SetModel(FLAGS_model_dir);
  } else {
    config->SetModel(FLAGS_model_file, FLAGS_params_file);
  }

  if (FLAGS_use_gpu) {
    config->EnableUseGpu(500, 0);
    if (FLAGS_use_trt) {
      config->EnableTensorRtEngine(1 << 30, FLAGS_max_batch_size, 3,
                                   GetPrecisionType(FLAGS_trt_precision),
                                   FLAGS_serialize, false);
      if (FLAGS_tuned_dynamic_shape) {
        // Load the previously collected shape ranges for the TRT dynamic shape engine.
        config->EnableTunedTensorRtDynamicShape(shape_range_info,
                                                FLAGS_allow_build_at_runtime);
      }
    }
  }

  if (FLAGS_tune) {
    // Record the shape range of every tensor into shape_range_info.pbtxt.
    config->CollectShapeRangeInfo(shape_range_info);
  }

  LOG(INFO) << config->Summary();
}

void SingleThreadRun(
    std::shared_ptr<Predictor> predictor,
    const std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<int64_t>>>& input_info,
    std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>>* output_info,
    int thread_id) {
  auto in_names = predictor->GetInputNames();
  for (auto& name : in_names) {
    auto in_handle = predictor->GetInputHandle(name);
    in_handle->Reshape(input_info.at(name).first);
    in_handle->CopyFromCpu(input_info.at(name).second.data());
  }

  CHECK(predictor->Run());

  output_info->clear();
  auto out_names = predictor->GetOutputNames();
  for (auto& name : out_names) {
    auto out_handle = predictor->GetOutputHandle(name);
    std::vector<int> shape = out_handle->shape();
    int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
    std::vector<float> out_data;
    if (out_handle->type() == paddle_infer::DataType::FLOAT32) {
      std::vector<float> tmp_out_data(num);
      out_handle->CopyToCpu(tmp_out_data.data());
      out_data.insert(out_data.begin(), tmp_out_data.begin(), tmp_out_data.end());
    } else if (out_handle->type() == paddle_infer::DataType::INT32) {
      std::vector<int32_t> tmp_out_data(num);
      out_handle->CopyToCpu(tmp_out_data.data());
      out_data.insert(out_data.begin(), tmp_out_data.begin(), tmp_out_data.end());
    } else {
      LOG(FATAL) << "not supported type.";
    }
    output_info->insert(std::make_pair(name, std::make_pair(shape, out_data)));
  }
  VLOG(1) << thread_id << " run done.";
}

int main(int argc, char **argv) {
  google::ParseCommandLineFlags(&argc, &argv, true);

  Config config;
  PrepareConfig(&config);

  auto predictor = paddle_infer::CreatePredictor(config);
  auto in_names = predictor->GetInputNames();
  auto out_name = predictor->GetOutputNames()[0];  // "save_infer_model/scale_0.tmp_1"

  std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<int64_t>>> input_infos;
  std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>> output_infos;

  // Run every (batch_size, sequence_length) combination so the shape range is covered.
  std::vector<int32_t> features{32, 64, 128, 256};
  for (int b = 1; b <= FLAGS_max_batch_size; b++) {
    for (auto f : features) {
      std::vector<int> shape{b, f};
      int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
      std::vector<int64_t> in_data(num);
      for (int i = 0; i < num; ++i) {
        in_data[i] = i % 50006;
      }
      input_infos[in_names[0]] = std::make_pair(shape, in_data);

      std::vector<int64_t> token(num);
      for (int i = 0; i < num; ++i) {
        token[i] = i % 2;
      }
      input_infos[in_names[1]] = std::make_pair(shape, token);
      SingleThreadRun(predictor, input_infos, &output_infos, 0);
    }
  }

  LOG(INFO) << "Run done";
}
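A minimal way to exercise this demo, sketched below; the binary and model file names are placeholders (the actual target name depends on the DEMO_NAME passed to compile.sh), and only the flags defined above are assumed:

# Step 1: tuning run; CollectShapeRangeInfo writes shape_range_info.pbtxt (TensorRT off).
./build/demo --model_file=model/inference.pdmodel --params_file=model/inference.pdiparams \
    --use_trt=false --tune=true --max_batch_size=4
# Step 2: inference run with TensorRT, reusing the tuned dynamic shapes.
./build/demo --model_file=model/inference.pdmodel --params_file=model/inference.pdiparams \
    --tuned_dynamic_shape=true --max_batch_size=4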
@@ -0,0 +1,167 @@
#include "paddle/include/paddle_inference_api.h"

#include <gflags/gflags.h>
#include <glog/logging.h>

#include <chrono>
#include <functional>
#include <iostream>
#include <numeric>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(max_batch_size, 1, "max batch size");
DEFINE_bool(use_gpu, true, "use gpu.");
DEFINE_bool(use_trt, true, "use trt.");
DEFINE_string(trt_precision, "trt_fp32", "trt_fp32, trt_fp16, etc.");
DEFINE_bool(serialize, false, "serialize");
DEFINE_bool(tuned_dynamic_shape, false, "use tuned dynamic shape");
DEFINE_bool(tune, false, "tune to get shape range.");
DEFINE_bool(allow_build_at_runtime, true, "allow rebuild trt engine at runtime");
DEFINE_string(hs, "224", "input heights, separated by ':'");
DEFINE_string(ws, "224", "input widths, separated by ':'");
DEFINE_string(no_seen_hs, "224", "unseen input heights, separated by ':'");
DEFINE_string(no_seen_ws, "224", "unseen input widths, separated by ':'");

using Predictor = paddle_infer::Predictor;
using Config = paddle_infer::Config;

// File that stores the collected shape ranges.
const std::string shape_range_info = "shape_range_info.pbtxt";

paddle_infer::PrecisionType GetPrecisionType(const std::string& ptype) {
  if (ptype == "trt_fp32")
    return paddle_infer::PrecisionType::kFloat32;
  if (ptype == "trt_fp16")
    return paddle_infer::PrecisionType::kHalf;
  return paddle_infer::PrecisionType::kFloat32;
}

// Parse a delimiter-separated list of integers, e.g. "224:448" -> {224, 448}.
std::vector<int> GetInputShape(const std::string& s, const std::string& delimiter = ":") {
  std::vector<int> res;
  size_t start = 0;
  size_t end = s.find(delimiter);
  while (end != std::string::npos) {
    std::string val = s.substr(start, end - start);
    res.push_back(std::stoi(val));
    start = end + delimiter.length();
    end = s.find(delimiter, start);
  }
  if (!s.substr(start, end).empty())
    res.push_back(std::stoi(s.substr(start, end)));
  return res;
}

void PrepareConfig(Config *config) {
  if (FLAGS_model_dir != "") {
    config->SetModel(FLAGS_model_dir);
  } else {
    config->SetModel(FLAGS_model_file, FLAGS_params_file);
  }

  if (FLAGS_use_gpu) {
    config->EnableUseGpu(500, 0);
    if (FLAGS_use_trt) {
      config->EnableTensorRtEngine(1 << 30, FLAGS_max_batch_size, 3,
                                   GetPrecisionType(FLAGS_trt_precision),
                                   FLAGS_serialize, false);
      if (FLAGS_tuned_dynamic_shape) {
        // config->Exp_DisableTensorRtOPs({"elementwise_add"});
        config->EnableTunedTensorRtDynamicShape(shape_range_info,
                                                FLAGS_allow_build_at_runtime);
      }
    }
  }

  if (FLAGS_tune) {
    config->CollectShapeRangeInfo(shape_range_info);
  }

  LOG(INFO) << config->Summary();
}

void SingleThreadRun(
    std::shared_ptr<Predictor> predictor,
    const std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>>& input_info,
    std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>>* output_info,
    int thread_id) {
  auto in_names = predictor->GetInputNames();
  for (auto& name : in_names) {
    auto in_handle = predictor->GetInputHandle(name);
    in_handle->Reshape(input_info.at(name).first);
    in_handle->CopyFromCpu(input_info.at(name).second.data());
  }

  CHECK(predictor->Run());

  output_info->clear();
  auto out_names = predictor->GetOutputNames();
  for (auto& name : out_names) {
    auto out_handle = predictor->GetOutputHandle(name);
    std::vector<int> shape = out_handle->shape();
    int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
    std::vector<float> out_data(num);
    out_handle->CopyToCpu(out_data.data());
    output_info->insert(std::make_pair(name, std::make_pair(shape, out_data)));
  }
  VLOG(1) << thread_id << " run done.";
}

int main(int argc, char **argv) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  std::vector<int> hs = GetInputShape(FLAGS_hs);
  std::vector<int> ws = GetInputShape(FLAGS_ws);
  CHECK_EQ(hs.size(), ws.size()) << "The number of input heights and widths should be the same";
  std::vector<int> no_seen_hs = GetInputShape(FLAGS_no_seen_hs);
  std::vector<int> no_seen_ws = GetInputShape(FLAGS_no_seen_ws);
  CHECK_EQ(no_seen_hs.size(), no_seen_ws.size()) << "The number of input heights and widths should be the same";

  Config config;
  PrepareConfig(&config);

  auto predictor = paddle_infer::CreatePredictor(config);
  auto in_name = predictor->GetInputNames()[0];    // "x"
  auto out_name = predictor->GetOutputNames()[0];  // "save_infer_model/scale_0.tmp_1"

  std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>> input_infos;
  std::unordered_map<std::string, std::pair<std::vector<int>, std::vector<float>>> output_infos;
  constexpr int channel = 3;

  // Run every (batch_size, height, width) combination that should be covered.
  for (int b = 1; b <= FLAGS_max_batch_size; b++) {
    for (size_t i = 0; i < hs.size(); ++i) {
      int h = hs[i];
      int w = ws[i];
      std::vector<int> shape{b, channel, h, w};
      int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
      std::vector<float> in_data(num);
      for (int j = 0; j < num; ++j) {
        in_data[j] = j % 255 * 0.13f;
      }
      input_infos[in_name] = std::make_pair(shape, in_data);
      SingleThreadRun(predictor, input_infos, &output_infos, 0);
      LOG(INFO) << "Run input shape{" << b << ", " << channel << ", " << h << ", " << w << "} done.";
    }
  }

  // If allow_build_at_runtime is enabled, feed shapes that were never tuned so
  // the TRT engine is rebuilt at runtime.
  if (!FLAGS_tune && FLAGS_allow_build_at_runtime) {
    LOG(INFO) << "Test unseen shapes and rebuild the TRT engine";
    int b = FLAGS_max_batch_size;
    for (size_t i = 0; i < no_seen_hs.size(); ++i) {
      int h = no_seen_hs[i];
      int w = no_seen_ws[i];
      std::vector<int> shape{b, channel, h, w};
      int num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
      std::vector<float> in_data(num);
      for (int j = 0; j < num; ++j) {
        in_data[j] = j % 255 * 0.13f;
      }
      input_infos[in_name] = std::make_pair(shape, in_data);
      SingleThreadRun(predictor, input_infos, &output_infos, 0);
      LOG(INFO) << "Run input shape{" << b << ", " << channel << ", " << h << ", " << w << "} done.";
    }
  }

  LOG(INFO) << "Run done";
}
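A similar two-step sketch for this image demo, assuming it is built as clas (the compile script's default DEMO_NAME); the model paths are placeholders:

# Tuning run over the height/width pairs given by --hs/--ws.
./build/clas --model_file=resnet50/inference.pdmodel --params_file=resnet50/inference.pdiparams \
    --use_trt=false --tune=true --hs=224:448 --ws=224:448
# Inference run; --no_seen_hs/--no_seen_ws exercise the runtime-rebuild path at the end of main().
./build/clas --model_file=resnet50/inference.pdmodel --params_file=resnet50/inference.pdiparams \
    --tuned_dynamic_shape=true --hs=224:448 --ws=224:448 --no_seen_hs=320 --no_seen_ws=320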
@@ -0,0 +1,48 @@
#!/bin/bash
set +x
set -e

work_path=$(dirname "$(readlink -f "$0")")

# 1. check paddle_inference exists
if [ ! -d "${work_path}/../../lib/paddle_inference" ]; then
  echo "Please download the paddle_inference lib and move it to Paddle-Inference-Demo/lib"
  exit 1
fi

# 2. check CMakeLists exists
if [ ! -f "${work_path}/CMakeLists.txt" ]; then
  cp -a "${work_path}/../../lib/CMakeLists.txt" "${work_path}/"
fi

# 3. compile
mkdir -p build
cd build
rm -rf *

if [ -z "$1" ]; then
  DEMO_NAME=clas
else
  DEMO_NAME=$1
fi

WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

LIB_DIR=${work_path}/../../lib/paddle_inference
CUDNN_LIB=/usr/lib/x86_64-linux-gnu/
CUDA_LIB=/usr/local/cuda/lib64
TENSORRT_ROOT=/usr/local/TensorRT-7.2.3.4

cmake .. -DPADDLE_LIB=${LIB_DIR} \
  -DWITH_MKL=${WITH_MKL} \
  -DDEMO_NAME=${DEMO_NAME} \
  -DWITH_GPU=${WITH_GPU} \
  -DWITH_STATIC_LIB=OFF \
  -DUSE_TENSORRT=${USE_TENSORRT} \
  -DCUDNN_LIB=${CUDNN_LIB} \
  -DCUDA_LIB=${CUDA_LIB} \
  -DTENSORRT_ROOT=${TENSORRT_ROOT}

make -j
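A typical invocation, assuming the CUDA/cuDNN/TensorRT paths above match the local environment and that the target name exists in the copied CMakeLists.txt:

bash compile.sh           # builds the default target, DEMO_NAME=clas
bash compile.sh my_demo   # builds a specific target (hypothetical name)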