
add mkl multi-thread test cases in PR-CI-INFERENCE #34946

Merged · 3 commits · Aug 17, 2021
26 changes: 26 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/run.sh
@@ -77,6 +77,12 @@ for model_name in $clas_download_list; do
download $url_prefix $model_name
done

nlp_download_list='ernie_text_cls'
for model_name in $nlp_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2.1.1/nlp"
download $url_prefix $model_name
done

# compile and run test
cd $current_dir
mkdir -p build
@@ -144,6 +150,26 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
fi
fi

# ---------gpu ernie_text_cls on linux---------
if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
cmake .. -DPADDLE_LIB=${inference_install_dir} \
-DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=test_ernie_text_cls \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=OFF \
-DUSE_TENSORRT=$USE_TENSORRT \
-DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-DWITH_GTEST=ON
make -j$(nproc)
./test_ernie_text_cls \
--modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
--gtest_output=xml:test_ernie_text_cls.xml
if [ $? -ne 0 ]; then
echo "test_ernie_text_cls runs failed" >> ${current_dir}/build/test_summary.txt
EXIT_CODE=1
fi
fi
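
To rerun only the new ERNIE multi-thread case against an existing build, gtest's --gtest_filter flag can be combined with the flags the script already passes (a sketch; the model path mirrors the script above):

# Sketch: rerun just the MKL multi-thread case from the build directory.
./test_ernie_text_cls \
    --modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
    --gtest_filter=test_ernie_text_cls.multi_thread4_mkl_fp32_bz2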

if [[ -f ${current_dir}/build/test_summary.txt ]];then
echo "=====================test summary======================"
cat ${current_dir}/build/test_summary.txt
46 changes: 45 additions & 1 deletion paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
@@ -123,7 +123,7 @@ TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
FLAGS_modeldir + "/inference.pdiparams");
config.EnableUseGpu(100, 0);
config.EnableTensorRtEngine(
-      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false);
+      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, true, false);
PrepareDynamicShape(&config, 4);
// get ground truth by disabling ir
paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
@@ -149,6 +149,50 @@ TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
std::cout << "finish multi-thread test" << std::endl;
}

TEST(test_det_mv3_db, multi_thread2_mkl_fp32_bz2) {
int thread_num = 2;  // more than 2 threads may OOM
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["x"] = PrepareInput(2, 640);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data,
truth_output_data;
// prepare ground-truth config
paddle_infer::Config config, config_no_ir;
config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config_no_ir.SwitchIrOptim(false);
// prepare inference config
config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config.DisableGpu();
config.EnableMKLDNN();
config.SetMkldnnCacheCapacity(10);
config.SetCpuMathLibraryNumThreads(10);
// get ground truth by disabling ir
paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
&truth_output_data, 1);

// get infer results from multi threads
std::vector<std::thread> threads;
services::PredictorPool pred_pool(config, thread_num);
for (int i = 0; i < thread_num; ++i) {
threads.emplace_back(paddle::test::SingleThreadPrediction,
pred_pool.Retrive(i), &my_input_data_map,
&infer_output_data, 2);
}

// thread join & check outputs
for (int i = 0; i < thread_num; ++i) {
LOG(INFO) << "join tid : " << i;
threads[i].join();
CompareRecord(&truth_output_data, &infer_output_data, 1e-4);
}

std::cout << "finish multi-thread test" << std::endl;
}

} // namespace paddle_infer

int main(int argc, char** argv) {
137 changes: 137 additions & 0 deletions paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc
@@ -0,0 +1,137 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "test_suite.h" // NOLINT

DEFINE_string(modeldir, "", "Directory of the inference model.");

namespace paddle_infer {

template <typename T>
T cRandom(int min, int max) {
unsigned int seed = 100;
return (min +
static_cast<T>(max * rand_r(&seed) / static_cast<T>(RAND_MAX + 1)));
}

std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
// init input data
int digit_length = 115;
paddle::test::Record input_ids, segment_ids;
int input_num = batch_size * digit_length;
std::vector<int64_t> input_data(input_num, 1);
std::vector<int64_t> segment_data(input_num, 0);
srand((unsigned)time(NULL));
for (int x = 0; x < input_data.size(); x++) {
input_data[x] = cRandom<int>(1, 100);
Contributor: This cRandom method seems to always return 1 (https://ideone.com/Al9qCY).

Contributor: I got a build error on CI, so I looked at this.
[screenshot: CI build error]
}
input_ids.data = std::vector<float>(input_data.begin(), input_data.end());
input_ids.shape = std::vector<int>{batch_size, digit_length};
input_ids.type = paddle::PaddleDType::INT64;

segment_ids.data =
std::vector<float>(segment_data.begin(), segment_data.end());
segment_ids.shape = std::vector<int>{batch_size, digit_length};
segment_ids.type = paddle::PaddleDType::INT64;

std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map.insert({"input_ids", input_ids});
my_input_data_map.insert({"token_type_ids", segment_ids});

return my_input_data_map;
}

TEST(test_ernie_text_cls, analysis_gpu_bz2_buffer) {
// init input data
auto my_input_data_map = PrepareInput(2);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data,
truth_output_data;
// prepare ground-truth config
paddle_infer::Config config, config_no_ir;
Contributor: Going forward, we could print each test case's config.Summary(); it is supported on develop and the upcoming 2.2.

Contributor (author): Got it. I'll print the config summary in the next PR.
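
A minimal sketch of that follow-up (assuming paddle_infer::Config::Summary(), available from Paddle >= 2.2 per the comment above):

// Sketch of the suggested follow-up: log the effective config per test case.
// Assumes Config::Summary() (Paddle >= 2.2, per the review comment).
paddle_infer::Config config;
config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
                FLAGS_modeldir + "/inference.pdiparams");
LOG(INFO) << config.Summary();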

config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config_no_ir.SwitchIrOptim(false);

// prepare inference config from buffer
std::string prog_file = FLAGS_modeldir + "/inference.pdmodel";
std::string params_file = FLAGS_modeldir + "/inference.pdiparams";
std::string prog_str = paddle::test::read_file(prog_file);
std::string params_str = paddle::test::read_file(params_file);
config.SetModelBuffer(prog_str.c_str(), prog_str.size(), params_str.c_str(),
params_str.size());
// get ground truth by disabling ir
paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
&truth_output_data, 1);
// get infer results
paddle_infer::services::PredictorPool pred_pool(config, 1);
SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
&infer_output_data);
// check outputs
CompareRecord(&truth_output_data, &infer_output_data);
std::cout << "finish test" << std::endl;
}

TEST(test_ernie_text_cls, multi_thread4_mkl_fp32_bz2) {
int thread_num = 4;
// init input data
auto my_input_data_map = PrepareInput(2);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data,
truth_output_data;
// prepare ground-truth config
paddle_infer::Config config, config_no_ir;
config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config.DisableGpu();
config_no_ir.SwitchIrOptim(false);
// prepare inference config
config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config.DisableGpu();
config.EnableMKLDNN();
config.SetMkldnnCacheCapacity(10);
config.SetCpuMathLibraryNumThreads(10);
// get ground truth by disabling ir
paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
&truth_output_data, 1);

// get infer results from multi threads
std::vector<std::thread> threads;
services::PredictorPool pred_pool(config, thread_num);
for (int i = 0; i < thread_num; ++i) {
threads.emplace_back(paddle::test::SingleThreadPrediction,
pred_pool.Retrive(i), &my_input_data_map,
&infer_output_data, 2);
}

// thread join & check outputs
for (int i = 0; i < thread_num; ++i) {
LOG(INFO) << "join tid : " << i;
threads[i].join();
CompareRecord(&truth_output_data, &infer_output_data);
}

std::cout << "finish multi-thread test" << std::endl;
}

} // namespace paddle_infer

int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}
41 changes: 35 additions & 6 deletions paddle/fluid/inference/tests/infer_ut/test_suite.h
@@ -37,21 +37,50 @@ class Record {
paddle::PaddleDType type;
};

std::string read_file(std::string filename) {
std::ifstream file(filename);
return std::string((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
}

void SingleThreadPrediction(paddle_infer::Predictor *predictor,
std::map<std::string, Record> *input_data_map,
std::map<std::string, Record> *output_data_map,
int repeat_times = 2) {
// prepare input tensor
auto input_names = predictor->GetInputNames();
for (const auto & [ key, value ] : *input_data_map) {
-    auto input_tensor = predictor->GetInputHandle(key);
-    input_tensor->Reshape(value.shape);
-    input_tensor->CopyFromCpu(value.data.data());
+    switch (value.type) {
+      case paddle::PaddleDType::INT64: {
+        std::vector<int64_t> input_value =
+            std::vector<int64_t>(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+      case paddle::PaddleDType::INT32: {
+        std::vector<int32_t> input_value =
+            std::vector<int32_t>(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+      case paddle::PaddleDType::FLOAT32: {
+        std::vector<float> input_value =
+            std::vector<float>(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+    }
}

// inference
for (size_t i = 0; i < repeat_times; ++i) {
-    CHECK(predictor->Run());
+    ASSERT_TRUE(predictor->Run());
}

// get output data to Record
@@ -112,8 +141,8 @@ void CompareRecord(std::map<std::string, Record> *truth_output_data,
size_t numel = value.data.size() / sizeof(float);
EXPECT_EQ(value.data.size(), truth_record.data.size());
for (size_t i = 0; i < numel; ++i) {
-      CHECK_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]),
-               epislon);
+      ASSERT_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]),
+                epislon);
}
}
}
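
The new switch in SingleThreadPrediction repeats the GetInputHandle/Reshape/CopyFromCpu sequence once per dtype; a templated helper could collapse the three cases. A hypothetical refactor sketch (not part of this PR), using only APIs already shown above:

// Hypothetical refactor (not in this PR): one helper instead of three
// near-identical switch cases. T is the tensor's element type.
template <typename T>
void FeedInput(paddle_infer::Predictor *predictor, const std::string &key,
               const Record &value) {
  std::vector<T> input_value(value.data.begin(), value.data.end());
  auto input_tensor = predictor->GetInputHandle(key);
  input_tensor->Reshape(value.shape);
  input_tensor->CopyFromCpu(input_value.data());
}
// Usage inside the loop: FeedInput<int64_t>(predictor, key, value); etc.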