Skip to content

Commit

Permalink
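fix FP32 (use the ".FP32.trtmodel" model-file suffix consistently, replace host-to-host cudaMemcpyAsync calls with plain memcpy, and remove app_plugin() from test_all)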
Browse files (browse the repository at this point in the history)
  • Loading branch information
Wish committed Sep 14, 2021
1 parent 7ff2f71 commit 5de09da
Show file tree
Hide file tree
Showing 11 changed files with 20 additions and 19 deletions.
2 changes: 1 addition & 1 deletion src/application/app_alphapose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ int app_alphapose(){
return 0;

string onnx_file = iLogger::format("%s.onnx", name);
string model_file = iLogger::format("%s.fp32.trtmodel", name);
string model_file = iLogger::format("%s.FP32.trtmodel", name);
int test_batch_size = 16;

if(!iLogger::exists(model_file)){
Expand Down
8 changes: 4 additions & 4 deletions src/application/app_arcface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static bool compile_models(){
return false;

string onnx_file = iLogger::format("%s.onnx", name);
string model_file = iLogger::format("%s.fp32.trtmodel", name);
string model_file = iLogger::format("%s.FP32.trtmodel", name);
int test_batch_size = 1;

if(not iLogger::exists(model_file)){
Expand Down Expand Up @@ -116,7 +116,7 @@ int app_arcface(){

auto detector = Scrfd::create_infer("scrfd_2.5g_bnkps.640x480.FP32.trtmodel", 0, 0.6f);
//auto detector = RetinaFace::create_infer("mb_retinaface.640x480.FP32.trtmodel", 0, 0.5f);
auto arcface = Arcface::create_infer("arcface_iresnet50.fp32.trtmodel", 0);
auto arcface = Arcface::create_infer("arcface_iresnet50.FP32.trtmodel", 0);
auto library = build_library(detector, arcface);

auto files = iLogger::find_files("face/recognize");
Expand Down Expand Up @@ -180,7 +180,7 @@ int app_arcface_video(){

auto detector = Scrfd::create_infer("scrfd_2.5g_bnkps.640x480.FP32.trtmodel", 0, 0.6f);
//auto detector = RetinaFace::create_infer("mb_retinaface.640x480.FP32.trtmodel", 0, 0.5f);
auto arcface = Arcface::create_infer("arcface_iresnet50.fp32.trtmodel", 0);
auto arcface = Arcface::create_infer("arcface_iresnet50.FP32.trtmodel", 0);
auto library = build_library(detector, arcface);
//auto remote_show = create_zmq_remote_show();
INFO("Use tools/show.py to remote show");
Expand Down Expand Up @@ -277,7 +277,7 @@ int app_arcface_tracker(){

auto detector = Scrfd::create_infer("scrfd_2.5g_bnkps.640x480.FP32.trtmodel", 0, 0.6f);
//auto detector = RetinaFace::create_infer("mb_retinaface.640x480.FP32.trtmodel", 0, 0.6f);
auto arcface = Arcface::create_infer("arcface_iresnet50.fp32.trtmodel", 0);
auto arcface = Arcface::create_infer("arcface_iresnet50.FP32.trtmodel", 0);
//auto library = build_library(detector, arcface);

//tools/show.py connect to remote show
Expand Down
2 changes: 1 addition & 1 deletion src/application/app_arcface/arcface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ namespace Arcface{
//checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
// speed up
memcpy(image_host, image.data, size_image);
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));

CUDAKernel::warp_affine_bilinear_and_normalize(
Expand Down
10 changes: 5 additions & 5 deletions src/application/app_fall_recognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ static bool compile_models(){
return false;

string onnx_file = iLogger::format("%s.onnx", name);
string model_file = iLogger::format("%s.fp32.trtmodel", name);
string model_file = iLogger::format("%s.FP32.trtmodel", name);
int test_batch_size = 1;

if(not iLogger::exists(model_file)){
Expand All @@ -46,13 +46,13 @@ static bool compile_models(){
int app_fall_recognize(){
cv::setNumThreads(0);

INFO("===================== test alphapose fp32 ==================================");
INFO("===================== test alphapose FP32 ==================================");
if(!compile_models())
return 0;

auto pose_model_file = "sppe.fp32.trtmodel";
auto detector_model_file = "yolox_m.fp32.trtmodel";
auto gcn_model_file = "fall_bp.fp32.trtmodel";
auto pose_model_file = "sppe.FP32.trtmodel";
auto detector_model_file = "yolox_m.FP32.trtmodel";
auto gcn_model_file = "fall_bp.FP32.trtmodel";

auto pose_model = AlphaPose::create_infer(pose_model_file, 0);
auto detector_model = Yolo::create_infer(detector_model_file, Yolo::Type::X, 0, 0.4f);
Expand Down
5 changes: 3 additions & 2 deletions src/application/app_high_performance/yolo_high_perf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,10 @@ namespace YoloHighPerf{
float* affine_matrix_host = (float*)cpu_workspace;
uint8_t* image_host = size_matrix + cpu_workspace;

checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
//checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
memcpy(image_host, image.data, size_image);
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));

CUDAKernel::warp_affine_bilinear_and_normalize(
Expand Down
2 changes: 1 addition & 1 deletion src/application/app_retinaface/retinaface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,8 @@ namespace RetinaFace{
// checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
// speed up
memcpy(image_host, image.data, size_image);
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));

CUDAKernel::warp_affine_bilinear_and_normalize(
Expand Down
2 changes: 1 addition & 1 deletion src/application/app_scrfd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ static void scrfd_performance(shared_ptr<Scrfd::Infer> infer){
int app_scrfd(){

TRT::set_device(0);
INFO("===================== test scrfd fp32 ==================================");
INFO("===================== test scrfd FP32 ==================================");

string model_file;
if(!compile_scrfd(640, 640, model_file))
Expand Down
2 changes: 1 addition & 1 deletion src/application/app_scrfd/scrfd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ namespace Scrfd{
//checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
// speed up
memcpy(image_host, image.data, size_image);
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));

CUDAKernel::warp_affine_bilinear_and_normalize(
Expand Down
2 changes: 1 addition & 1 deletion src/application/app_yolo/yolo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ namespace Yolo{
//checkCudaRuntime(cudaMemcpyAsync(image_host, image.data, size_image, cudaMemcpyHostToHost, stream_));
// speed up
memcpy(image_host, image.data, size_image);
memcpy(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i));
checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_host, job.additional.d2i, sizeof(job.additional.d2i), cudaMemcpyHostToHost, stream_));
checkCudaRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(job.additional.d2i), cudaMemcpyHostToDevice, stream_));

CUDAKernel::warp_affine_bilinear_and_normalize(
Expand Down
1 change: 0 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ void test_all(){
app_arcface_video();
app_arcface_tracker();
app_scrfd();
app_plugin();
INFO("test done.");
}

Expand Down
3 changes: 2 additions & 1 deletion src/tensorRT/common/trt_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ namespace TRT{
if(head_ == DataHead::Device){
checkCudaRuntime(cudaMemcpyAsync((char*)data_->gpu() + offset_location, src, copyed_bytes, cudaMemcpyHostToDevice, stream_));
}else if(head_ == DataHead::Host){
checkCudaRuntime(cudaMemcpyAsync((char*)data_->cpu() + offset_location, src, copyed_bytes, cudaMemcpyHostToHost, stream_));
//checkCudaRuntime(cudaMemcpyAsync((char*)data_->cpu() + offset_location, src, copyed_bytes, cudaMemcpyHostToHost, stream_));
memcpy((char*)data_->cpu() + offset_location, src, copyed_bytes);
}else{
INFOE("Unsupport head type %d", head_);
}
Expand Down

0 comments on commit 5de09da

Please sign in to comment.