Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions core/runtime/register_trt_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
}

c10::cuda::CUDAStream stream = c10::cuda::getCurrentCUDAStream(inputs[0].device().index());

// nvinfer1::IExecutionContext::enqueue is not thread safe and we need a mutex for it.
std::unique_lock<std::mutex> lock(compiled_engine->mu);
compiled_engine->exec_ctx->enqueueV2(gpu_handles.data(), stream, nullptr);

return outputs;
Expand Down
2 changes: 2 additions & 0 deletions core/runtime/runtime.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once
#include <map>
#include <memory>
#include <mutex>
#include <utility>
#include "ATen/core/function_schema.h"
#include "NvInfer.h"
Expand Down Expand Up @@ -47,6 +48,7 @@ struct TRTEngine : torch::CustomClassHolder {
std::shared_ptr<nvinfer1::IExecutionContext> exec_ctx;
std::pair<uint64_t, uint64_t> num_io;
std::string name;
std::mutex mu;
CudaDevice device_info;

std::unordered_map<uint64_t, uint64_t> in_binding_map;
Expand Down
13 changes: 13 additions & 0 deletions tests/cpp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ test_suite(
":test_default_input_types",
":test_compiled_modules",
":test_modules_as_engines",
":test_runtime_thread_safety",
":test_multiple_registered_engines",
":test_serialization",
":test_module_fallback",
Expand All @@ -27,6 +28,7 @@ test_suite(
":test_default_input_types",
":test_compiled_modules",
":test_modules_as_engines",
":test_runtime_thread_safety",
":test_multiple_registered_engines",
":test_serialization",
":test_module_fallback",
Expand Down Expand Up @@ -95,6 +97,17 @@ cc_test(
timeout="long"
)

# Multithreaded runtime test: spawns many threads that run inference through
# the same TRT-compiled module concurrently, exercising the thread safety of
# the runtime's engine execution path.
cc_test(
    name = "test_runtime_thread_safety",
    srcs = ["test_runtime_thread_safety.cpp"],
    data = [
        "//tests/modules:jit_models",
    ],
    deps = [
        ":cpp_api_test",
    ]
)

cc_test(
name = "test_module_fallback",
srcs = ["test_module_fallback.cpp"],
Expand Down
83 changes: 83 additions & 0 deletions tests/cpp/test_runtime_thread_safety.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include <string>
#include <thread>
#include "gtest/gtest.h"
#include "tests/util/util.h"
#include "torch/script.h"
#include "trtorch/trtorch.h"

// Worker body for the thread-safety test: repeatedly runs inference on both
// the reference TorchScript module and the TRT-compiled module, storing the
// most recent outputs in this thread's slot of the result vectors so the main
// thread can compare them after join().
//
// thread_id:   index of this worker; selects the slot in out_vec/trt_out_vec.
// mod:         reference (non-TRT) module, shared across all threads.
// trt_mod:     TRT-compiled module, shared across all threads.
// inputs:      inputs for the reference module.
// inputs_trt:  inputs for the TRT-compiled module.
// out_vec:     per-thread outputs of the reference module.
// trt_out_vec: per-thread outputs of the TRT-compiled module.
void run_infer(
    int thread_id,
    torch::jit::Module& mod,
    torch::jit::Module& trt_mod,
    // By const reference: the original took these by const value, copying the
    // whole IValue vector on every call for no benefit. std::thread already
    // decay-copies its arguments, so binding a reference to that copy is safe.
    const std::vector<torch::jit::IValue>& inputs,
    const std::vector<torch::jit::IValue>& inputs_trt,
    std::vector<torch::jit::IValue>& out_vec,
    std::vector<torch::jit::IValue>& trt_out_vec) {
  // Several iterations per thread to increase the chance of overlapping
  // enqueues exposing a race in the shared execution context.
  for (int iter = 0; iter < 10; ++iter) {
    out_vec[thread_id] = mod.forward(inputs);
    trt_out_vec[thread_id] = trt_mod.forward(inputs_trt);
  }
}

// Verifies that concurrent inference through one TRT-compiled module from
// many threads produces results matching the reference TorchScript module,
// i.e. that the runtime's execution path is thread safe.
TEST(CppAPITests, RuntimeThreadSafety) {
  std::string path = "tests/modules/resnet50_traced.jit.pt";
  torch::jit::Module mod;
  try {
    // Deserialize the ScriptModule from a file using torch::jit::load().
    mod = torch::jit::load(path);
  } catch (const c10::Error& e) {
    // Fail fast: the original only logged to stderr and carried on with a
    // default-constructed module, which crashes confusingly further down.
    FAIL() << "error loading the model: " << e.what();
  }
  mod.eval();
  mod.to(torch::kCUDA);

  // Identical float inputs for both modules (the original cloned each tensor
  // twice and re-cast an already-float tensor; one clone is sufficient).
  torch::Tensor in_jit = at::randint(5, {1, 3, 224, 224}, torch::kCUDA).to(torch::kFloat);
  torch::Tensor in_trt = in_jit.clone();

  std::vector<torch::jit::IValue> inputs_jit;
  std::vector<torch::jit::IValue> inputs_trt;
  inputs_jit.push_back(in_jit);
  inputs_trt.push_back(in_trt);

  // One input range per TRT input; allows a dynamic batch dimension (1..16)
  // even though the test only feeds batch size 1.
  std::vector<trtorch::CompileSpec::Input> input_ranges;
  for (size_t i = 0; i < inputs_trt.size(); ++i) {
    input_ranges.push_back({std::vector<int64_t>{1, 3, 224, 224},
                            std::vector<int64_t>{1, 3, 224, 224},
                            std::vector<int64_t>{16, 3, 224, 224},
                            torch::kFloat});
  }
  auto compile_settings = trtorch::CompileSpec(input_ranges);

  // FP32 execution
  compile_settings.enabled_precisions = {torch::kFloat};
  compile_settings.strict_types = true;
  auto trt_mod = trtorch::CompileGraph(mod, compile_settings);
  std::cout << "trtorch::CompileGraph" << "\n";

  // Launch the workers; the modules are shared (std::ref) while the input
  // vectors are copied per thread, so each worker reads its own inputs.
  constexpr int num_threads = 10;
  std::vector<torch::jit::IValue> out_vec(num_threads), trt_out_vec(num_threads);
  std::vector<std::thread> threads;
  threads.reserve(num_threads);
  for (int i = 0; i < num_threads; i++) {
    threads.emplace_back(
        run_infer,
        i,
        std::ref(mod),
        std::ref(trt_mod),
        inputs_jit,
        inputs_trt,
        std::ref(out_vec),
        std::ref(trt_out_vec));
  }

  for (auto& t : threads) {
    t.join();
  }

  // Every thread's TRT output must match the reference output within
  // tolerance.
  bool flag = true;
  for (int i = 0; i < num_threads; i++) {
    bool f = trtorch::tests::util::almostEqual(out_vec[i].toTensor(), trt_out_vec[i].toTensor(), 1e-2);
    flag = flag && f;
  }
  ASSERT_TRUE(flag);
}