```
Reading engine from file /content/engine/yolo11x_fp16.engine
Total Inference Time : 17.17
Total Frame processed : 750
Average Inference FPS : 43.69
Total Feature Time : 75.53
Average feature FPS : 9.93
Total Tracking Time : 14.76
Average Tracking FPS : 50.83
```
How can I reduce my inference time when using a TensorRT engine for the fast-reid model? I have given the code below; I need help.
```python
# Loop over each image in the batch and process it sequentially
for image in fastreid_batch_images:
    # Set the image as input to the model
    fastreid_inputs[0].host = image
    # Set the input shape for the context (batch size of 1)
    context.set_input_shape('input', image.shape)
    # Perform inference on the single image
    features = do_inference(context, engine, bindings, fastreid_inputs, fastreid_outputs, stream)
    # Flatten the features and append to the list
    extracted_features.append(np.array(features[0]).flatten())
```
```python
# Transfer input data to the GPU.
kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
[cuda_call(cudart.cudaMemcpyAsync(inp.device, inp.host, inp.nbytes, kind, stream)) for inp in inputs]
# Run inference.
execute_async_func()
# Transfer predictions back from the GPU.
kind = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
[cuda_call(cudart.cudaMemcpyAsync(out.host, out.device, out.nbytes, kind, stream)) for out in outputs]

# Set up context tensor addresses.
num_io = engine.num_io_tensors
for i in range(num_io):
    context.set_tensor_address(engine.get_tensor_name(i), bindings[i])
return _do_inference_base(inputs, outputs, stream, execute_async_func)
```
I want help with running batch inference with TensorRT in the load_and_inference_fastreid function. I am storing the detected objects in a batch and sending them for feature extraction, where I am currently using a for loop over each object individually. Instead, I want to process the whole batch at once. How can I do that?
First, make sure the fast-reid model you've exported supports dynamic batch sizes. If so, then in this function:
```python
# Loop over each image in the batch and process it sequentially
for image in fastreid_batch_images:
    # Set the image as input to the model
    fastreid_inputs[0].host = image
    # Set the input shape for the context (batch size of 1)
    context.set_input_shape('input', image.shape)
    # Perform inference on the single image
    features = do_inference(context, engine, bindings, fastreid_inputs, fastreid_outputs, stream)
    # Flatten the features and append to the list
    extracted_features.append(np.array(features[0]).flatten())
```
split the images in fastreid_batch_images into batches along the batch axis (which under most circumstances is axis 0) and pass each batch to do_inference in a single call, as sketched below, instead of looping per image.
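Something along these lines should work. This is an untested sketch: `MAX_BATCH` is a placeholder for whatever maximum batch size you build the optimization profile with, and I'm assuming `allocate_buffers` sizes the host/device buffers for that maximum and that `HostDeviceMem.host` copies an ndarray into the pinned buffer, as in the TensorRT samples' common.py:

```python
# Hypothetical batched version of the per-image loop. Assumptions:
#   * the engine's 'input' tensor has a dynamic batch dimension spanning
#     1..MAX_BATCH (see the build sketch below),
#   * allocate_buffers() sized host/device buffers for MAX_BATCH,
#   * assigning to .host copies into the pinned buffer (TensorRT samples).
import numpy as np

MAX_BATCH = 32  # placeholder: must match the profile's max batch size

extracted_features = []
for start in range(0, len(fastreid_batch_images), MAX_BATCH):
    chunk = np.ascontiguousarray(fastreid_batch_images[start:start + MAX_BATCH])
    n = chunk.shape[0]

    # One host-to-device copy and one inference call for the whole chunk
    fastreid_inputs[0].host = chunk
    context.set_input_shape('input', chunk.shape)
    outputs = do_inference(context, engine, bindings,
                           fastreid_inputs, fastreid_outputs, stream)

    # The output buffer is sized for MAX_BATCH; keep only the n valid rows
    feats = np.array(outputs[0]).reshape(MAX_BATCH, -1)[:n]
    extracted_features.extend(feats)
```

This replaces N host-to-device copies and N engine launches per frame with one of each, which is usually where a per-object loop loses most of its time.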
Also, please increase the readability of your post by using code blocks.
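Regarding the dynamic-batch requirement: below is a rough sketch of rebuilding the FastReID engine with an optimization profile through the TensorRT Python API. The tensor name "input", the (3, 256, 128) image shape, and the 1/16/32 min/opt/max batch sizes are assumptions; adjust them to match your exported ONNX model, and note that the ONNX export itself must already mark the batch axis as dynamic. (trtexec can do the same with --minShapes/--optShapes/--maxShapes.)

```python
# Rough sketch: build fastreid_model.trt with a dynamic batch dimension.
# The tensor name 'input' and the (3, 256, 128) shape are assumptions;
# adjust them to match your exported ONNX model.
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

builder = trt.Builder(TRT_LOGGER)
# Explicit-batch network (the flag is the default, and deprecated, on
# newer TensorRT versions)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, TRT_LOGGER)

with open("/content/fastreid_model.onnx", "rb") as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        raise RuntimeError("Failed to parse the FastReID ONNX model")

config = builder.create_builder_config()
config.set_flag(trt.BuilderFlag.FP16)

# One optimization profile: batch size may vary from 1 to 32 at runtime.
profile = builder.create_optimization_profile()
profile.set_shape("input",
                  (1, 3, 256, 128),    # min
                  (16, 3, 256, 128),   # opt
                  (32, 3, 256, 128))   # max
config.add_optimization_profile(profile)

serialized_engine = builder.build_serialized_network(network, config)
with open("/content/fastreid_model.trt", "wb") as f:
    f.write(serialized_engine)
```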
Below I have given the full code:
```python
def load_and_inference_fastreid(fastreid_batch_images, engine, fastreid_inputs,
                                fastreid_outputs, bindings, stream):
    # Load the TensorRT engine
    if not os.path.exists(fastreid_engine_path):
        raise FileNotFoundError(f"Engine file {fastreid_engine_path} not found. Please ensure the path is correct.")
    with trt.Runtime(TRT_LOGGER) as runtime, open(fastreid_engine_path, "rb") as f:
        fastreid_engine_data = f.read()
        engine = runtime.deserialize_cuda_engine(fastreid_engine_data)

    # Preprocess the current frame (frame comes from the caller's video loop)
    fastreid_img = cv2.resize(frame, (256, 128), interpolation=cv2.INTER_LINEAR)
    fastreid_input_img = preprocess_image(fastreid_img)
    fastreid_batch_images = np.concatenate([fastreid_input_img], axis=0)

    fastreid_inputs, fastreid_outputs, bindings, stream = allocate_buffers(engine, fastreid_output_shapes, profile_idx=0)

    # Create execution context
    context = engine.create_execution_context()

    # Set the whole batch as input to the model
    # (the earlier per-image loop is replaced by a single batched call)
    fastreid_inputs[0].host = fastreid_batch_images
    context.set_input_shape('input', fastreid_batch_images.shape)

    # Perform feature extraction
    # extracted_features = get_feature_fastreid(engine, context, fastreid_inputs, fastreid_outputs, bindings, stream)
    extracted_features = do_inference(context, engine, bindings, fastreid_inputs, fastreid_outputs, stream)

    # Print type and structure of extracted_features
    # print(f"Extracted features type: {type(extracted_features)}")
    # print(f"Extracted features content: {extracted_features}")

    # Reshape the extracted features to a 2D array (1, 2048)
    extracted_features = np.array(extracted_features[0]).flatten()
    # print(f"Reshaped extracted features shape: {extracted_features.shape}")
    return extracted_features
def _do_inference_base(inputs, outputs, stream, execute_async_func):
    # Transfer input data to the GPU.
    kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
    [cuda_call(cudart.cudaMemcpyAsync(inp.device, inp.host, inp.nbytes, kind, stream)) for inp in inputs]
    # Run inference.
    execute_async_func()
    # Transfer predictions back from the GPU.
    kind = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
    [cuda_call(cudart.cudaMemcpyAsync(out.host, out.device, out.nbytes, kind, stream)) for out in outputs]
    # Synchronize the stream
    cuda_call(cudart.cudaStreamSynchronize(stream))
    # Return only the host outputs.
    return [out.host for out in outputs]
def do_inference(context, engine, bindings, inputs, outputs, stream):
    def execute_async_func():
        context.execute_async_v3(stream_handle=stream)
    # Set up context tensor addresses.
    num_io = engine.num_io_tensors
    for i in range(num_io):
        context.set_tensor_address(engine.get_tensor_name(i), bindings[i])
    return _do_inference_base(inputs, outputs, stream, execute_async_func)
# Define target classes
target_classes = ['car', 'bus', 'truck', 'motorcycle']

def main():
    # Input
    video_file_name = '/content/30.mp4'
    video_path = os.path.join(current_directory, 'data', video_file_name)
    cap = cv2.VideoCapture(video_path)  # Load video
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    original_size = (frame_width, frame_height)
    # original_size = (1200, 800)
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Model and engine paths
    model_name = "yolo11x"
    precision = "fp16"  # int8 or fp32 or fp16
    # quantization_method = ''
    onnx_model_path = os.path.join(current_directory, 'onnx', f'{model_name}{device.type}.onnx')
    engine_file_path = os.path.join(current_directory, 'engine', f'{model_name}{precision}.engine')
    fastreid_onnx_path = "/content/fastreid_model.onnx"
    fastreid_engine_path = "/content/fastreid_model.trt"
    os.makedirs(os.path.dirname(engine_file_path), exist_ok=True)

    # Output shapes expected
    output_shapes = [(1, 84, 8400)]
    output_shape_ndarray = np.array(output_shapes[0], dtype=np.int32)

    # FastReID output shapes expected
    # fastreid_output_shapes = (1, 256)  # (1, 2048)
    fastreid_output_shapes = (1, 2048)
    fastreid_output_shape_ndarray = np.array(fastreid_output_shapes, dtype=np.int32)
    # print(fastreid_output_shape_ndarray)

    # Load or build the YOLO TensorRT engine and do inference
    with get_engine(onnx_model_path, engine_file_path, precision) as engine, \
         engine.create_execution_context() as context:
```