diff --git a/src/deepsparse/pipeline.py b/src/deepsparse/pipeline.py
index 1c0c324f24..daf83e307c 100644
--- a/src/deepsparse/pipeline.py
+++ b/src/deepsparse/pipeline.py
@@ -87,7 +87,6 @@ def __init__(
         logger_manager: Optional[LoggerManager] = None,
         benchmark: bool = False,
     ):
-
         self.ops = ops
         self.router = router
         self.generator_router = generator_router
diff --git a/src/deepsparse/utils/annotate.py b/src/deepsparse/utils/annotate.py
index 36164a7537..ef560064ed 100644
--- a/src/deepsparse/utils/annotate.py
+++ b/src/deepsparse/utils/annotate.py
@@ -20,12 +20,14 @@
 import os
 import shutil
 import time
+from collections import deque
 from copy import copy
 from pathlib import Path
 from typing import Any, Callable, Iterable, Iterator, List, Optional, Tuple, Union
 
 import numpy
 
+from deepsparse.timing import InferencePhases
 from sparsezoo.utils import create_dirs
 
 
@@ -42,6 +44,23 @@
 __all__ = ["get_image_loader_and_saver", "get_annotations_save_dir", "annotate"]
 
 
+class AverageFPS:
+    def __init__(self, num_samples=20):
+        self.frame_times = deque(maxlen=num_samples)
+
+    def measure(self, duration):
+        self.frame_times.append(duration)
+
+    def calculate(self):
+        if len(self.frame_times) > 1:
+            return numpy.average(self.frame_times)
+        else:
+            return 0.0
+
+
+afps = AverageFPS()
+
+
 def get_image_loader_and_saver(
     path: str,
     save_dir: str,
@@ -61,6 +80,19 @@ def get_image_loader_and_saver(
     image_batch, video, or web-cam based on path given, and a boolean value that
     is True is the returned objects load videos
     """
+    # webcam
+    if path.isnumeric():
+        loader = WebcamLoader(int(path), image_shape)
+        saver = (
+            VideoSaver(save_dir, 30, loader.original_frame_size, None)
+            if not no_save
+            else None
+        )
+        return loader, saver, True
+
+    if no_save:
+        print("no_save ignored since not using webcam")
+
     # video
     if path.endswith(".mp4"):
         loader = VideoLoader(path, image_shape)
@@ -71,15 +103,7 @@ def get_image_loader_and_saver(
             target_fps,
         )
         return loader, saver, True
-    # webcam
-    if path.isnumeric():
-        loader = WebcamLoader(int(path), image_shape)
-        saver = (
-            VideoSaver(save_dir, 30, loader.original_frame_size, None)
-            if not no_save
-            else None
-        )
-        return loader, saver, True
+
     # image file(s)
     return ImageLoader(path, image_shape), ImageSaver(save_dir), False
 
@@ -364,13 +388,11 @@ def annotate(
     if isinstance(original_image, str):
         original_image = cv2.imread(image)
 
-    if target_fps is None and calc_fps:
-        start = time.perf_counter()
-
     pipeline_output = pipeline(images=[image])
 
     if target_fps is None and calc_fps:
-        target_fps = 1 / (time.perf_counter() - start)
+        afps.measure(1 / pipeline._timer.time_delta(InferencePhases.ENGINE_FORWARD))
+        target_fps = afps.calculate()
 
     result = annotation_func(
         image=original_image,
diff --git a/src/deepsparse/yolo/annotate.py b/src/deepsparse/yolo/annotate.py
index ffecbf6b51..3752873e1e 100644
--- a/src/deepsparse/yolo/annotate.py
+++ b/src/deepsparse/yolo/annotate.py
@@ -219,8 +219,11 @@ def main(
         )
 
         if is_webcam:
+            cv2.namedWindow("annotated", cv2.WINDOW_NORMAL)
             cv2.imshow("annotated", annotated_image)
-            cv2.waitKey(1)
+            ch = cv2.waitKey(1)
+            if ch == 27 or ch == ord("q") or ch == ord("Q"):
+                break
 
         # save
         if saver:
diff --git a/src/deepsparse/yolo/utils/utils.py b/src/deepsparse/yolo/utils/utils.py
index e778fabe17..7261d71e11 100644
--- a/src/deepsparse/yolo/utils/utils.py
+++ b/src/deepsparse/yolo/utils/utils.py
@@ -410,12 +410,12 @@ def modify_yolo_onnx_input_shape(
             f"at {model_path} with the new input shape"
         )
         save_onnx(model, model_path)
-        return model_path
+        return model_path, None
     else:
         _LOGGER.info(
             "Saving the ONNX model with the " "new input shape to a temporary file"
         )
-        return save_onnx_to_temp_files(model, with_external_data=not inplace)
+        return save_onnx_to_temp_files(model, with_external_data=not inplace), None
 
 
 def get_tensor_dim_shape(tensor: onnx.TensorProto, dim: int) -> int:
@@ -461,9 +461,11 @@
 
     img_res = numpy.copy(image)
 
+    num_ppl = 0
     for idx in range(len(boxes)):
         label = labels[idx]
         if scores[idx] > score_threshold:
+            num_ppl += 1 if label == "person" else 0
             annotation_text = f"{label}: {scores[idx]:.0%}"
 
             # bounding box points
@@ -509,13 +511,22 @@
         )
 
     if images_per_sec is not None:
-        img_res = _plot_fps(
-            img_res=img_res,
-            images_per_sec=images_per_sec,
-            x=20,
-            y=30,
-            font_scale=0.9,
-            thickness=2,
+        # img_res = _plot_fps(
+        #     img_res=img_res,
+        #     images_per_sec=images_per_sec,
+        #     x=20,
+        #     y=30,
+        #     font_scale=0.9,
+        #     thickness=2,
+        # )
+        img_res = _draw_text(
+            img_res,
+            f"FPS: {images_per_sec:0.1f} | People Count: {num_ppl} | YOLOv8 on DeepSparse",
+            pos=(10, 10),
+            font_scale=0.7,
+            text_color=(204, 85, 17),
+            text_color_bg=(255, 255, 255),
+            font_thickness=2,
         )
 
     return img_res
@@ -557,3 +568,35 @@ def _plot_fps(
         cv2.LINE_AA,
     )
     return img_res
+
+
+def _draw_text(
+    img: numpy.ndarray,
+    text: str,
+    font=cv2.FONT_HERSHEY_SIMPLEX,
+    pos=(0, 0),
+    font_scale=1,
+    font_thickness=2,
+    text_color=(0, 255, 0),
+    text_color_bg=(0, 0, 0),
+):
+
+    offset = (5, 5)
+    x, y = pos
+    text_size, _ = cv2.getTextSize(text, font, font_scale, font_thickness)
+    text_w, text_h = text_size
+    rec_start = tuple(x - y for x, y in zip(pos, offset))
+    rec_end = tuple(x + y for x, y in zip((x + text_w, y + text_h), offset))
+    cv2.rectangle(img, rec_start, rec_end, text_color_bg, -1)
+    cv2.putText(
+        img,
+        text,
+        (x, int(y + text_h + font_scale - 1)),
+        font,
+        font_scale,
+        text_color,
+        font_thickness,
+        cv2.LINE_AA,
+    )
+
+    return img
diff --git a/src/deepsparse/yolov8/annotate.py b/src/deepsparse/yolov8/annotate.py
index 22329481ca..67b35cbb81 100644
--- a/src/deepsparse/yolov8/annotate.py
+++ b/src/deepsparse/yolov8/annotate.py
@@ -232,8 +232,11 @@ def main(
         )
 
         if is_webcam:
+            cv2.namedWindow("annotated", cv2.WINDOW_NORMAL)
             cv2.imshow("annotated", annotated_image)
-            cv2.waitKey(1)
+            ch = cv2.waitKey(1)
+            if ch == 27 or ch == ord("q") or ch == ord("Q"):
+                break
 
         # save
        if saver:
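
Note on the AverageFPS change in src/deepsparse/utils/annotate.py: instead of deriving target_fps from a single wall-clock measurement around the whole pipeline call, the readout is now smoothed over a sliding window of engine-forward timings. A minimal standalone sketch of that rolling-average technique follows; the per-frame durations are synthetic stand-ins for the pipeline timer, not real measurements:

from collections import deque

import numpy


class AverageFPS:
    """Average instantaneous FPS readings over the last num_samples frames."""

    def __init__(self, num_samples=20):
        # bounded deque: the oldest readings fall off automatically
        self.frame_times = deque(maxlen=num_samples)

    def measure(self, fps):
        # the caller passes an instantaneous FPS value (1 / frame duration)
        self.frame_times.append(fps)

    def calculate(self):
        # report 0.0 until at least two samples have been collected
        if len(self.frame_times) > 1:
            return numpy.average(self.frame_times)
        return 0.0


afps = AverageFPS(num_samples=5)
for duration in (0.021, 0.019, 0.020, 0.022, 0.018):  # hypothetical seconds/frame
    afps.measure(1 / duration)
print(f"smoothed FPS: {afps.calculate():0.1f}")  # roughly 50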
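
The _draw_text helper added in src/deepsparse/yolo/utils/utils.py draws a filled background rectangle sized from cv2.getTextSize, then renders the text on top of it. Below is a self-contained sketch of the same technique, exercised on a blank canvas rather than an annotated frame; the canvas size, sample text, and output filename are made up for the demo:

import cv2
import numpy

# blank canvas standing in for a video frame
frame = numpy.zeros((360, 640, 3), dtype=numpy.uint8)

text = "FPS: 48.3 | People Count: 2 | YOLOv8 on DeepSparse"
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale, thickness = 0.7, 2
x, y = 10, 10

# size the background box to the rendered text, plus a 5 px margin
(text_w, text_h), _ = cv2.getTextSize(text, font, font_scale, thickness)
cv2.rectangle(frame, (x - 5, y - 5), (x + text_w + 5, y + text_h + 5), (255, 255, 255), -1)
cv2.putText(
    frame,
    text,
    (x, int(y + text_h + font_scale - 1)),  # baseline sits just inside the box
    font,
    font_scale,
    (204, 85, 17),  # text color in BGR order, matching the diff's choice
    thickness,
    cv2.LINE_AA,
)

cv2.imwrite("overlay_demo.png", frame)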
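
Both annotate scripts now create a resizable window and break out of the capture loop on ESC/q/Q instead of discarding the waitKey result. A minimal display loop showing the same key handling, with a random image as a stand-in for the annotated webcam frame:

import cv2
import numpy

cv2.namedWindow("annotated", cv2.WINDOW_NORMAL)
while True:
    # stand-in for the annotated webcam frame
    frame = numpy.random.randint(0, 256, (360, 640, 3), dtype=numpy.uint8)
    cv2.imshow("annotated", frame)
    ch = cv2.waitKey(1)  # some platforms set modifier bits; ch & 0xFF masks them
    if ch == 27 or ch == ord("q") or ch == ord("Q"):  # ESC, q, or Q quits
        break
cv2.destroyAllWindows()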