diff --git a/README.md b/README.md index cf7b2cd..a8cbbf7 100644 --- a/README.md +++ b/README.md @@ -10,3 +10,34 @@ Then it can be run with `skellytracker`. Running the basic `skellytracker` will open the first webcam port on your computer and run pose estimaiton in realtime with mediapipe holistic as a tracker. You can specify the tracker with `skellytracker TRACKER_NAME`, where `TRACKER_NAME` is the name of an available tracker. To view the names of all available trackers, see `RUN_ME.py`. It will take some time to initialize the tracker the first time you run it, as it will likely need to download the model. + +## Using skellytracker in your project + +To use skellytracker in your project, import a tracker like `from skellytracker import YOLOPoseTracker`, then instantiate it with your desired parameters like `tracker = YOLOPoseTracker(model_size="medium")`, and then use `tracker.process_image(frame)` or `tracker.process_video(video_filepath)`. Processing image by image will let you access each individual annotated frame with `tracker.annotated_image`, and you can optionally record the data with `tracker.recorder.record()`. Access recorded data with `tracker.recorder.process_tracked_objects()`. The running, recording, and processing are done separately to give control over the amount of processing done at each step in the pipeline. Processing an entire video allows you to save the annotated frames as a video, and optionally saves and returns the data as a numpy array. Each tracker has an associated `ModelInfo` class to access model attributes. + +Skellytracker is still under development, so version updates may make breaking changes to the API. Please report any issues and pull requests to the [skellytracker repo](https://github.com/freemocap/skellytracker). + +### Extending the API +To extend the API, import the `BaseTracker` and `BaseRecorder` abstract base classes from skellytracker. 
Then create a new tracker and recorder inheriting from the base classes and implement all of the abstract methods. + +## Contributing + +We love your input! We want to make contributing to this project as easy and transparent as possible, whether it's: + +- Reporting a bug +- Discussing the current state of the code +- Submitting a fix +- Proposing new features +- Becoming a maintainer + +Pull requests are the best way to propose changes to the codebase (we +use [GitHub Flow](https://docs.github.com/en/get-started/quickstart/github-flow)). We actively welcome your pull +requests: + +1. Fork the repo and create your branch from `main`. +2. Download the development dependencies with `pip install -e '.[dev]'`. +3. If you've added code that should be tested (including any tracker), add tests. +4. If you've changed APIs, update the documentation. +5. Ensure the test suite passes by running `pytest skellytracker/tests`. +6. Make sure your code lints. +7. Make that pull request! diff --git a/skellytracker/__init__.py b/skellytracker/__init__.py index 3b8046c..b0512c8 100644 --- a/skellytracker/__init__.py +++ b/skellytracker/__init__.py @@ -27,10 +27,12 @@ try: from skellytracker.trackers.mediapipe_tracker.mediapipe_holistic_tracker import MediapipeHolisticTracker + from skellytracker.trackers.mediapipe_tracker.mediapipe_model_info import MediapipeModelInfo except: print("To use mediapipe_holistic_tracker, install skellytracker[mediapipe]") try: from skellytracker.trackers.yolo_tracker.yolo_tracker import YOLOPoseTracker + from skellytracker.trackers.yolo_tracker.yolo_model_info import YOLOModelInfo except: print("To use yolo_tracker, install skellytracker[yolo]") try: diff --git a/skellytracker/test/test_brightest_point_tracker.py b/skellytracker/tests/test_brightest_point_tracker.py similarity index 100% rename from skellytracker/test/test_brightest_point_tracker.py rename to skellytracker/tests/test_brightest_point_tracker.py diff --git 
a/skellytracker/tests/test_mediapipe_holistic_tracker.py b/skellytracker/tests/test_mediapipe_holistic_tracker.py index 58026a5..8c53bdb 100644 --- a/skellytracker/tests/test_mediapipe_holistic_tracker.py +++ b/skellytracker/tests/test_mediapipe_holistic_tracker.py @@ -1,5 +1,3 @@ -import math -import cv2 import pytest import numpy as np diff --git a/skellytracker/tests/test_test.py b/skellytracker/tests/test_test.py deleted file mode 100644 index a3abbfd..0000000 --- a/skellytracker/tests/test_test.py +++ /dev/null @@ -1,12 +0,0 @@ -def returnTrue(num): - try: - return True - except: - return False - - -def test_test(): - """This is a test of the test framework. It should always pass. - To make your own tests, copy this function, change the name, and add your own assertions. - """ - assert returnTrue(6) == True \ No newline at end of file diff --git a/skellytracker/tests/test_yolo_mediapipe_combo_tracker.py b/skellytracker/tests/test_yolo_mediapipe_combo_tracker.py new file mode 100644 index 0000000..72bd8fb --- /dev/null +++ b/skellytracker/tests/test_yolo_mediapipe_combo_tracker.py @@ -0,0 +1,231 @@ +import pytest +import numpy as np + + +from skellytracker.trackers.mediapipe_tracker.mediapipe_model_info import ( + MediapipeModelInfo, +) +from skellytracker.trackers.yolo_mediapipe_combo_tracker.yolo_mediapipe_combo_tracker import ( + YOLOMediapipeComboTracker, +) + + +@pytest.mark.usefixtures("test_image") +def test_process_image(test_image): + tracker = YOLOMediapipeComboTracker( + model_size="nano", + model_complexity=0, + ) + tracked_objects = tracker.process_image(test_image) + + assert len(tracked_objects) == 4 + assert tracked_objects["pose_landmarks"] is not None + assert tracked_objects["pose_landmarks"].extra["landmarks"] is not None + assert tracked_objects["right_hand_landmarks"] is not None + assert tracked_objects["right_hand_landmarks"].extra["landmarks"] is not None + assert tracked_objects["left_hand_landmarks"] is not None + assert 
tracked_objects["left_hand_landmarks"].extra["landmarks"] is not None + assert tracked_objects["face_landmarks"] is not None + assert tracked_objects["face_landmarks"].extra["landmarks"] is not None + + +@pytest.mark.usefixtures("test_image") +def test_annotate_image(test_image): + tracker = YOLOMediapipeComboTracker( + model_size="nano", + model_complexity=0, + ) + tracker.process_image(test_image) + + assert tracker.annotated_image is not None + + +@pytest.mark.usefixtures("test_image") +def test_record_no_buffer(test_image): + tracker = YOLOMediapipeComboTracker( + model_size="nano", + model_complexity=0, + bounding_box_buffer_percentage=0, + ) + tracked_objects = tracker.process_image(test_image) + tracker.recorder.record(tracked_objects=tracked_objects) + assert len(tracker.recorder.recorded_objects) == 1 + assert len(tracker.recorder.recorded_objects[0]) == 4 + + processed_results = tracker.recorder.process_tracked_objects( + image_size=test_image.shape[:2] + ) + assert processed_results is not None + assert processed_results.shape == ( + 1, + MediapipeModelInfo.num_tracked_points_total, + 3, + ) + + expected_results = np.array( + [ + [ + [735.7643890380859, 77.78585314750671, -485.70934295654297], + [757.0420074462891, 73.8272774219513, -451.7356872558594], + [765.8688354492188, 75.42142689228058, -452.0623016357422], + [774.5138549804688, 77.09728181362152, -452.29907989501953], + [729.3473052978516, 70.99358797073364, -452.5171661376953], + [720.2278137207031, 70.5675083398819, -452.57137298583984], + [711.7780303955078, 70.23908793926239, -452.44258880615234], + [780.3971099853516, 86.27676665782928, -254.8146629333496], + [694.5964813232422, 76.93311989307404, -251.65258407592773], + [745.7817077636719, 89.95153248310089, -411.6321563720703], + [709.8857879638672, 86.26414954662323, -411.1183166503906], + [817.0114135742188, 149.35277938842773, -178.73090744018555], + [546.7483139038086, 128.01610708236694, -154.72180366516113], + [825.9496307373047, 
219.85299110412598, -142.79363632202148], + [394.10709381103516, 183.29222917556763, -92.53458023071289], + [850.7412719726562, 284.1442108154297, -277.7159309387207], + [251.08980178833008, 229.20471668243408, -194.44988250732422], + [860.8124542236328, 301.0432004928589, -309.02509689331055], + [211.39860153198242, 242.8161120414734, -219.8578643798828], + [843.7083435058594, 300.99024295806885, -385.36643981933594], + [222.63912200927734, 246.6996932029724, -290.17900466918945], + [839.26025390625, 295.74596643447876, -312.1304130554199], + [240.05075454711914, 241.5028166770935, -226.83252334594727], + [632.852668762207, 273.6677813529968, -22.331013679504395], + [484.76829528808594, 259.1326332092285, 21.80124521255493], + [549.3550491333008, 360.4587650299072, -38.61574411392212], + [425.41954040527344, 343.2985496520996, -20.899696350097656], + [486.68445587158203, 415.20827293395996, 411.0944366455078], + [399.84127044677734, 408.2763719558716, 398.0601501464844], + [476.32495880126953, 416.31832122802734, 451.4645767211914], + ] + ] + ) + assert np.allclose( + processed_results[:, :30, :], expected_results[:, :30, :], atol=2 + ) + + +@pytest.mark.usefixtures("test_image") +def test_record_buffer_by_image_size(test_image): + tracker = YOLOMediapipeComboTracker( + model_size="nano", + model_complexity=0, + bounding_box_buffer_percentage=10, + buffer_size_method="buffer_by_image_size", + ) + tracked_objects = tracker.process_image(test_image) + tracker.recorder.record(tracked_objects=tracked_objects) + assert len(tracker.recorder.recorded_objects) == 1 + assert len(tracker.recorder.recorded_objects[0]) == 4 + + processed_results = tracker.recorder.process_tracked_objects( + image_size=test_image.shape[:2] + ) + assert processed_results is not None + assert processed_results.shape == ( + 1, + MediapipeModelInfo.num_tracked_points_total, + 3, + ) + + expected_results = np.array( + [ + [ + [732.0687866210938, 79.0345823764801, -635.7109069824219], + 
[753.8626098632812, 75.13578772544861, -608.0960464477539], + [761.7510223388672, 76.88950717449188, -608.355827331543], + [769.5530700683594, 78.81806373596191, -608.4667587280273], + [729.2195892333984, 72.18442976474762, -607.5491333007812], + [721.5387725830078, 71.86003804206848, -607.6824188232422], + [714.1613006591797, 71.62245333194733, -607.6252746582031], + [775.9226226806641, 86.93966388702393, -409.65084075927734], + [698.8643646240234, 77.10046291351318, -402.1929931640625], + [741.6652679443359, 90.84179520606995, -557.6795196533203], + [709.4770050048828, 86.31318032741547, -555.5589294433594], + [814.6518707275391, 149.26713109016418, -293.33160400390625], + [544.8312759399414, 127.3474645614624, -270.33700942993164], + [831.3512420654297, 219.2579483985901, -218.43671798706055], + [392.59105682373047, 186.1086130142212, -182.6705551147461], + [854.2007446289062, 282.19802141189575, -344.9155044555664], + [248.35845947265625, 233.24616193771362, -273.1560516357422], + [857.7934265136719, 299.5019817352295, -382.82962799072266], + [202.44756698608398, 248.4213924407959, -300.88146209716797], + [844.1407775878906, 300.61514139175415, -460.82714080810547], + [212.99942016601562, 251.683087348938, -378.4083938598633], + [839.8464965820312, 294.21818017959595, -380.2419662475586], + [230.61071395874023, 246.78754091262817, -307.7861785888672], + [639.9309539794922, 268.61598014831543, -0.536465011537075], + [481.90975189208984, 254.10432815551758, 0.47791849821805954], + [546.6202926635742, 351.9411635398865, 89.65752601623535], + [416.8575668334961, 342.4706482887268, -87.49273300170898], + [496.87782287597656, 406.89836025238037, 635.2282333374023], + [393.65081787109375, 407.12096214294434, 370.8687210083008], + [481.2765884399414, 408.57420444488525, 681.8212890625], + ] + ] + ) + assert np.allclose( + processed_results[:, :30, :], expected_results[:, :30, :], atol=2 + ) + + +@pytest.mark.usefixtures("test_image") +def 
test_record_buffer_by_box_size(test_image): + tracker = YOLOMediapipeComboTracker( + model_size="nano", + model_complexity=0, + bounding_box_buffer_percentage=10, + buffer_size_method="buffer_by_box_size", + ) + tracked_objects = tracker.process_image(test_image) + tracker.recorder.record(tracked_objects=tracked_objects) + assert len(tracker.recorder.recorded_objects) == 1 + assert len(tracker.recorder.recorded_objects[0]) == 4 + + processed_results = tracker.recorder.process_tracked_objects( + image_size=test_image.shape[:2] + ) + assert processed_results is not None + assert processed_results.shape == ( + 1, + MediapipeModelInfo.num_tracked_points_total, + 3, + ) + + expected_results = np.array( + [ + [ + [731.2718200683594, 77.88420975208282, -548.9945602416992], + [754.6127319335938, 74.30741965770721, -521.489372253418], + [762.8125762939453, 76.33775532245636, -521.7532348632812], + [771.2681579589844, 78.52324604988098, -521.8650436401367], + [730.0675964355469, 70.9510749578476, -521.0283660888672], + [722.1942138671875, 70.54689288139343, -521.1295318603516], + [714.8076629638672, 70.24498343467712, -521.0332870483398], + [779.2241668701172, 87.20496118068695, -340.54332733154297], + [700.3346252441406, 76.72817766666412, -330.6981658935547], + [741.3204956054688, 90.19120931625366, -479.1018295288086], + [708.2732391357422, 85.63711881637573, -476.2978744506836], + [813.9542388916016, 149.21152353286743, -249.2682647705078], + [545.847282409668, 128.2720971107483, -214.6741485595703], + [831.6414642333984, 217.95576810836792, -199.20928955078125], + [392.5309371948242, 185.60349941253662, -141.00683212280273], + [850.6895446777344, 280.559663772583, -331.45111083984375], + [252.97996520996094, 230.9888792037964, -229.0725326538086], + [856.6841125488281, 298.4851026535034, -367.7482604980469], + [203.86322021484375, 247.05041885375977, -250.86517333984375], + [841.0806274414062, 299.39956426620483, -436.93775177001953], + [215.9295654296875, 
250.416419506073, -324.9034881591797], + [836.7790985107422, 292.6754379272461, -363.13419342041016], + [234.16423797607422, 246.12889766693115, -261.9901657104492], + [639.0184783935547, 271.9633984565735, -11.131852865219116], + [485.26206970214844, 257.6048684120178, 10.823948383331299], + [544.7049331665039, 353.29198837280273, 29.705591201782227], + [422.3688507080078, 341.57193660736084, -53.83963108062744], + [484.64855194091797, 412.5028896331787, 527.0954132080078], + [392.8578186035156, 406.18433475494385, 385.8837127685547], + [471.3254165649414, 411.8346977233887, 569.1376495361328], + ] + ] + ) + assert np.allclose( + processed_results[:, :30, :], expected_results[:, :30, :], atol=2 + ) diff --git a/skellytracker/tests/test_yolo_object_tracker.py b/skellytracker/tests/test_yolo_object_tracker.py new file mode 100644 index 0000000..4cd0843 --- /dev/null +++ b/skellytracker/tests/test_yolo_object_tracker.py @@ -0,0 +1,42 @@ +import pytest +import numpy as np + + +from skellytracker.trackers.yolo_object_tracker.yolo_object_tracker import ( + YOLOObjectTracker, +) + + +@pytest.mark.usefixtures("test_image") +def test_process_image_person_only(test_image): + tracker = YOLOObjectTracker(model_size="nano", person_only=True) + tracked_objects = tracker.process_image(test_image) + + assert len(tracked_objects) == 1 + assert tracked_objects["object"] is not None + assert tracked_objects["object"].extra["boxes_xyxy"] is not None + assert np.allclose(tracked_objects["object"].extra["boxes_xyxy"], [90.676,96.981,493.54,812.03], atol=1e-2) + assert tracked_objects["object"].extra["original_image_shape"] == (1280, 720) + +@pytest.mark.usefixtures("test_image") +def test_annotate_image(test_image): + tracker = YOLOObjectTracker() + tracker.process_image(test_image) + + assert tracker.annotated_image is not None + + +@pytest.mark.usefixtures("test_image") +def test_record(test_image): + tracker = YOLOObjectTracker(model_size="nano", person_only=True) + tracked_objects 
= tracker.process_image(test_image) + tracker.recorder.record(tracked_objects=tracked_objects) + assert len(tracker.recorder.recorded_objects) == 1 + + processed_results = tracker.recorder.process_tracked_objects() + assert processed_results is not None + assert processed_results.shape == (1,4) + + assert np.allclose( + processed_results, [90.676,96.981,493.54,812.03], atol=1e-2 + ) diff --git a/skellytracker/trackers/base_tracker/base_recorder.py b/skellytracker/trackers/base_tracker/base_recorder.py index 9974da0..9a0fc6e 100644 --- a/skellytracker/trackers/base_tracker/base_recorder.py +++ b/skellytracker/trackers/base_tracker/base_recorder.py @@ -53,6 +53,8 @@ def save(self, file_path: str) -> None: :return: None """ if self.recorded_objects_array is None: - self.process_tracked_objects() + recorded_objects_array = self.process_tracked_objects() + else: + recorded_objects_array = self.recorded_objects_array logger.info(f"Saving recorded objects to {file_path}") - np.save(file_path, self.recorded_objects_array) + np.save(file_path, recorded_objects_array) diff --git a/skellytracker/trackers/base_tracker/base_tracker.py b/skellytracker/trackers/base_tracker/base_tracker.py index a3ab1c5..a21c0ed 100644 --- a/skellytracker/trackers/base_tracker/base_tracker.py +++ b/skellytracker/trackers/base_tracker/base_tracker.py @@ -25,8 +25,8 @@ class BaseTracker(ABC): def __init__( self, - tracked_object_names: List[str] = None, - recorder: BaseRecorder = None, + recorder: BaseRecorder, + tracked_object_names: List[str] = [], **data: Any, ): self.recorder = recorder @@ -65,7 +65,7 @@ def process_video( output_video_filepath: Optional[Union[str, Path]] = None, save_data_bool: bool = False, use_tqdm: bool = True, - ) -> np.ndarray: + ) -> Optional[np.ndarray]: """ Run the tracker on a video. @@ -73,7 +73,7 @@ def process_video( :param output_video_filepath: Path to save annotated video to, does not save video if None. :param save_data_bool: Whether to save the data to a file. 
:param use_tqdm: Whether to use tqdm to show a progress bar - :return: Array of tracked keypoint data + :return: Array of tracked keypoint data, if save_data_bool is True """ cap = cv2.VideoCapture(str(input_video_filepath)) @@ -118,6 +118,8 @@ def process_video( if self.recorder is not None: self.recorder.record(self.tracked_objects) if video_handler is not None: + if self.annotated_image is None: + self.annotated_image = frame video_handler.add_frame(self.annotated_image) ret, frame = cap.read() @@ -130,7 +132,7 @@ def process_video( output_array = self.recorder.process_tracked_objects(image_size=image_size) if save_data_bool: self.recorder.save( - file_path=Path(input_video_filepath).with_suffix(".npy") + file_path=str(Path(input_video_filepath).with_suffix(".npy")) ) else: output_array = None diff --git a/skellytracker/trackers/charuco_tracker/charuco_tracker.py b/skellytracker/trackers/charuco_tracker/charuco_tracker.py index c0d172c..46c72a3 100644 --- a/skellytracker/trackers/charuco_tracker/charuco_tracker.py +++ b/skellytracker/trackers/charuco_tracker/charuco_tracker.py @@ -16,7 +16,7 @@ def __init__(self, squareLength: float = 1, markerLength: float = .8, ): - super().__init__(tracked_object_names=tracked_object_names) + super().__init__(recorder=None, tracked_object_names=tracked_object_names) self.board = cv2.aruco.CharucoBoard_create(squares_x, squares_y, squareLength, markerLength, dictionary) def process_image(self, image: np.ndarray, **kwargs) -> Dict[str, TrackedObject]: diff --git a/skellytracker/trackers/mmpose_tracker/mmpose_tracker.py b/skellytracker/trackers/mmpose_tracker/mmpose_tracker.py index 64e6b62..abcf4c1 100644 --- a/skellytracker/trackers/mmpose_tracker/mmpose_tracker.py +++ b/skellytracker/trackers/mmpose_tracker/mmpose_tracker.py @@ -7,7 +7,7 @@ class MMPoseTracker(BaseTracker): def __init__(self, config_file, checkpoint_file): - super().__init__(tracked_object_names=["human_pose"]) + super().__init__(recorder=None, 
tracked_object_names=["human_pose"]) self.model = init_pose_model(config_file, checkpoint_file, device='cuda:0') def process_image(self, image, **kwargs): diff --git a/skellytracker/trackers/segment_anything_tracker/segment_anything_tracker.py b/skellytracker/trackers/segment_anything_tracker/segment_anything_tracker.py index 4cd327e..f9a8882 100644 --- a/skellytracker/trackers/segment_anything_tracker/segment_anything_tracker.py +++ b/skellytracker/trackers/segment_anything_tracker/segment_anything_tracker.py @@ -6,8 +6,8 @@ from skellytracker.trackers.base_tracker.base_tracker import BaseTracker class SAMTracker(BaseTracker): - def __init__(self, model_size: str="nano"): - super().__init__(tracked_object_names=["segmentation"]) + def __init__(self): + super().__init__(recorder=None,tracked_object_names=["segmentation"]) self.model = SAM('sam_b.pt') diff --git a/skellytracker/trackers/tracker_manager.py b/skellytracker/trackers/tracker_manager.py deleted file mode 100644 index 63f7af5..0000000 --- a/skellytracker/trackers/tracker_manager.py +++ /dev/null @@ -1,44 +0,0 @@ -import multiprocessing as mp -import time -from asyncio import sleep -from typing import List - -from skellytracker.trackers.base_tracker.base_tracker import BaseTracker -from skellytracker.trackers.bright_point_tracker.brightest_point_tracker import BrightestPointTracker - - -class TrackerManager: - def __init__(self, trackers: List[BaseTracker]): - self.trackers = trackers - self.parent_connection, self.child_connection = mp.Pipe() - self.process = mp.Process(target=self._process_images, args=(self.child_connection, self.trackers)) - self.process.start() - - @staticmethod - def _process_images(conn, trackers): - while True: - time.sleep(0.001) - image = conn.recv() - if image is None: - break - for tracker in trackers: - tracker.process_image(image) - - def add_image(self, image): - self.parent_connection.send(image) - - def demo(self): - self.trackers[0].demo() - - def stop(self): - 
self.parent_connection.send(None) - self.process.join() - - - -if __name__ == "__main__": - - trackers = [BrightestPointTracker()] - - manager = TrackerManager(trackers) - manager.demo() diff --git a/skellytracker/trackers/yolo_object_tracker/yolo_object_recorder.py b/skellytracker/trackers/yolo_object_tracker/yolo_object_recorder.py index 5f785e9..3afea7e 100644 --- a/skellytracker/trackers/yolo_object_tracker/yolo_object_recorder.py +++ b/skellytracker/trackers/yolo_object_tracker/yolo_object_recorder.py @@ -13,6 +13,6 @@ def record(self, tracked_objects: Dict[str, TrackedObject]) -> None: def process_tracked_objects(self, **kwargs) -> np.ndarray: self.recorded_objects_array = np.zeros((len(self.recorded_objects), 4)) for i, recorded_object in enumerate(self.recorded_objects): - self.recorded_objects_array[i, :] = recorded_object.extra["boxes_xywh"] + self.recorded_objects_array[i, :] = recorded_object.extra["boxes_xyxy"] return self.recorded_objects_array diff --git a/skellytracker/trackers/yolo_object_tracker/yolo_object_tracker.py b/skellytracker/trackers/yolo_object_tracker/yolo_object_tracker.py index 62dca7b..0257bbf 100644 --- a/skellytracker/trackers/yolo_object_tracker/yolo_object_tracker.py +++ b/skellytracker/trackers/yolo_object_tracker/yolo_object_tracker.py @@ -24,21 +24,23 @@ def __init__( pytorch_model = yolo_object_model_dictionary[model_size] self.model = YOLO(pytorch_model) self.confidence_threshold = confidence_threshold + # TODO: When we expose this in freemocap, replace this with an int/list[int] to decide which class to track + # TODO: Will also need to parameterize the "max_det" and setup tracker to take multiple tracked objects if person_only: self.classes = 0 # 0 is the YOLO class for person detection else: - self.classes = None + self.classes = None # None includes all classes def process_image(self, image, **kwargs) -> Dict[str, TrackedObject]: results = self.model(image, classes=self.classes, max_det=1, verbose=False, 
conf=self.confidence_threshold) - box_xyxy = np.asarray(results[0].boxes.xyxy.cpu()).flatten() # if on GPU, need to copy to CPU before np array conversion + box_xyxy = np.asarray(results[0].boxes.xyxy.cpu()).flatten() # On GPU, need to copy to CPU before np array conversion if box_xyxy.size > 0: self.tracked_objects["object"].pixel_x = (box_xyxy[0] + box_xyxy[2]) / 0.5 self.tracked_objects["object"].pixel_y = (box_xyxy[1] + box_xyxy[3]) / 0.5 - self.tracked_objects["object"].extra["boxes_xywy"] = box_xyxy + self.tracked_objects["object"].extra["boxes_xyxy"] = box_xyxy self.tracked_objects["object"].extra["original_image_shape"] = results[ 0 ].boxes.orig_shape