diff --git a/src/super_gradients/training/models/classification_models/base_classifer.py b/src/super_gradients/training/models/classification_models/base_classifer.py
index 56ea93dbed..f7d21e3621 100644
--- a/src/super_gradients/training/models/classification_models/base_classifer.py
+++ b/src/super_gradients/training/models/classification_models/base_classifer.py
@@ -30,10 +30,11 @@ def set_dataset_processing_params(self, class_names: Optional[List[str]] = None,
         self._image_processor = image_processor or self._image_processor
 
     @lru_cache(maxsize=1)
-    def _get_pipeline(self, fuse_model: bool = True, skip_image_resizing: bool = False) -> ClassificationPipeline:
+    def _get_pipeline(self, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True) -> ClassificationPipeline:
         """Instantiate the prediction pipeline of this model.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
         :param skip_image_resizing: If True, the image processor will not resize the images.
+        :param fp16: If True, use mixed precision for inference.
         """
         if None in (self._class_names, self._image_processor):
             raise RuntimeError(
@@ -48,24 +49,34 @@ def _get_pipeline(self, fuse_model: bool = True, skip_image_resizing: bool = Fal
             image_processor=self._image_processor,
             class_names=self._class_names,
             fuse_model=fuse_model,
+            fp16=fp16,
         )
         return pipeline
 
-    def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True, skip_image_resizing: bool = False) -> ImagesClassificationPrediction:
+    def predict(
+        self,
+        images: ImageSource,
+        batch_size: int = 32,
+        fuse_model: bool = True,
+        skip_image_resizing: bool = False,
+        fp16: bool = True,
+    ) -> ImagesClassificationPrediction:
         """Predict an image or a list of images.
 
         :param images: Images to predict.
         :param batch_size: Maximum number of images to process at the same time.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
         :param skip_image_resizing: If True, the image processor will not resize the images.
+        :param fp16: If True, use mixed precision for inference.
         """
-        pipeline = self._get_pipeline(fuse_model=fuse_model, skip_image_resizing=skip_image_resizing)
+        pipeline = self._get_pipeline(fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16)
         return pipeline(images, batch_size=batch_size)  # type: ignore
 
-    def predict_webcam(self, fuse_model: bool = True, skip_image_resizing: bool = False) -> None:
+    def predict_webcam(self, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True) -> None:
         """Predict using webcam.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
         :param skip_image_resizing: If True, the image processor will not resize the images.
+        :param fp16: If True, use mixed precision for inference.
""" - pipeline = self._get_pipeline(fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) pipeline.predict_webcam() diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index d5519d7376..1dc372e2aa 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -5,6 +5,7 @@ * each module accepts in_channels and other parameters * each module defines out_channels property on construction """ + from typing import Union, Optional, List, Callable from functools import lru_cache @@ -208,6 +209,7 @@ def _get_pipeline( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ) -> DetectionPipeline: """Instantiate the prediction pipeline of this model. @@ -222,6 +224,7 @@ def _get_pipeline( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. """ if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf): raise RuntimeError( @@ -256,6 +259,7 @@ def _get_pipeline( ), class_names=self._class_names, fuse_model=fuse_model, + fp16=fp16, ) return pipeline @@ -271,6 +275,7 @@ def predict( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ) -> ImagesDetectionPrediction: """Predict an image or a list of images. @@ -287,6 +292,7 @@ def predict( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. """ pipeline = self._get_pipeline( iou=iou, @@ -297,6 +303,7 @@ def predict( max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, + fp16=fp16, ) return pipeline(images, batch_size=batch_size) # type: ignore @@ -310,6 +317,7 @@ def predict_webcam( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ): """Predict using webcam. @@ -325,6 +333,7 @@ def predict_webcam( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. 
""" pipeline = self._get_pipeline( iou=iou, @@ -335,6 +344,7 @@ def predict_webcam( max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, + fp16=fp16, ) pipeline.predict_webcam() diff --git a/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py b/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py index 9556690d46..98b7e319f0 100644 --- a/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py +++ b/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py @@ -208,6 +208,7 @@ def _get_pipeline( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ) -> DetectionPipeline: """Instantiate the prediction pipeline of this model. @@ -256,6 +257,7 @@ def _get_pipeline( ), class_names=self._class_names, fuse_model=fuse_model, + fp16=fp16, ) return pipeline @@ -271,6 +273,7 @@ def predict( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ) -> ImagesDetectionPrediction: """Predict an image or a list of images. @@ -287,6 +290,7 @@ def predict( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, the model will use mixed precision for inference. """ pipeline = self._get_pipeline( iou=iou, @@ -297,6 +301,7 @@ def predict( max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, + fp16=fp16, ) return pipeline(images, batch_size=batch_size) # type: ignore @@ -310,13 +315,13 @@ def predict_webcam( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ): """Predict using webcam. :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. :param conf: (Optional) Below the confidence threshold, prediction are discarded. If None, the default value associated to the training is used. - :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. @@ -325,6 +330,7 @@ def predict_webcam( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. 
""" pipeline = self._get_pipeline( iou=iou, @@ -335,6 +341,7 @@ def predict_webcam( max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, + fp16=fp16, ) pipeline.predict_webcam() diff --git a/src/super_gradients/training/models/detection_models/yolo_base.py b/src/super_gradients/training/models/detection_models/yolo_base.py index d46cedb5aa..fcef3df40f 100755 --- a/src/super_gradients/training/models/detection_models/yolo_base.py +++ b/src/super_gradients/training/models/detection_models/yolo_base.py @@ -537,7 +537,7 @@ def set_dataset_processing_params( @lru_cache(maxsize=1) def _get_pipeline( - self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False + self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True ) -> DetectionPipeline: """Instantiate the prediction pipeline of this model. @@ -546,6 +546,7 @@ def _get_pipeline( If None, the default value associated to the training is used. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf): raise RuntimeError( @@ -569,6 +570,7 @@ def _get_pipeline( post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf), class_names=self._class_names, fuse_model=fuse_model, + fp16=fp16, ) return pipeline @@ -580,6 +582,7 @@ def predict( batch_size: int = 32, fuse_model: bool = True, skip_image_resizing: bool = False, + fp16: bool = True, ) -> ImagesDetectionPrediction: """Predict an image or a list of images. @@ -590,11 +593,12 @@ def predict( :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ - pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) return pipeline(images, batch_size=batch_size) # type: ignore - def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False): + def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False, fp16=True): """Predict using webcam. :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. @@ -602,8 +606,9 @@ def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = No If None, the default value associated to the training is used. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. 
""" - pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) pipeline.predict_webcam() def train(self, mode: bool = True): diff --git a/src/super_gradients/training/models/pose_estimation_models/dekr_hrnet.py b/src/super_gradients/training/models/pose_estimation_models/dekr_hrnet.py index bdb3ffa832..a335db6436 100644 --- a/src/super_gradients/training/models/pose_estimation_models/dekr_hrnet.py +++ b/src/super_gradients/training/models/pose_estimation_models/dekr_hrnet.py @@ -583,7 +583,9 @@ def set_dataset_processing_params( self._default_nms_conf = conf or self._default_nms_conf @lru_cache(maxsize=1) - def _get_pipeline(self, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False) -> PoseEstimationPipeline: + def _get_pipeline( + self, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True + ) -> PoseEstimationPipeline: """Instantiate the prediction pipeline of this model. :param conf: (Optional) Below the confidence threshold, prediction are discarded. @@ -621,11 +623,18 @@ def _get_pipeline(self, conf: Optional[float] = None, fuse_model: bool = True, s keypoint_colors=self._keypoint_colors, post_prediction_callback=self.get_post_prediction_callback(conf=conf), fuse_model=fuse_model, + fp16=fp16, ) return pipeline def predict( - self, images: ImageSource, conf: Optional[float] = None, batch_size: int = 32, fuse_model: bool = True, skip_image_resizing: bool = False + self, + images: ImageSource, + conf: Optional[float] = None, + batch_size: int = 32, + fuse_model: bool = True, + skip_image_resizing: bool = False, + fp16: bool = True, ) -> ImagesPoseEstimationPrediction: """Predict an image or a list of images. @@ -635,11 +644,12 @@ def predict( :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ - pipeline = self._get_pipeline(conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) return pipeline(images, batch_size=batch_size) # type: ignore - def predict_webcam(self, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False): + def predict_webcam(self, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True): """Predict using webcam. :param conf: (Optional) Below the confidence threshold, prediction are discarded. @@ -647,7 +657,7 @@ def predict_webcam(self, conf: Optional[float] = None, fuse_model: bool = True, :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. 
""" - pipeline = self._get_pipeline(conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) pipeline.predict_webcam() def train(self, mode: bool = True): diff --git a/src/super_gradients/training/models/pose_estimation_models/yolo_nas_pose/yolo_nas_pose_variants.py b/src/super_gradients/training/models/pose_estimation_models/yolo_nas_pose/yolo_nas_pose_variants.py index 4856386716..cff6ca577c 100644 --- a/src/super_gradients/training/models/pose_estimation_models/yolo_nas_pose/yolo_nas_pose_variants.py +++ b/src/super_gradients/training/models/pose_estimation_models/yolo_nas_pose/yolo_nas_pose_variants.py @@ -149,16 +149,18 @@ def predict( batch_size: int = 32, fuse_model: bool = True, skip_image_resizing: bool = False, + fp16: bool = True, ) -> PoseEstimationPrediction: """Predict an image or a list of images. - :param images: Images to predict. - :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. - :param conf: (Optional) Below the confidence threshold, prediction are discarded. - If None, the default value associated to the training is used. - :param batch_size: Maximum number of images to process at the same time. - :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param images: Images to predict. + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param batch_size: Maximum number of images to process at the same time. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ pipeline = self._get_pipeline( iou=iou, @@ -167,9 +169,43 @@ def predict( post_nms_max_predictions=post_nms_max_predictions, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, + fp16=fp16, ) return pipeline(images, batch_size=batch_size) # type: ignore + def predict_webcam( + self, + iou: Optional[float] = None, + conf: Optional[float] = None, + pre_nms_max_predictions: Optional[int] = None, + post_nms_max_predictions: Optional[int] = None, + batch_size: int = 32, + fuse_model: bool = True, + skip_image_resizing: bool = False, + fp16: bool = True, + ): + """Predict using webcam. + + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param batch_size: Maximum number of images to process at the same time. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. 
+ + """ + pipeline = self._get_pipeline( + iou=iou, + conf=conf, + pre_nms_max_predictions=pre_nms_max_predictions, + post_nms_max_predictions=post_nms_max_predictions, + fuse_model=fuse_model, + skip_image_resizing=skip_image_resizing, + fp16=fp16, + ) + pipeline.predict_webcam() + @lru_cache(maxsize=1) def _get_pipeline( self, @@ -179,14 +215,16 @@ def _get_pipeline( post_nms_max_predictions: Optional[int] = None, fuse_model: bool = True, skip_image_resizing: bool = False, + fp16: bool = True, ) -> PoseEstimationPipeline: """Instantiate the prediction pipeline of this model. - :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. - :param conf: (Optional) Below the confidence threshold, prediction are discarded. - If None, the default value associated to the training is used. - :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ if None in (self._image_processor, self._default_nms_iou, self._default_nms_conf, self._edge_links): raise RuntimeError( @@ -219,6 +257,7 @@ def _get_pipeline( edge_links=self._edge_links, edge_colors=self._edge_colors, keypoint_colors=self._keypoint_colors, + fp16=fp16, ) return pipeline diff --git a/src/super_gradients/training/models/segmentation_models/segmentation_module.py b/src/super_gradients/training/models/segmentation_models/segmentation_module.py index 745d1cd898..86721298a5 100644 --- a/src/super_gradients/training/models/segmentation_models/segmentation_module.py +++ b/src/super_gradients/training/models/segmentation_models/segmentation_module.py @@ -88,7 +88,7 @@ def set_dataset_processing_params( self._image_processor = image_processor or self._image_processor @lru_cache(maxsize=1) - def _get_pipeline(self, fuse_model: bool = True) -> SegmentationPipeline: + def _get_pipeline(self, fuse_model: bool = True, fp16: bool = True) -> SegmentationPipeline: """Instantiate the segmentation pipeline of this model. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. """ @@ -102,23 +102,26 @@ def _get_pipeline(self, fuse_model: bool = True) -> SegmentationPipeline: image_processor=self._image_processor, class_names=self._class_names, fuse_model=fuse_model, + fp16=fp16, ) return pipeline - def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True) -> ImagesSegmentationPrediction: + def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True, fp16: bool = True) -> ImagesSegmentationPrediction: """Predict an image or a list of images. :param images: Images to predict. :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. 
+        :param fp16: If True, use mixed precision for inference.
         """
-        pipeline = self._get_pipeline(fuse_model=fuse_model)
+        pipeline = self._get_pipeline(fuse_model=fuse_model, fp16=fp16)
         return pipeline(images, batch_size=batch_size)  # type: ignore
 
-    def predict_webcam(self, fuse_model: bool = True):
+    def predict_webcam(self, fuse_model: bool = True, fp16: bool = True):
         """Predict using webcam.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
+        :param fp16: If True, use mixed precision for inference.
         """
-        pipeline = self._get_pipeline(fuse_model=fuse_model)
+        pipeline = self._get_pipeline(fuse_model=fuse_model, fp16=fp16)
         pipeline.predict_webcam()
 
     def get_input_shape_steps(self) -> Tuple[int, int]:
diff --git a/src/super_gradients/training/models/segmentation_models/stdc.py b/src/super_gradients/training/models/segmentation_models/stdc.py
index 93de46c168..01c75f3338 100644
--- a/src/super_gradients/training/models/segmentation_models/stdc.py
+++ b/src/super_gradients/training/models/segmentation_models/stdc.py
@@ -2,6 +2,7 @@
 Implementation of paper: "Rethinking BiSeNet For Real-time Semantic Segmentation", https://arxiv.org/abs/2104.13188
 Based on original implementation: https://github.com/MichaelFan01/STDC-Seg, cloned 23/08/2021, commit 59ff37f
 """
+
 from functools import lru_cache
 from typing import Union, List, Optional, Callable, Dict, Tuple
 from abc import ABC, abstractmethod
@@ -659,7 +660,7 @@ def set_dataset_processing_params(
         self._image_processor = image_processor or self._image_processor
 
     @lru_cache(1)
-    def _get_pipeline(self, fuse_model: bool = True) -> SegmentationPipeline:
+    def _get_pipeline(self, fuse_model: bool = True, fp16: bool = True) -> SegmentationPipeline:
         """Instantiate the segmentation pipeline of this model.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
         """
@@ -676,20 +677,22 @@ def _get_pipeline(self, fuse_model: bool = True) -> SegmentationPipeline:
         )
         return pipeline
 
-    def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True) -> ImagesSegmentationPrediction:
+    def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True, fp16: bool = True) -> ImagesSegmentationPrediction:
         """Predict an image or a list of images.
         :param images: Images to predict.
         :param batch_size: Maximum number of images to process at the same time.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
+        :param fp16: If True, use mixed precision for inference.
         """
-        pipeline = self._get_pipeline(fuse_model=fuse_model)
+        pipeline = self._get_pipeline(fuse_model=fuse_model, fp16=fp16)
         return pipeline(images, batch_size=batch_size)  # type: ignore
 
-    def predict_webcam(self, fuse_model: bool = True):
+    def predict_webcam(self, fuse_model: bool = True, fp16: bool = True):
         """Predict using webcam.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
+        :param fp16: If True, use mixed precision for inference.
""" - pipeline = self._get_pipeline(fuse_model=fuse_model) + pipeline = self._get_pipeline(fuse_model=fuse_model, fp16=fp16) pipeline.predict_webcam() def get_input_shape_steps(self) -> Tuple[int, int]: diff --git a/src/super_gradients/training/pipelines/pipelines.py b/src/super_gradients/training/pipelines/pipelines.py index e79cb9a315..8c625c2183 100644 --- a/src/super_gradients/training/pipelines/pipelines.py +++ b/src/super_gradients/training/pipelines/pipelines.py @@ -73,6 +73,7 @@ def __init__( device: Optional[str] = None, fuse_model: bool = True, dtype: Optional[torch.dtype] = None, + fp16: bool = True, ): model_device: torch.device = infer_model_device(model=model) if device: @@ -89,6 +90,7 @@ def __init__( self.image_processor = image_processor self.fuse_model = fuse_model # If True, the model will be fused in the first forward pass, to make sure it gets the right input_size + self.fp16 = fp16 def _fuse_model(self, input_example: torch.Tensor): logger.info("Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`") @@ -205,7 +207,7 @@ def _generate_prediction_result_single_batch(self, images: Iterable[np.ndarray]) ) # Predict - with eval_mode(self.model), torch.no_grad(), torch.cuda.amp.autocast(): + with eval_mode(self.model), torch.no_grad(), torch.cuda.amp.autocast(enabled=self.fp16): torch_inputs = torch.from_numpy(np.array(preprocessed_images)).to(self.device) torch_inputs = torch_inputs.to(self.dtype) @@ -284,6 +286,7 @@ class DetectionPipeline(Pipeline): :param image_processor: Single image processor or a list of image processors for preprocessing and postprocessing the images. :param device: The device on which the model will be run. If None, will run on current model device. Use "cuda" for GPU support. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param fp16: If True, use mixed precision for inference. 
""" def __init__( @@ -294,6 +297,7 @@ def __init__( device: Optional[str] = None, image_processor: Union[Processing, List[Processing]] = None, fuse_model: bool = True, + fp16: bool = True, ): if isinstance(image_processor, list): image_processor = ComposeProcessing(image_processor) @@ -308,6 +312,7 @@ def __init__( image_processor=image_processor, class_names=class_names, fuse_model=fuse_model, + fp16=fp16, ) self.post_prediction_callback = post_prediction_callback @@ -378,6 +383,7 @@ def __init__( device: Optional[str] = None, image_processor: Union[Processing, List[Processing]] = None, fuse_model: bool = True, + fp16: bool = True, ): if isinstance(image_processor, list): image_processor = ComposeProcessing(image_processor) @@ -388,6 +394,7 @@ def __init__( image_processor=image_processor, class_names=None, fuse_model=fuse_model, + fp16=fp16, ) self.post_prediction_callback = post_prediction_callback self.edge_links = np.asarray(edge_links, dtype=int) @@ -408,9 +415,11 @@ def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], m PoseEstimationPrediction( poses=image_level_predictions.poses.cpu().numpy() if torch.is_tensor(image_level_predictions.poses) else image_level_predictions.poses, scores=image_level_predictions.scores.cpu().numpy() if torch.is_tensor(image_level_predictions.scores) else image_level_predictions.scores, - bboxes_xyxy=image_level_predictions.bboxes_xyxy.cpu().numpy() - if torch.is_tensor(image_level_predictions.bboxes_xyxy) - else image_level_predictions.bboxes_xyxy, + bboxes_xyxy=( + image_level_predictions.bboxes_xyxy.cpu().numpy() + if torch.is_tensor(image_level_predictions.bboxes_xyxy) + else image_level_predictions.bboxes_xyxy + ), image_shape=image.shape, edge_links=self.edge_links, edge_colors=self.edge_colors, @@ -450,6 +459,7 @@ class ClassificationPipeline(Pipeline): :param image_processor: Single image processor or a list of image processors for preprocessing and postprocessing the images. :param device: The device on which the model will be run. If None, will run on current model device. Use "cuda" for GPU support. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param fp16: If True, use mixed precision for inference. """ def __init__( @@ -459,6 +469,7 @@ def __init__( device: Optional[str] = None, image_processor: Union[Processing, List[Processing]] = None, fuse_model: bool = True, + fp16: bool = True, ): super().__init__( model=model, @@ -466,6 +477,7 @@ def __init__( image_processor=image_processor, class_names=class_names, fuse_model=fuse_model, + fp16=fp16, ) def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[ClassificationPrediction]: @@ -516,6 +528,7 @@ class SegmentationPipeline(Pipeline): :param image_processor: Single image processor or a list of image processors for preprocessing and postprocessing the images. :param device: The device on which the model will be run. If None, will run on current model device. Use "cuda" for GPU support. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param fp16: If True, use mixed precision for inference. 
""" def __init__( @@ -525,10 +538,11 @@ def __init__( device: Optional[str] = None, image_processor: Optional[Processing] = None, fuse_model: bool = True, + fp16: bool = True, ): - super().__init__(model=model, device=device, image_processor=image_processor, class_names=class_names, fuse_model=fuse_model) + super().__init__(model=model, device=device, image_processor=image_processor, class_names=class_names, fuse_model=fuse_model, fp16=fp16) - def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[DetectionPrediction]: + def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[SegmentationPrediction]: """Decode the model output, by applying post prediction callback. This includes NMS. :param model_output: Direct output of the model, without any post-processing. @@ -556,7 +570,7 @@ def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], m return predictions - def _instantiate_image_prediction(self, image: np.ndarray, prediction: DetectionPrediction) -> ImagePrediction: + def _instantiate_image_prediction(self, image: np.ndarray, prediction: SegmentationPrediction) -> ImagePrediction: return ImageSegmentationPrediction(image=image, prediction=prediction, class_names=self.class_names) def _combine_image_prediction_to_images(