diff --git a/src/super_gradients/training/models/classification_models/base_classifer.py b/src/super_gradients/training/models/classification_models/base_classifer.py
index 56ea93dbed..f7d21e3621 100644
--- a/src/super_gradients/training/models/classification_models/base_classifer.py
+++ b/src/super_gradients/training/models/classification_models/base_classifer.py
@@ -30,10 +30,11 @@ def set_dataset_processing_params(self, class_names: Optional[List[str]] = None,
         self._image_processor = image_processor or self._image_processor
 
     @lru_cache(maxsize=1)
-    def _get_pipeline(self, fuse_model: bool = True, skip_image_resizing: bool = False) -> ClassificationPipeline:
+    def _get_pipeline(self, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True) -> ClassificationPipeline:
         """Instantiate the prediction pipeline of this model.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
         :param skip_image_resizing: If True, the image processor will not resize the images.
+        :param fp16: If True, use mixed precision for inference.
         """
         if None in (self._class_names, self._image_processor):
             raise RuntimeError(
@@ -48,24 +49,34 @@ def _get_pipeline(self, fuse_model: bool = True, skip_image_resizing: bool = Fal
             image_processor=self._image_processor,
             class_names=self._class_names,
             fuse_model=fuse_model,
+            fp16=fp16,
         )
         return pipeline
 
-    def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True, skip_image_resizing: bool = False) -> ImagesClassificationPrediction:
+    def predict(
+        self,
+        images: ImageSource,
+        batch_size: int = 32,
+        fuse_model: bool = True,
+        skip_image_resizing: bool = False,
+        fp16: bool = True,
+    ) -> ImagesClassificationPrediction:
         """Predict an image or a list of images.
 
         :param images: Images to predict.
         :param batch_size: Maximum number of images to process at the same time.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
         :param skip_image_resizing: If True, the image processor will not resize the images.
+        :param fp16: If True, use mixed precision for inference.
         """
-        pipeline = self._get_pipeline(fuse_model=fuse_model, skip_image_resizing=skip_image_resizing)
+        pipeline = self._get_pipeline(fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16)
         return pipeline(images, batch_size=batch_size)  # type: ignore
 
-    def predict_webcam(self, fuse_model: bool = True, skip_image_resizing: bool = False) -> None:
+    def predict_webcam(self, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True) -> None:
         """Predict using webcam.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
         :param skip_image_resizing: If True, the image processor will not resize the images.
+        :param fp16: If True, use mixed precision for inference.
""" - pipeline = self._get_pipeline(fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) pipeline.predict_webcam() diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py index d5519d7376..1dc372e2aa 100644 --- a/src/super_gradients/training/models/detection_models/customizable_detector.py +++ b/src/super_gradients/training/models/detection_models/customizable_detector.py @@ -5,6 +5,7 @@ * each module accepts in_channels and other parameters * each module defines out_channels property on construction """ + from typing import Union, Optional, List, Callable from functools import lru_cache @@ -208,6 +209,7 @@ def _get_pipeline( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ) -> DetectionPipeline: """Instantiate the prediction pipeline of this model. @@ -222,6 +224,7 @@ def _get_pipeline( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. """ if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf): raise RuntimeError( @@ -256,6 +259,7 @@ def _get_pipeline( ), class_names=self._class_names, fuse_model=fuse_model, + fp16=fp16, ) return pipeline @@ -271,6 +275,7 @@ def predict( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ) -> ImagesDetectionPrediction: """Predict an image or a list of images. @@ -287,6 +292,7 @@ def predict( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. """ pipeline = self._get_pipeline( iou=iou, @@ -297,6 +303,7 @@ def predict( max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, + fp16=fp16, ) return pipeline(images, batch_size=batch_size) # type: ignore @@ -310,6 +317,7 @@ def predict_webcam( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ): """Predict using webcam. @@ -325,6 +333,7 @@ def predict_webcam( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. 
""" pipeline = self._get_pipeline( iou=iou, @@ -335,6 +344,7 @@ def predict_webcam( max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, + fp16=fp16, ) pipeline.predict_webcam() diff --git a/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py b/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py index 9556690d46..98b7e319f0 100644 --- a/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py +++ b/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py @@ -208,6 +208,7 @@ def _get_pipeline( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ) -> DetectionPipeline: """Instantiate the prediction pipeline of this model. @@ -256,6 +257,7 @@ def _get_pipeline( ), class_names=self._class_names, fuse_model=fuse_model, + fp16=fp16, ) return pipeline @@ -271,6 +273,7 @@ def predict( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ) -> ImagesDetectionPrediction: """Predict an image or a list of images. @@ -287,6 +290,7 @@ def predict( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, the model will use mixed precision for inference. """ pipeline = self._get_pipeline( iou=iou, @@ -297,6 +301,7 @@ def predict( max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, + fp16=fp16, ) return pipeline(images, batch_size=batch_size) # type: ignore @@ -310,13 +315,13 @@ def predict_webcam( max_predictions: Optional[int] = None, multi_label_per_box: Optional[bool] = None, class_agnostic_nms: Optional[bool] = None, + fp16: bool = True, ): """Predict using webcam. :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. :param conf: (Optional) Below the confidence threshold, prediction are discarded. If None, the default value associated to the training is used. - :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. :param nms_top_k: (Optional) The maximum number of detections to consider for NMS. @@ -325,6 +330,7 @@ def predict_webcam( If False, each anchor can produce only one label of the class with the highest score. :param class_agnostic_nms: (Optional) If True, perform class-agnostic NMS (i.e IoU of boxes of different classes is checked). If False NMS is performed separately for each class. + :param fp16: If True, use mixed precision for inference. 
""" pipeline = self._get_pipeline( iou=iou, @@ -335,6 +341,7 @@ def predict_webcam( max_predictions=max_predictions, multi_label_per_box=multi_label_per_box, class_agnostic_nms=class_agnostic_nms, + fp16=fp16, ) pipeline.predict_webcam() diff --git a/src/super_gradients/training/models/detection_models/yolo_base.py b/src/super_gradients/training/models/detection_models/yolo_base.py index d46cedb5aa..fcef3df40f 100755 --- a/src/super_gradients/training/models/detection_models/yolo_base.py +++ b/src/super_gradients/training/models/detection_models/yolo_base.py @@ -537,7 +537,7 @@ def set_dataset_processing_params( @lru_cache(maxsize=1) def _get_pipeline( - self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False + self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True ) -> DetectionPipeline: """Instantiate the prediction pipeline of this model. @@ -546,6 +546,7 @@ def _get_pipeline( If None, the default value associated to the training is used. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf): raise RuntimeError( @@ -569,6 +570,7 @@ def _get_pipeline( post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf), class_names=self._class_names, fuse_model=fuse_model, + fp16=fp16, ) return pipeline @@ -580,6 +582,7 @@ def predict( batch_size: int = 32, fuse_model: bool = True, skip_image_resizing: bool = False, + fp16: bool = True, ) -> ImagesDetectionPrediction: """Predict an image or a list of images. @@ -590,11 +593,12 @@ def predict( :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ - pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) return pipeline(images, batch_size=batch_size) # type: ignore - def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False): + def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False, fp16=True): """Predict using webcam. :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. @@ -602,8 +606,9 @@ def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = No If None, the default value associated to the training is used. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. 
""" - pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) pipeline.predict_webcam() def train(self, mode: bool = True): diff --git a/src/super_gradients/training/models/pose_estimation_models/dekr_hrnet.py b/src/super_gradients/training/models/pose_estimation_models/dekr_hrnet.py index bdb3ffa832..a335db6436 100644 --- a/src/super_gradients/training/models/pose_estimation_models/dekr_hrnet.py +++ b/src/super_gradients/training/models/pose_estimation_models/dekr_hrnet.py @@ -583,7 +583,9 @@ def set_dataset_processing_params( self._default_nms_conf = conf or self._default_nms_conf @lru_cache(maxsize=1) - def _get_pipeline(self, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False) -> PoseEstimationPipeline: + def _get_pipeline( + self, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True + ) -> PoseEstimationPipeline: """Instantiate the prediction pipeline of this model. :param conf: (Optional) Below the confidence threshold, prediction are discarded. @@ -621,11 +623,18 @@ def _get_pipeline(self, conf: Optional[float] = None, fuse_model: bool = True, s keypoint_colors=self._keypoint_colors, post_prediction_callback=self.get_post_prediction_callback(conf=conf), fuse_model=fuse_model, + fp16=fp16, ) return pipeline def predict( - self, images: ImageSource, conf: Optional[float] = None, batch_size: int = 32, fuse_model: bool = True, skip_image_resizing: bool = False + self, + images: ImageSource, + conf: Optional[float] = None, + batch_size: int = 32, + fuse_model: bool = True, + skip_image_resizing: bool = False, + fp16: bool = True, ) -> ImagesPoseEstimationPrediction: """Predict an image or a list of images. @@ -635,11 +644,12 @@ def predict( :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ - pipeline = self._get_pipeline(conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) return pipeline(images, batch_size=batch_size) # type: ignore - def predict_webcam(self, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False): + def predict_webcam(self, conf: Optional[float] = None, fuse_model: bool = True, skip_image_resizing: bool = False, fp16: bool = True): """Predict using webcam. :param conf: (Optional) Below the confidence threshold, prediction are discarded. @@ -647,7 +657,7 @@ def predict_webcam(self, conf: Optional[float] = None, fuse_model: bool = True, :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. 
""" - pipeline = self._get_pipeline(conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing) + pipeline = self._get_pipeline(conf=conf, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, fp16=fp16) pipeline.predict_webcam() def train(self, mode: bool = True): diff --git a/src/super_gradients/training/models/pose_estimation_models/yolo_nas_pose/yolo_nas_pose_variants.py b/src/super_gradients/training/models/pose_estimation_models/yolo_nas_pose/yolo_nas_pose_variants.py index 4856386716..cff6ca577c 100644 --- a/src/super_gradients/training/models/pose_estimation_models/yolo_nas_pose/yolo_nas_pose_variants.py +++ b/src/super_gradients/training/models/pose_estimation_models/yolo_nas_pose/yolo_nas_pose_variants.py @@ -149,16 +149,18 @@ def predict( batch_size: int = 32, fuse_model: bool = True, skip_image_resizing: bool = False, + fp16: bool = True, ) -> PoseEstimationPrediction: """Predict an image or a list of images. - :param images: Images to predict. - :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. - :param conf: (Optional) Below the confidence threshold, prediction are discarded. - If None, the default value associated to the training is used. - :param batch_size: Maximum number of images to process at the same time. - :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param images: Images to predict. + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param batch_size: Maximum number of images to process at the same time. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ pipeline = self._get_pipeline( iou=iou, @@ -167,9 +169,43 @@ def predict( post_nms_max_predictions=post_nms_max_predictions, fuse_model=fuse_model, skip_image_resizing=skip_image_resizing, + fp16=fp16, ) return pipeline(images, batch_size=batch_size) # type: ignore + def predict_webcam( + self, + iou: Optional[float] = None, + conf: Optional[float] = None, + pre_nms_max_predictions: Optional[int] = None, + post_nms_max_predictions: Optional[int] = None, + batch_size: int = 32, + fuse_model: bool = True, + skip_image_resizing: bool = False, + fp16: bool = True, + ): + """Predict using webcam. + + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param batch_size: Maximum number of images to process at the same time. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. 
+ + """ + pipeline = self._get_pipeline( + iou=iou, + conf=conf, + pre_nms_max_predictions=pre_nms_max_predictions, + post_nms_max_predictions=post_nms_max_predictions, + fuse_model=fuse_model, + skip_image_resizing=skip_image_resizing, + fp16=fp16, + ) + pipeline.predict_webcam() + @lru_cache(maxsize=1) def _get_pipeline( self, @@ -179,14 +215,16 @@ def _get_pipeline( post_nms_max_predictions: Optional[int] = None, fuse_model: bool = True, skip_image_resizing: bool = False, + fp16: bool = True, ) -> PoseEstimationPipeline: """Instantiate the prediction pipeline of this model. - :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. - :param conf: (Optional) Below the confidence threshold, prediction are discarded. - If None, the default value associated to the training is used. - :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param iou: (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used. + :param conf: (Optional) Below the confidence threshold, prediction are discarded. + If None, the default value associated to the training is used. + :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. :param skip_image_resizing: If True, the image processor will not resize the images. + :param fp16: If True, use mixed precision for inference. """ if None in (self._image_processor, self._default_nms_iou, self._default_nms_conf, self._edge_links): raise RuntimeError( @@ -219,6 +257,7 @@ def _get_pipeline( edge_links=self._edge_links, edge_colors=self._edge_colors, keypoint_colors=self._keypoint_colors, + fp16=fp16, ) return pipeline diff --git a/src/super_gradients/training/models/segmentation_models/segmentation_module.py b/src/super_gradients/training/models/segmentation_models/segmentation_module.py index 745d1cd898..86721298a5 100644 --- a/src/super_gradients/training/models/segmentation_models/segmentation_module.py +++ b/src/super_gradients/training/models/segmentation_models/segmentation_module.py @@ -88,7 +88,7 @@ def set_dataset_processing_params( self._image_processor = image_processor or self._image_processor @lru_cache(maxsize=1) - def _get_pipeline(self, fuse_model: bool = True) -> SegmentationPipeline: + def _get_pipeline(self, fuse_model: bool = True, fp16: bool = True) -> SegmentationPipeline: """Instantiate the segmentation pipeline of this model. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. """ @@ -102,23 +102,26 @@ def _get_pipeline(self, fuse_model: bool = True) -> SegmentationPipeline: image_processor=self._image_processor, class_names=self._class_names, fuse_model=fuse_model, + fp16=fp16, ) return pipeline - def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True) -> ImagesSegmentationPrediction: + def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True, fp16: bool = True) -> ImagesSegmentationPrediction: """Predict an image or a list of images. :param images: Images to predict. :param batch_size: Maximum number of images to process at the same time. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. 
+        :param fp16: If True, use mixed precision for inference.
         """
-        pipeline = self._get_pipeline(fuse_model=fuse_model)
+        pipeline = self._get_pipeline(fuse_model=fuse_model, fp16=fp16)
         return pipeline(images, batch_size=batch_size)  # type: ignore
 
-    def predict_webcam(self, fuse_model: bool = True):
+    def predict_webcam(self, fuse_model: bool = True, fp16: bool = True):
         """Predict using webcam.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
+        :param fp16: If True, use mixed precision for inference.
         """
-        pipeline = self._get_pipeline(fuse_model=fuse_model)
+        pipeline = self._get_pipeline(fuse_model=fuse_model, fp16=fp16)
         pipeline.predict_webcam()
 
     def get_input_shape_steps(self) -> Tuple[int, int]:
diff --git a/src/super_gradients/training/models/segmentation_models/stdc.py b/src/super_gradients/training/models/segmentation_models/stdc.py
index 93de46c168..01c75f3338 100644
--- a/src/super_gradients/training/models/segmentation_models/stdc.py
+++ b/src/super_gradients/training/models/segmentation_models/stdc.py
@@ -2,6 +2,7 @@
 Implementation of paper: "Rethinking BiSeNet For Real-time Semantic Segmentation", https://arxiv.org/abs/2104.13188
 Based on original implementation: https://github.com/MichaelFan01/STDC-Seg, cloned 23/08/2021, commit 59ff37f
 """
+
 from functools import lru_cache
 from typing import Union, List, Optional, Callable, Dict, Tuple
 from abc import ABC, abstractmethod
@@ -659,7 +660,7 @@ def set_dataset_processing_params(
         self._image_processor = image_processor or self._image_processor
 
     @lru_cache(1)
-    def _get_pipeline(self, fuse_model: bool = True) -> SegmentationPipeline:
+    def _get_pipeline(self, fuse_model: bool = True, fp16: bool = True) -> SegmentationPipeline:
         """Instantiate the segmentation pipeline of this model.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
         """
@@ -676,20 +677,22 @@ def _get_pipeline(self, fuse_model: bool = True) -> SegmentationPipeline:
         )
         return pipeline
 
-    def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True) -> ImagesSegmentationPrediction:
+    def predict(self, images: ImageSource, batch_size: int = 32, fuse_model: bool = True, fp16: bool = True) -> ImagesSegmentationPrediction:
         """Predict an image or a list of images.
         :param images: Images to predict.
         :param batch_size: Maximum number of images to process at the same time.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
+        :param fp16: If True, use mixed precision for inference.
         """
-        pipeline = self._get_pipeline(fuse_model=fuse_model)
+        pipeline = self._get_pipeline(fuse_model=fuse_model, fp16=fp16)
         return pipeline(images, batch_size=batch_size)  # type: ignore
 
-    def predict_webcam(self, fuse_model: bool = True):
+    def predict_webcam(self, fuse_model: bool = True, fp16: bool = True):
         """Predict using webcam.
         :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
+        :param fp16: If True, use mixed precision for inference.
""" - pipeline = self._get_pipeline(fuse_model=fuse_model) + pipeline = self._get_pipeline(fuse_model=fuse_model, fp16=fp16) pipeline.predict_webcam() def get_input_shape_steps(self) -> Tuple[int, int]: diff --git a/src/super_gradients/training/pipelines/pipelines.py b/src/super_gradients/training/pipelines/pipelines.py index e79cb9a315..8c625c2183 100644 --- a/src/super_gradients/training/pipelines/pipelines.py +++ b/src/super_gradients/training/pipelines/pipelines.py @@ -73,6 +73,7 @@ def __init__( device: Optional[str] = None, fuse_model: bool = True, dtype: Optional[torch.dtype] = None, + fp16: bool = True, ): model_device: torch.device = infer_model_device(model=model) if device: @@ -89,6 +90,7 @@ def __init__( self.image_processor = image_processor self.fuse_model = fuse_model # If True, the model will be fused in the first forward pass, to make sure it gets the right input_size + self.fp16 = fp16 def _fuse_model(self, input_example: torch.Tensor): logger.info("Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`") @@ -205,7 +207,7 @@ def _generate_prediction_result_single_batch(self, images: Iterable[np.ndarray]) ) # Predict - with eval_mode(self.model), torch.no_grad(), torch.cuda.amp.autocast(): + with eval_mode(self.model), torch.no_grad(), torch.cuda.amp.autocast(enabled=self.fp16): torch_inputs = torch.from_numpy(np.array(preprocessed_images)).to(self.device) torch_inputs = torch_inputs.to(self.dtype) @@ -284,6 +286,7 @@ class DetectionPipeline(Pipeline): :param image_processor: Single image processor or a list of image processors for preprocessing and postprocessing the images. :param device: The device on which the model will be run. If None, will run on current model device. Use "cuda" for GPU support. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param fp16: If True, use mixed precision for inference. 
""" def __init__( @@ -294,6 +297,7 @@ def __init__( device: Optional[str] = None, image_processor: Union[Processing, List[Processing]] = None, fuse_model: bool = True, + fp16: bool = True, ): if isinstance(image_processor, list): image_processor = ComposeProcessing(image_processor) @@ -308,6 +312,7 @@ def __init__( image_processor=image_processor, class_names=class_names, fuse_model=fuse_model, + fp16=fp16, ) self.post_prediction_callback = post_prediction_callback @@ -378,6 +383,7 @@ def __init__( device: Optional[str] = None, image_processor: Union[Processing, List[Processing]] = None, fuse_model: bool = True, + fp16: bool = True, ): if isinstance(image_processor, list): image_processor = ComposeProcessing(image_processor) @@ -388,6 +394,7 @@ def __init__( image_processor=image_processor, class_names=None, fuse_model=fuse_model, + fp16=fp16, ) self.post_prediction_callback = post_prediction_callback self.edge_links = np.asarray(edge_links, dtype=int) @@ -408,9 +415,11 @@ def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], m PoseEstimationPrediction( poses=image_level_predictions.poses.cpu().numpy() if torch.is_tensor(image_level_predictions.poses) else image_level_predictions.poses, scores=image_level_predictions.scores.cpu().numpy() if torch.is_tensor(image_level_predictions.scores) else image_level_predictions.scores, - bboxes_xyxy=image_level_predictions.bboxes_xyxy.cpu().numpy() - if torch.is_tensor(image_level_predictions.bboxes_xyxy) - else image_level_predictions.bboxes_xyxy, + bboxes_xyxy=( + image_level_predictions.bboxes_xyxy.cpu().numpy() + if torch.is_tensor(image_level_predictions.bboxes_xyxy) + else image_level_predictions.bboxes_xyxy + ), image_shape=image.shape, edge_links=self.edge_links, edge_colors=self.edge_colors, @@ -450,6 +459,7 @@ class ClassificationPipeline(Pipeline): :param image_processor: Single image processor or a list of image processors for preprocessing and postprocessing the images. :param device: The device on which the model will be run. If None, will run on current model device. Use "cuda" for GPU support. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param fp16: If True, use mixed precision for inference. """ def __init__( @@ -459,6 +469,7 @@ def __init__( device: Optional[str] = None, image_processor: Union[Processing, List[Processing]] = None, fuse_model: bool = True, + fp16: bool = True, ): super().__init__( model=model, @@ -466,6 +477,7 @@ def __init__( image_processor=image_processor, class_names=class_names, fuse_model=fuse_model, + fp16=fp16, ) def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[ClassificationPrediction]: @@ -516,6 +528,7 @@ class SegmentationPipeline(Pipeline): :param image_processor: Single image processor or a list of image processors for preprocessing and postprocessing the images. :param device: The device on which the model will be run. If None, will run on current model device. Use "cuda" for GPU support. :param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage. + :param fp16: If True, use mixed precision for inference. 
""" def __init__( @@ -525,10 +538,11 @@ def __init__( device: Optional[str] = None, image_processor: Optional[Processing] = None, fuse_model: bool = True, + fp16: bool = True, ): - super().__init__(model=model, device=device, image_processor=image_processor, class_names=class_names, fuse_model=fuse_model) + super().__init__(model=model, device=device, image_processor=image_processor, class_names=class_names, fuse_model=fuse_model, fp16=fp16) - def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[DetectionPrediction]: + def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[SegmentationPrediction]: """Decode the model output, by applying post prediction callback. This includes NMS. :param model_output: Direct output of the model, without any post-processing. @@ -556,7 +570,7 @@ def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], m return predictions - def _instantiate_image_prediction(self, image: np.ndarray, prediction: DetectionPrediction) -> ImagePrediction: + def _instantiate_image_prediction(self, image: np.ndarray, prediction: SegmentationPrediction) -> ImagePrediction: return ImageSegmentationPrediction(image=image, prediction=prediction, class_names=self.class_names) def _combine_image_prediction_to_images(