Skip to content

Commit

Permalink
Feature/sg 747 support predict video full pipeline master (#829)
Browse files Browse the repository at this point in the history
* wip

* move to imageprocessors

* wip

* add back changes

* making it work fully for yolox and almost for ppyoloe

* minor change

* working for det

* cleaning

* clean

* undo

* replace empty with none

* add _get_shift_params

* minor doc change

* cleaning wip

* working for multiple images

* add ppyoloe

* replace pydantic with dataclasses and fix typing

* add docstrings

* doc improvement and use get_shift_params in transforms

* add tests

* improve comment

* rename

* wip

* add option to keep ratio in rescale

* make functions private

* remove DetectionPaddedRescale

* fix doc

* big commit with wrong things

* try undo bad change

* doc

* minor doc

* add a lot of doc

* fix comment

* minor change

* first draft of load_video

* adding save_video, some parts are still to be checked

* wip

* add __init__.py to pipelines

* replace size with shape

* wip

* cleaning

* wip

* fix rgb to bgr and remove check

* almost working, missing batch

* proposal of predict_video

* wip working on dete

* add yolox

* add flag to visualize

* update

* add streaming

* improve streaming code

* docstring update

* fix stream example

* rename Results

* cleaning

* rename stream to predict_webcam

* doc fixes

* improve docstring and homogenize some names

* rename _images_prediction_lst

* improve doc

* add doc

* minor change

* fix image

* fix ci

* fix merge

* reverse channel properly
  • Loading branch information
Louis-Dupont authored Apr 16, 2023
1 parent 5ffdee5 commit a114860
Show file tree
Hide file tree
Showing 22 changed files with 906 additions and 227 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 5 additions & 4 deletions src/super_gradients/examples/predict/detection_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
model = models.get(Models.PP_YOLOE_S, pretrained_weights="coco")

IMAGES = [
"https://miro.medium.com/v2/resize:fit:500/0*w1s81z-Q72obhE_z",
"https://s.hs-data.com/bilder/spieler/gross/128069.jpg",
"https://datasets-server.huggingface.co/assets/Chris1/cityscapes/--/Chris1--cityscapes/train/28/image/image.jpg",
"../../../../documentation/source/images/examples/countryside.jpg",
"../../../../documentation/source/images/examples/street_busy.jpg",
"https://cdn-attachments.timesofmalta.com/cc1eceadde40d2940bc5dd20692901371622153217-1301777007-4d978a6f-620x348.jpg",
]
prediction = model.predict(IMAGES, iou=0.65, conf=0.5)

prediction = model.predict(IMAGES)
prediction.show()
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from super_gradients.common.object_names import Models
from super_gradients.training import models

# Note that currently only YoloX and PPYoloE are supported.
# Folder handed to `predict`; the path is relative, so it is resolved from the current working directory.
IMAGE_FOLDER = "../../../../documentation/source/images/examples"

# Load YoloX-N with the "coco" pretrained weights, predict on the folder and display the result.
detector = models.get(Models.YOLOX_N, pretrained_weights="coco")
detector.predict(IMAGE_FOLDER).show()
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from super_gradients.common.object_names import Models
from super_gradients.training import models

# Note that currently only YoloX and PPYoloE are supported.
# Load YoloX-N with the "coco" pretrained weights and start predicting from the webcam.
webcam_detector = models.get(Models.YOLOX_N, pretrained_weights="coco")
webcam_detector.predict_webcam()
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from super_gradients.common.object_names import Models
from super_gradients.training import models

# Note that currently only YoloX and PPYoloE are supported.
# Placeholder: replace it with the path of the video to process.
VIDEO_PATH = "<path/to/your/video>"

# Load YoloX-N with the "coco" pretrained weights, predict on the video and display the result.
detector = models.get(Models.YOLOX_N, pretrained_weights="coco")
video_predictions = detector.predict(VIDEO_PATH)
video_predictions.show()
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import List, Optional

from super_gradients.common.abstractions.abstract_logger import get_logger
from super_gradients.training.utils.load_image import is_image
from super_gradients.training.utils.media.image import is_image
from super_gradients.training.datasets.detection_datasets.detection_dataset import DetectionDataset
from super_gradients.training.datasets.data_formats import ConcatenatedTensorFormatConverter
from super_gradients.training.datasets.data_formats.default_formats import XYXY_LABEL, LABEL_NORMALIZED_CXCYWH
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,19 @@
* each module accepts in_channels and other parameters
* each module defines out_channels property on construction
"""


from typing import Union, Optional
from typing import Union, Optional, List

from torch import nn
from omegaconf import DictConfig

from super_gradients.training.utils.utils import HpmStruct
from super_gradients.training.models.sg_module import SgModule
import super_gradients.common.factories.detection_modules_factory as det_factory
from super_gradients.training.models.prediction_results import ImagesDetectionPrediction
from super_gradients.training.pipelines.pipelines import DetectionPipeline
from super_gradients.training.transforms.processing import Processing
from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback
from super_gradients.training.utils.media.image import ImageSource


class CustomizableDetector(SgModule):
Expand Down Expand Up @@ -67,6 +70,12 @@ def __init__(

self._initialize_weights(bn_eps, bn_momentum, inplace_act)

# Processing params
self._class_names: Optional[List[str]] = None
self._image_processor: Optional[Processing] = None
self._default_nms_iou: Optional[float] = None
self._default_nms_conf: Optional[float] = None

def forward(self, x):
x = self.backbone(x)
x = self.neck(x)
Expand Down Expand Up @@ -96,3 +105,70 @@ def replace_head(self, new_num_classes: Optional[int] = None, new_head: Optional
self.heads_params = factory.insert_module_param(self.heads_params, "num_classes", new_num_classes)
self.heads = factory.get(factory.insert_module_param(self.heads_params, "in_channels", self.neck.out_channels))
self._initialize_weights(self.bn_eps, self.bn_momentum, self.inplace_act)

@staticmethod
def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
    """Build the callback that post-processes the raw model output.

    This implementation is a placeholder that always raises; it has to be overridden
    to make `predict` / `predict_webcam` usable, since `_get_pipeline` calls it.

    :param conf: Confidence threshold, forwarded by `_get_pipeline`.
    :param iou:  IoU threshold for the nms algorithm, forwarded by `_get_pipeline`.
    :raises NotImplementedError: Always.
    """
    # An explicit message makes the failure actionable when it surfaces from `predict`.
    raise NotImplementedError(
        "`get_post_prediction_callback` is not implemented for this model. "
        "Override it to return a `DetectionPostPredictionCallback` before calling `predict` or `predict_webcam`."
    )

def set_dataset_processing_params(
    self,
    class_names: Optional[List[str]] = None,
    image_processor: Optional[Processing] = None,
    iou: Optional[float] = None,
    conf: Optional[float] = None,
) -> None:
    """Set the processing parameters for the dataset.

    Only the arguments that are not None are stored; an argument left to None keeps the value currently set.

    :param class_names:     (Optional) Names of the classes of the dataset the model was trained on.
    :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training.
    :param iou:             (Optional) IoU threshold for the nms algorithm
    :param conf:            (Optional) Below the confidence threshold, predictions are discarded
    """
    # Compare against None explicitly: `value or previous` would silently ignore valid
    # falsy values such as `conf=0.0` / `iou=0.0`, and would depend on the truthiness of the processor object.
    if class_names is not None:
        self._class_names = class_names
    if image_processor is not None:
        self._image_processor = image_processor
    if iou is not None:
        self._default_nms_iou = iou
    if conf is not None:
        self._default_nms_conf = conf

def _get_pipeline(self, iou: Optional[float] = None, conf: Optional[float] = None) -> DetectionPipeline:
    """Instantiate the prediction pipeline of this model.

    :param iou:  (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf: (Optional) Below the confidence threshold, predictions are discarded.
                 If None, the default value associated to the training is used.
    :return: A `DetectionPipeline` built from this model, its image processor, its post prediction callback and its class names.
    :raises RuntimeError: If the class names, the image processor, the default iou or the default conf is not set.
    """
    required_params = (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf)
    # Identity test: `None in (...)` also evaluates `==` on each element, which is not a reliable None check for arbitrary objects.
    if any(param is None for param in required_params):
        raise RuntimeError(
            "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first."
        )

    # Fall back to the stored defaults only when the argument is omitted; `iou or default` would discard an explicit 0.0.
    iou = self._default_nms_iou if iou is None else iou
    conf = self._default_nms_conf if conf is None else conf

    return DetectionPipeline(
        model=self,
        image_processor=self._image_processor,
        post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf),
        class_names=self._class_names,
    )

def predict(self, images: ImageSource, iou: Optional[float] = None, conf: Optional[float] = None) -> ImagesDetectionPrediction:
    """Run the prediction pipeline of this model on an image or a list of images.

    :param images:  Images to predict.
    :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
                    If None, the default value associated to the training is used.
    :return: The result of calling the pipeline on `images`.
    """
    # Build the pipeline with the requested thresholds and apply it right away.
    return self._get_pipeline(iou=iou, conf=conf)(images)  # type: ignore

def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None):
    """Run the prediction pipeline of this model on the webcam.

    :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
                    If None, the default value associated to the training is used.
    """
    # The pipeline owns the webcam logic; this method only configures it and delegates.
    self._get_pipeline(iou=iou, conf=conf).predict_webcam()
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from super_gradients.training.utils import HpmStruct
from super_gradients.training.models.arch_params_factory import get_arch_params
from super_gradients.training.models.detection_models.pp_yolo_e.post_prediction_callback import PPYoloEPostPredictionCallback, DetectionPostPredictionCallback
from super_gradients.training.models.results import DetectionResults
from super_gradients.training.models.prediction_results import ImagesDetectionPrediction
from super_gradients.training.pipelines.pipelines import DetectionPipeline
from super_gradients.training.transforms.processing import Processing
from super_gradients.training.utils.media.image import ImageSource


class PPYoloE(SgModule):
Expand All @@ -29,34 +30,75 @@ def __init__(self, arch_params):

self._class_names: Optional[List[str]] = None
self._image_processor: Optional[Processing] = None
self._default_nms_iou: Optional[float] = None
self._default_nms_conf: Optional[float] = None

@staticmethod
def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
    """Build the post prediction callback used by this model.

    :param conf: Passed to the callback as `score_threshold`.
    :param iou:  Passed to the callback as `nms_threshold`.
    :return: A `PPYoloEPostPredictionCallback` created with `nms_top_k=1000` and `max_predictions=300`.
    """
    callback_kwargs = dict(score_threshold=conf, nms_threshold=iou, nms_top_k=1000, max_predictions=300)
    return PPYoloEPostPredictionCallback(**callback_kwargs)

def set_dataset_processing_params(self, class_names: Optional[List[str]], image_processor: Optional[Processing]) -> None:
def set_dataset_processing_params(
    self,
    class_names: Optional[List[str]] = None,
    image_processor: Optional[Processing] = None,
    iou: Optional[float] = None,
    conf: Optional[float] = None,
) -> None:
    """Set the processing parameters for the dataset.

    Only the arguments that are not None are stored; an argument left to None keeps the value currently set.

    :param class_names:     (Optional) Names of the classes of the dataset the model was trained on.
    :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training.
    :param iou:             (Optional) IoU threshold for the nms algorithm
    :param conf:            (Optional) Below the confidence threshold, predictions are discarded
    """
    # Compare against None explicitly: `value or previous` would silently ignore valid
    # falsy values such as `conf=0.0` / `iou=0.0`, and would depend on the truthiness of the processor object.
    if class_names is not None:
        self._class_names = class_names
    if image_processor is not None:
        self._image_processor = image_processor
    if iou is not None:
        self._default_nms_iou = iou
    if conf is not None:
        self._default_nms_conf = conf

def predict(self, images, iou: float = 0.65, conf: float = 0.01) -> DetectionResults:
def _get_pipeline(self, iou: Optional[float] = None, conf: Optional[float] = None) -> DetectionPipeline:
    """Instantiate the prediction pipeline of this model.

    :param iou:  (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf: (Optional) Below the confidence threshold, predictions are discarded.
                 If None, the default value associated to the training is used.
    :return: A `DetectionPipeline` built from this model, its image processor, its post prediction callback and its class names.
    :raises RuntimeError: If the class names, the image processor, the default iou or the default conf is not set.
    """
    required_params = (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf)
    # Identity test: `None in (...)` also evaluates `==` on each element, which is not a reliable None check for arbitrary objects.
    if any(param is None for param in required_params):
        raise RuntimeError(
            "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first."
        )

    # Fall back to the stored defaults only when the argument is omitted; `iou or default` would discard an explicit 0.0.
    iou = self._default_nms_iou if iou is None else iou
    conf = self._default_nms_conf if conf is None else conf

    # This method only builds the pipeline; applying it to images is done by the caller.
    return DetectionPipeline(
        model=self,
        image_processor=self._image_processor,
        post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf),
        class_names=self._class_names,
    )

def predict(self, images: ImageSource, iou: Optional[float] = None, conf: Optional[float] = None) -> ImagesDetectionPrediction:
    """Run the prediction pipeline of this model on an image or a list of images.

    :param images:  Images to predict.
    :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
                    If None, the default value associated to the training is used.
    :return: The result of calling the pipeline on `images`.
    """
    # Build the pipeline with the requested thresholds and apply it right away.
    return self._get_pipeline(iou=iou, conf=conf)(images)  # type: ignore

def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None):
    """Run the prediction pipeline of this model on the webcam.

    :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
                    If None, the default value associated to the training is used.
    """
    # The pipeline owns the webcam logic; this method only configures it and delegates.
    self._get_pipeline(iou=iou, conf=conf).predict_webcam()

def forward(self, x: Tensor):
features = self.backbone(x)
Expand Down
54 changes: 48 additions & 6 deletions src/super_gradients/training/models/detection_models/yolo_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
from super_gradients.training.utils import torch_version_is_greater_or_equal
from super_gradients.training.utils.detection_utils import non_max_suppression, matrix_non_max_suppression, NMS_Type, DetectionPostPredictionCallback, Anchors
from super_gradients.training.utils.utils import HpmStruct, check_img_size_divisibility, get_param
from super_gradients.training.models.results import DetectionResults
from super_gradients.training.models.prediction_results import ImagesDetectionPrediction
from super_gradients.training.pipelines.pipelines import DetectionPipeline
from super_gradients.training.transforms.processing import Processing

from super_gradients.training.utils.media.image import ImageSource

COCO_DETECTION_80_CLASSES_BBOX_ANCHORS = Anchors(
[[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], strides=[8, 16, 32]
Expand Down Expand Up @@ -418,33 +418,75 @@ def __init__(self, backbone: Type[nn.Module], arch_params: HpmStruct, initialize

self._class_names: Optional[List[str]] = None
self._image_processor: Optional[Processing] = None
self._default_nms_iou: Optional[float] = None
self._default_nms_conf: Optional[float] = None

@staticmethod
def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
    """Build the post prediction callback used by this model.

    :param conf: Passed to the callback as `conf`.
    :param iou:  Passed to the callback as `iou`.
    :return: A `YoloPostPredictionCallback` created with these two thresholds.
    """
    post_prediction_callback = YoloPostPredictionCallback(conf=conf, iou=iou)
    return post_prediction_callback

def set_dataset_processing_params(self, class_names: Optional[List[str]], image_processor: Optional[Processing]) -> None:
def set_dataset_processing_params(
    self,
    class_names: Optional[List[str]] = None,
    image_processor: Optional[Processing] = None,
    iou: Optional[float] = None,
    conf: Optional[float] = None,
) -> None:
    """Set the processing parameters for the dataset.

    Only the arguments that are not None are stored; an argument left to None keeps the value currently set.

    :param class_names:     (Optional) Names of the classes of the dataset the model was trained on.
    :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training.
    :param iou:             (Optional) IoU threshold for the nms algorithm
    :param conf:            (Optional) Below the confidence threshold, predictions are discarded
    """
    # Compare against None explicitly: `value or previous` would silently ignore valid
    # falsy values such as `conf=0.0` / `iou=0.0`, and would depend on the truthiness of the processor object.
    if class_names is not None:
        self._class_names = class_names
    if image_processor is not None:
        self._image_processor = image_processor
    if iou is not None:
        self._default_nms_iou = iou
    if conf is not None:
        self._default_nms_conf = conf

def _get_pipeline(self, iou: Optional[float] = None, conf: Optional[float] = None) -> DetectionPipeline:
    """Instantiate the prediction pipeline of this model.

    :param iou:  (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf: (Optional) Below the confidence threshold, predictions are discarded.
                 If None, the default value associated to the training is used.
    :return: A `DetectionPipeline` built from this model, its image processor, its post prediction callback and its class names.
    :raises RuntimeError: If the class names, the image processor, the default iou or the default conf is not set.
    """
    required_params = (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf)
    # Identity test: `None in (...)` also evaluates `==` on each element, which is not a reliable None check for arbitrary objects.
    if any(param is None for param in required_params):
        raise RuntimeError(
            "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first."
        )

    # Fall back to the stored defaults only when the argument is omitted; `iou or default` would discard an explicit 0.0.
    iou = self._default_nms_iou if iou is None else iou
    conf = self._default_nms_conf if conf is None else conf

    # This method only builds the pipeline; applying it to images is done by the caller.
    return DetectionPipeline(
        model=self,
        image_processor=self._image_processor,
        post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf),
        class_names=self._class_names,
    )

def predict(self, images: ImageSource, iou: Optional[float] = None, conf: Optional[float] = None) -> ImagesDetectionPrediction:
    """Run the prediction pipeline of this model on an image or a list of images.

    :param images:  Images to predict.
    :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
                    If None, the default value associated to the training is used.
    :return: The result of calling the pipeline on `images`.
    """
    # Build the pipeline with the requested thresholds and apply it right away.
    return self._get_pipeline(iou=iou, conf=conf)(images)  # type: ignore

def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None):
    """Run the prediction pipeline of this model on the webcam.

    :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
    :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
                    If None, the default value associated to the training is used.
    """
    # The pipeline owns the webcam logic; this method only configures it and delegates.
    self._get_pipeline(iou=iou, conf=conf).predict_webcam()

def forward(self, x):
out = self._backbone(x)
Expand Down
5 changes: 3 additions & 2 deletions src/super_gradients/training/models/model_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,9 @@ def instantiate_model(
net.replace_head(new_num_classes=num_classes_new_head)
arch_params.num_classes = num_classes_new_head

class_names, image_processor = get_pretrained_processing_params(model_name, pretrained_weights)
net.set_dataset_processing_params(class_names, image_processor)
# TODO: remove once we load it from the checkpoint
processing_params = get_pretrained_processing_params(model_name, pretrained_weights)
net.set_dataset_processing_params(**processing_params)

_add_model_name_attribute(net, model_name)

Expand Down
Loading

0 comments on commit a114860

Please sign in to comment.