Add YOLOv5 Python Postprocesses (#185)

* Parameterize anchor, class for yolo
* Don't set class attributes
* Add YOLOv5 Python postprocesses
* Remove num_classes
* Return ObjectDetectionResult for YOLO Python PP
* Update YOLOv5m/l accuracy
* Use new yolo postproc
* Update test expectations
* Update yolov5m rust accuracy
* Update YOLOv5l Rust accuracy
* Bump to dev2
* Add accuracy tests, class-aware nms
* Use 0.10.0.dev0 furiosa-native-postprocess
* Lint, update test oracles
* Update yolov5l accuracies
* Update accuracy targets
* Use torchvision.ops.nms
* Update mobilenet rust pp accuracy
* Run black, use fnp 0.10.0 release version
* Update docs for YOLOv5
furiosa-ai · May 29, 2024 · ca10f51 · ca10f51
1 parent a17fe8a
commit ca10f51
Show file tree

Hide file tree

Showing 19 changed files with 680 additions and 54 deletions.
diff --git a/.gitignore b/.gitignore
@@ -142,3 +142,10 @@ cython_debug/
 # IDE
 .idea
 .vscode
+
+# Benchmarks
+.benchmarks/
+
+# Test data (including symbolic links to it)
+/tests/data
+/tests/data/
diff --git a/docs/models/yolov5l.md b/docs/models/yolov5l.md
@@ -33,28 +33,28 @@ The input is a 3-channel image of 640, 640 (height, width).
 * Optimal Batch Size (minimum: 1): <= 2
 
 ## Outputs
-The outputs are 3 `numpy.float32` tensors in various shapes as the following. 
+The outputs are 3 `numpy.float32` tensors of various shapes, as follows.
 You can refer to `postprocess()` function to learn how to decode boxes, classes, and confidence scores.
 
 | Tensor | Shape             | Data Type | Data Format | Description |
 |--------|-------------------|-----------|-----------|-------------|
 | 0      | (1, 45, 80, 80)   | float32   | NCHW      |             |
 | 1      | (1, 45, 40, 40)   | float32   | NCHW      |             |
 | 2      | (1, 45, 20, 20)   | float32   | NCHW      |             |
- 
+
 
 ## Pre/Postprocessing
 `furiosa.models.vision.YOLOv5l` class provides `preprocess` and `postprocess` methods.
-`preprocess` method converts input images to input tensors, and `postprocess` method converts 
-model output tensors to a list of bounding boxes, scores and labels. 
+`preprocess` method converts input images to input tensors, and `postprocess` method converts
+model output tensors to a list of bounding boxes, scores and labels.
 You can find examples at [YOLOv5l Usage](#YOLOv5l_Usage).
- 
+
 ### `furiosa.models.vision.YOLOv5l.preprocess`
 ::: furiosa.models.vision.yolov5.core.YOLOv5PreProcessor.__call__
     options:
         show_source: false
-    
+
 ### `furiosa.models.vision.YOLOv5l.postprocess`
-::: furiosa.models.vision.yolov5.core.YOLOv5PostProcessor.__call__
+::: furiosa.models.vision.yolov5.core.YOLOv5PythonPostProcessor.__call__
     options:
         show_source: false
diff --git a/docs/models/yolov5m.md b/docs/models/yolov5m.md
@@ -33,7 +33,7 @@ The input is a 3-channel image of 640, 640 (height, width).
 * Optimal Batch Size (minimum: 1): <= 4
 
 ## Outputs
-The outputs are 3 `numpy.float32` tensors in various shapes as the following. 
+The outputs are 3 `numpy.float32` tensors of various shapes, as follows.
 You can refer to `postprocess()` function to learn how to decode boxes, classes, and confidence scores.
 
 | Tensor | Shape             | Data Type | Data Format | Description |
@@ -44,17 +44,17 @@ You can refer to `postprocess()` function to learn how to decode boxes, classes,
 
 ## Pre/Postprocessing
 `furiosa.models.vision.YOLOv5m` class provides `preprocess` and `postprocess` methods.
-`preprocess` method converts input images to input tensors, and `postprocess` method converts 
-model output tensors to a list of bounding boxes, scores and labels. 
+`preprocess` method converts input images to input tensors, and `postprocess` method converts
+model output tensors to a list of bounding boxes, scores and labels.
 You can find examples at [YOLOv5m Usage](#YOLOv5m_Usage).
- 
+
 ### `furiosa.models.vision.YOLOv5m.preprocess`
 ::: furiosa.models.vision.yolov5.core.YOLOv5PreProcessor.__call__
     options:
         show_source: false
-    
+
 ### `furiosa.models.vision.YOLOv5m.postprocess`
-::: furiosa.models.vision.yolov5.core.YOLOv5PostProcessor.__call__
+::: furiosa.models.vision.yolov5.core.YOLOv5PythonPostProcessor.__call__
     options:
         show_source: false
 
diff --git a/furiosa/models/__init__.py b/furiosa/models/__init__.py
@@ -1,4 +1,5 @@
 """Furiosa Models"""
+
 from . import errors, vision
 
 __version__ = "0.10.0.dev0"

diff --git a/furiosa/models/types.py b/furiosa/models/types.py
@@ -47,14 +47,14 @@ class Format(str, Enum):
 
 class PreProcessor(ABC):
     @abstractmethod
-    def __call__(self, inputs: Any) -> Tuple[Sequence[npt.ArrayLike], Sequence[Context]]:
-        ...
+    def __call__(self, inputs: Any) -> Tuple[Sequence[npt.ArrayLike], Sequence[Context]]: ...
 
 
 class PostProcessor(ABC):
     @abstractmethod
-    def __call__(self, model_outputs: Sequence[npt.ArrayLike], contexts: Sequence[Context]) -> Any:
-        ...
+    def __call__(
+        self, model_outputs: Sequence[npt.ArrayLike], contexts: Sequence[Context]
+    ) -> Any: ...
 
 
 class RustPostProcessor(PostProcessor):

diff --git a/furiosa/models/vision/yolov5/core.py b/furiosa/models/vision/yolov5/core.py
@@ -16,6 +16,7 @@
 )
 from ...vision.postprocess import LtrbBoundingBox, ObjectDetectionResult
 from ..preprocess import read_image_opencv_if_needed
+from .postprocess import YOLOv5PythonPostProcessor
 
 _INPUT_SIZE = (640, 640)
 _STRIDES = [8, 16, 32]
@@ -153,24 +154,30 @@ def __call__(
         return np.stack(batched_image, axis=0), batched_proc_params
 
 
-class YOLOv5PostProcessor(RustPostProcessor):
+def sigmoid(x: np.ndarray) -> np.ndarray:
+    # pylint: disable=invalid-name
+    return 1 / (1 + np.exp(-x))
+
+
+class YOLOv5NativePostProcessor(RustPostProcessor):
     def __init__(self, anchors: npt.ArrayLike, class_names: Sequence[str]):
         """
-        native (RustProcessor): A native postprocessor. It has several information to decode: (xyxy,
-            confidence threshold, anchor_grid, stride, number of classes).
-        class_names (Sequence[str]): A list of class names.
+        Args:
+            anchors (npt.ArrayLike): A list of anchors.
+            class_names (Sequence[str]): A list of class names.
         """
         self.anchors = anchors
         self.class_names = class_names
         self.anchor_per_layer_count = anchors.shape[1]
-        self.native = native.yolov5.RustPostProcessor(anchors, _STRIDES)
+        self.native = native.yolo.RustPostProcessor(anchors, _STRIDES)
 
     def __call__(
         self,
         model_outputs: Sequence[np.ndarray],
         contexts: Sequence[Dict[str, Any]],
         conf_thres: float = 0.25,
         iou_thres: float = 0.45,
+        with_sigmoid: bool = False,
     ) -> List[List[ObjectDetectionResult]]:
         """Convert the outputs of this model to a list of bounding boxes, scores and labels
 
@@ -184,6 +191,8 @@ def __call__(
                 and height.
             conf_thres: Confidence score threshold. Defaults to 0.25.
             iou_thres: IoU threshold value for the NMS processing. Defaults to 0.45.
+            with_sigmoid: Whether to apply the sigmoid function to the model outputs. Defaults to
+                False.
 
         Returns:
             Detected Bounding Box and its score and label represented as `ObjectDetectionResult`.
@@ -203,7 +212,10 @@ def __call__(
             for f in model_outputs
         ]
 
-        batched_boxes = self.native.eval(model_outputs, conf_thres, iou_thres)
+        if with_sigmoid:
+            model_outputs = sigmoid(model_outputs)
+
+        batched_boxes = self.native.eval(model_outputs, conf_thres, iou_thres, None, None)
 
         batched_detected_boxes = []
         for boxes, preproc_params in zip(batched_boxes, contexts):
@@ -213,16 +225,18 @@ def __call__(
             # rescale boxes
 
             for box in boxes:
+                left, top, right, bottom, score, class_id = box
+                class_id = int(class_id)
                 detected_boxes.append(
                     ObjectDetectionResult(
-                        index=box.class_id,
-                        label=self.class_names[box.class_id],
-                        score=box.score,
+                        index=class_id,
+                        label=self.class_names[class_id],
+                        score=score,
                         boundingbox=LtrbBoundingBox(
-                            left=(box.left - padw) / scale,
-                            top=(box.top - padh) / scale,
-                            right=(box.right - padw) / scale,
-                            bottom=(box.bottom - padh) / scale,
+                            left=(left - padw) / scale,
+                            top=(top - padh) / scale,
+                            right=(right - padw) / scale,
+                            bottom=(bottom - padh) / scale,
                         ),
                     )
                 )
@@ -233,7 +247,8 @@ def __call__(
 
 class YOLOv5Base(ObjectDetectionModel, ABC):
     postprocessor_map: ClassVar[Dict[Platform, Type[PostProcessor]]] = {
-        Platform.RUST: YOLOv5PostProcessor,
+        Platform.PYTHON: YOLOv5PythonPostProcessor,
+        Platform.RUST: YOLOv5NativePostProcessor,
     }
 
     def __init__(self, *args, **kwargs):

diff --git a/furiosa/models/vision/yolov5/large.py b/furiosa/models/vision/yolov5/large.py
@@ -3,6 +3,7 @@
 Attributes:
     CLASSES (List[str]): a list of class names
 """
+
 import pathlib
 from typing import List, Union
 
@@ -24,9 +25,13 @@
 class YOLOv5l(YOLOv5Base):
     """YOLOv5 Large model"""
 
-    classes: List[str] = CLASSES
-
-    def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.RUST):
+    def __init__(
+        self,
+        *,
+        postprocessor_type: Union[str, Platform] = Platform.RUST,
+        classes: List[str] = CLASSES,
+        anchors: np.array = _ANCHORS,
+    ):
         postprocessor_type = Platform(postprocessor_type)
         validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys())
         super().__init__(
@@ -35,7 +40,7 @@ def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.RUST):
                 description="YOLOv5 large model",
                 publication=Publication(url="https://github.com/ultralytics/yolov5"),
             ),
-            postprocessor=self.postprocessor_map[postprocessor_type](_ANCHORS, CLASSES),
+            postprocessor=self.postprocessor_map[postprocessor_type](anchors, classes),
         )
 
         self._artifact_name = "yolov5l"
diff --git a/furiosa/models/vision/yolov5/medium.py b/furiosa/models/vision/yolov5/medium.py
@@ -3,6 +3,7 @@
 Attributes:
     CLASSES (List[str]): a list of class names
 """
+
 import pathlib
 from typing import List, Union
 
@@ -24,9 +25,13 @@
 class YOLOv5m(YOLOv5Base):
     """YOLOv5 Medium model"""
 
-    classes: List[str] = CLASSES
-
-    def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.RUST):
+    def __init__(
+        self,
+        *,
+        postprocessor_type: Union[str, Platform] = Platform.RUST,
+        classes: List[str] = CLASSES,
+        anchors: np.array = _ANCHORS,
+    ):
         postprocessor_type = Platform(postprocessor_type)
         validate_postprocessor_type(postprocessor_type, self.postprocessor_map.keys())
         super().__init__(
@@ -35,7 +40,7 @@ def __init__(self, *, postprocessor_type: Union[str, Platform] = Platform.RUST):
                 description="YOLOv5 medium model",
                 publication=Publication(url="https://github.com/ultralytics/yolov5"),
             ),
-            postprocessor=self.postprocessor_map[postprocessor_type](_ANCHORS, CLASSES),
+            postprocessor=self.postprocessor_map[postprocessor_type](anchors, classes),
         )
 
         self._artifact_name = "yolov5m"