From 246c31e9aebf816c3f18f105fbf67986a01169d2 Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Wed, 1 Nov 2023 13:47:00 +0200 Subject: [PATCH 1/3] Fixed bug in _pad_image that did not support pad_value=(R,G,B) input --- .../training/transforms/utils.py | 24 +++++++++++--- tests/unit_tests/transforms_test.py | 31 ++++++++++++++++++- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/super_gradients/training/transforms/utils.py b/src/super_gradients/training/transforms/utils.py index 617361e465..7c972ae30b 100644 --- a/src/super_gradients/training/transforms/utils.py +++ b/src/super_gradients/training/transforms/utils.py @@ -1,4 +1,6 @@ -from typing import Tuple +import numbers +import typing +from typing import Tuple, Union from dataclasses import dataclass import cv2 @@ -89,20 +91,34 @@ def _get_bottom_right_padding_coordinates(input_shape: Tuple[int, int], output_s return PaddingCoordinates(top=0, bottom=pad_height, left=0, right=pad_width) -def _pad_image(image: np.ndarray, padding_coordinates: PaddingCoordinates, pad_value: int) -> np.ndarray: +def _pad_image(image: np.ndarray, padding_coordinates: PaddingCoordinates, pad_value: Union[int, Tuple[int, ...]]) -> np.ndarray: """Pad an image. :param image: Image to shift. (H, W, C) or (H, W). :param pad_h: Tuple of (padding_top, padding_bottom). :param pad_w: Tuple of (padding_left, padding_right). - :param pad_value: Padding value + :param pad_value: Padding value. Can be a single scalar (Same value for all channels) or a tuple of values. + In the latter case, the tuple length must be equal to the number of channels. :return: Image shifted according to padding coordinates. 
""" pad_h = (padding_coordinates.top, padding_coordinates.bottom) pad_w = (padding_coordinates.left, padding_coordinates.right) if len(image.shape) == 3: - return np.pad(image, (pad_h, pad_w, (0, 0)), "constant", constant_values=pad_value) + _, _, num_channels = image.shape + + if isinstance(pad_value, numbers.Number): + pad_value = tuple([pad_value] * num_channels) + else: + if isinstance(pad_value, typing.Sized) and len(pad_value) != num_channels: + raise ValueError(f"A pad_value tuple ({pad_value} length should be {num_channels} for an image with {num_channels} channels") + + pad_value = tuple(pad_value) + + padded_channels = [] + for channel_index, pad_value_channel in enumerate(pad_value): + padded_channels.append(np.pad(image[..., channel_index], (pad_h, pad_w), "constant", constant_values=pad_value_channel)) + return np.stack(padded_channels, axis=-1) else: return np.pad(image, (pad_h, pad_w), "constant", constant_values=pad_value) diff --git a/tests/unit_tests/transforms_test.py b/tests/unit_tests/transforms_test.py index 281fc26860..b3c2173ae7 100644 --- a/tests/unit_tests/transforms_test.py +++ b/tests/unit_tests/transforms_test.py @@ -3,6 +3,7 @@ import cv2 import matplotlib.pyplot as plt import numpy as np +from omegaconf import ListConfig from super_gradients.training.transforms import KeypointsMixup, KeypointsCompose from super_gradients.training.transforms.keypoint_transforms import ( @@ -244,7 +245,7 @@ def test_rescale_bboxes(self): rescaled_bboxes = _rescale_bboxes(targets=bboxes, scale_factors=(sy, sx)) np.testing.assert_array_equal(rescaled_bboxes, expected_bboxes) - def test_pad_image(self): + def test_pad_image_with_constant(self): image = np.random.randint(0, 256, size=(640, 480, 3), dtype=np.uint8) padding_coordinates = PaddingCoordinates(top=80, bottom=80, left=60, right=60) pad_value = 0 @@ -258,6 +259,34 @@ def test_pad_image(self): self.assertTrue((shifted_image[:, : padding_coordinates.left, :] == pad_value).all()) 
self.assertTrue((shifted_image[:, -padding_coordinates.right :, :] == pad_value).all()) + def test_pad_image_with_tuple(self): + image = np.random.randint(0, 256, size=(640, 480, 3), dtype=np.uint8) + padding_coordinates = PaddingCoordinates(top=80, bottom=80, left=60, right=60) + pad_value = (1, 2, 3) + shifted_image = _pad_image(image, padding_coordinates, pad_value) + + # Check if the shifted image has the correct shape + self.assertEqual(shifted_image.shape, (800, 600, 3)) + # Check if the padding values are correct + self.assertTrue((shifted_image[: padding_coordinates.top, :, :] == pad_value).all()) + self.assertTrue((shifted_image[-padding_coordinates.bottom :, :, :] == pad_value).all()) + self.assertTrue((shifted_image[:, : padding_coordinates.left, :] == pad_value).all()) + self.assertTrue((shifted_image[:, -padding_coordinates.right :, :] == pad_value).all()) + + def test_pad_image_with_listconfig(self): + image = np.random.randint(0, 256, size=(640, 480, 3), dtype=np.uint8) + padding_coordinates = PaddingCoordinates(top=80, bottom=80, left=60, right=60) + pad_value = ListConfig([1, 2, 3]) + shifted_image = _pad_image(image, padding_coordinates, pad_value) + + # Check if the shifted image has the correct shape + self.assertEqual(shifted_image.shape, (800, 600, 3)) + # Check if the padding values are correct + self.assertTrue((shifted_image[: padding_coordinates.top, :, :] == pad_value).all()) + self.assertTrue((shifted_image[-padding_coordinates.bottom :, :, :] == pad_value).all()) + self.assertTrue((shifted_image[:, : padding_coordinates.left, :] == pad_value).all()) + self.assertTrue((shifted_image[:, -padding_coordinates.right :, :] == pad_value).all()) + def test_shift_bboxes(self): bboxes = np.array([[10, 20, 50, 60, 1], [30, 40, 80, 90, 2]], dtype=np.float32) shift_w, shift_h = 60, 80 From 0ba650f2b2977a617529c584e08aebcae1b9ad07 Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Thu, 2 Nov 2023 13:42:15 +0200 Subject: [PATCH 2/3] Added 
checking for pad_value when input is image of HW shape --- src/super_gradients/training/transforms/utils.py | 10 ++++++++++ tests/unit_tests/transforms_test.py | 14 ++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/super_gradients/training/transforms/utils.py b/src/super_gradients/training/transforms/utils.py index 7c972ae30b..175a63b01d 100644 --- a/src/super_gradients/training/transforms/utils.py +++ b/src/super_gradients/training/transforms/utils.py @@ -120,6 +120,16 @@ def _pad_image(image: np.ndarray, padding_coordinates: PaddingCoordinates, pad_v padded_channels.append(np.pad(image[..., channel_index], (pad_h, pad_w), "constant", constant_values=pad_value_channel)) return np.stack(padded_channels, axis=-1) else: + if isinstance(pad_value, numbers.Number): + pass + elif isinstance(pad_value, typing.Sized): + if len(pad_value) != 1: + raise ValueError(f"A pad_value tuple ({pad_value} length should be 1 for a grayscale image") + else: + (pad_value,) = pad_value # Unpack to a single scalar + else: + raise ValueError(f"Unsupported pad_value type {type(pad_value)}") + return np.pad(image, (pad_h, pad_w), "constant", constant_values=pad_value) diff --git a/tests/unit_tests/transforms_test.py b/tests/unit_tests/transforms_test.py index b3c2173ae7..d8a73dbade 100644 --- a/tests/unit_tests/transforms_test.py +++ b/tests/unit_tests/transforms_test.py @@ -287,6 +287,20 @@ def test_pad_image_with_listconfig(self): self.assertTrue((shifted_image[:, : padding_coordinates.left, :] == pad_value).all()) self.assertTrue((shifted_image[:, -padding_coordinates.right :, :] == pad_value).all()) + def test_pad_grayscale_image(self): + image = np.random.randint(0, 256, size=(640, 480), dtype=np.uint8) + padding_coordinates = PaddingCoordinates(top=80, bottom=80, left=60, right=60) + pad_value = 1 + shifted_image = _pad_image(image, padding_coordinates, pad_value) + + # Check if the shifted image has the correct shape + self.assertEqual(shifted_image.shape, (800, 600)) 
+ # Check if the padding values are correct + self.assertTrue((shifted_image[: padding_coordinates.top, :] == pad_value).all()) + self.assertTrue((shifted_image[-padding_coordinates.bottom :, :] == pad_value).all()) + self.assertTrue((shifted_image[:, : padding_coordinates.left] == pad_value).all()) + self.assertTrue((shifted_image[:, -padding_coordinates.right :] == pad_value).all()) + def test_shift_bboxes(self): bboxes = np.array([[10, 20, 50, 60, 1], [30, 40, 80, 90, 2]], dtype=np.float32) shift_w, shift_h = 60, 80 From a382457af54cd0a765087032f7c7c8dcafe1f566 Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Sun, 5 Nov 2023 14:12:35 +0200 Subject: [PATCH 3/3] More efficient padding implementation --- src/super_gradients/training/transforms/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/super_gradients/training/transforms/utils.py b/src/super_gradients/training/transforms/utils.py index 175a63b01d..3c91725ff0 100644 --- a/src/super_gradients/training/transforms/utils.py +++ b/src/super_gradients/training/transforms/utils.py @@ -115,10 +115,8 @@ def _pad_image(image: np.ndarray, padding_coordinates: PaddingCoordinates, pad_v pad_value = tuple(pad_value) - padded_channels = [] - for channel_index, pad_value_channel in enumerate(pad_value): - padded_channels.append(np.pad(image[..., channel_index], (pad_h, pad_w), "constant", constant_values=pad_value_channel)) - return np.stack(padded_channels, axis=-1) + constant_values = ((pad_value, pad_value), (pad_value, pad_value), (0, 0)) + padding_values = (pad_h, pad_w, (0, 0)) else: if isinstance(pad_value, numbers.Number): pass @@ -130,7 +128,10 @@ def _pad_image(image: np.ndarray, padding_coordinates: PaddingCoordinates, pad_v else: raise ValueError(f"Unsupported pad_value type {type(pad_value)}") - return np.pad(image, (pad_h, pad_w), "constant", constant_values=pad_value) + constant_values = pad_value + padding_values = (pad_h, pad_w) + + return np.pad(image, 
pad_width=padding_values, mode="constant", constant_values=constant_values) def _shift_bboxes(targets: np.array, shift_w: float, shift_h: float) -> np.array: