diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py
index eba23d0cd52..94a0433ae48 100644
--- a/test/prototype_common_utils.py
+++ b/test/prototype_common_utils.py
@@ -184,13 +184,13 @@ def load(self, device="cpu"):
         return args, kwargs


-DEFAULT_SQUARE_IMAGE_SIZE = 15
-DEFAULT_LANDSCAPE_IMAGE_SIZE = (7, 33)
-DEFAULT_PORTRAIT_IMAGE_SIZE = (31, 9)
-DEFAULT_IMAGE_SIZES = (DEFAULT_LANDSCAPE_IMAGE_SIZE, DEFAULT_PORTRAIT_IMAGE_SIZE, DEFAULT_SQUARE_IMAGE_SIZE, "random")
+DEFAULT_SQUARE_SPATIAL_SIZE = 15
+DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33)
+DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9)
+DEFAULT_SPATIAL_SIZES = (DEFAULT_LANDSCAPE_SPATIAL_SIZE, DEFAULT_PORTRAIT_SPATIAL_SIZE, DEFAULT_SQUARE_SPATIAL_SIZE, "random")


-def _parse_image_size(size, *, name="size"):
+def _parse_spatial_size(size, *, name="size"):
     if size == "random":
         return tuple(torch.randint(15, 33, (2,)).tolist())
     elif isinstance(size, int) and size > 0:
@@ -246,11 +246,11 @@ def load(self, device):
 @dataclasses.dataclass
 class ImageLoader(TensorLoader):
     color_space: features.ColorSpace
-    image_size: Tuple[int, int] = dataclasses.field(init=False)
+    spatial_size: Tuple[int, int] = dataclasses.field(init=False)
     num_channels: int = dataclasses.field(init=False)

     def __post_init__(self):
-        self.image_size = self.shape[-2:]
+        self.spatial_size = self.shape[-2:]
         self.num_channels = self.shape[-3]
@@ -277,7 +277,7 @@ def make_image_loader(
     dtype=torch.float32,
     constant_alpha=True,
 ):
-    size = _parse_image_size(size)
+    size = _parse_spatial_size(size)
     num_channels = get_num_channels(color_space)

     def fn(shape, dtype, device):
@@ -295,7 +295,7 @@ def fn(shape, dtype, device):

 def make_image_loaders(
     *,
-    sizes=DEFAULT_IMAGE_SIZES,
+    sizes=DEFAULT_SPATIAL_SIZES,
     color_spaces=(
         features.ColorSpace.GRAY,
         features.ColorSpace.GRAY_ALPHA,
@@ -316,7 +316,7 @@ def make_image_loaders(
 @dataclasses.dataclass
 class BoundingBoxLoader(TensorLoader):
     format: features.BoundingBoxFormat
-    image_size: Tuple[int, int]
+    spatial_size: Tuple[int, int]


 def randint_with_tensor_bounds(arg1, arg2=None, **kwargs):
@@ -331,7 +331,7 @@ def randint_with_tensor_bounds(arg1, arg2=None, **kwargs):
     ).reshape(low.shape)


-def make_bounding_box_loader(*, extra_dims=(), format, image_size="random", dtype=torch.float32):
+def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32):
     if isinstance(format, str):
         format = features.BoundingBoxFormat[format]
     if format not in {
@@ -341,7 +341,7 @@ def make_bounding_box_loader(*, extra_dims=(), format, image_size="random", dtyp
     }:
         raise pytest.UsageError(f"Can't make bounding box in format {format}")

-    image_size = _parse_image_size(image_size, name="image_size")
+    spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")

     def fn(shape, dtype, device):
         *extra_dims, num_coordinates = shape
@@ -350,10 +350,10 @@ def fn(shape, dtype, device):

         if any(dim == 0 for dim in extra_dims):
             return features.BoundingBox(
-                torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=image_size
+                torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
             )

-        height, width = image_size
+        height, width = spatial_size

         if format == features.BoundingBoxFormat.XYXY:
             x1 = torch.randint(0, width // 2, extra_dims)
@@ -375,10 +375,10 @@ def fn(shape, dtype, device):
             parts = (cx, cy, w, h)

         return features.BoundingBox(
-            torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=image_size
+            torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
         )

-    return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, image_size=image_size)
+    return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)


 make_bounding_box = from_loader(make_bounding_box_loader)
@@ -388,11 +388,11 @@ def make_bounding_box_loaders(
     *,
     extra_dims=DEFAULT_EXTRA_DIMS,
     formats=tuple(features.BoundingBoxFormat),
-    image_size="random",
+    spatial_size="random",
     dtypes=(torch.float32, torch.int64),
 ):
     for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes):
-        yield make_bounding_box_loader(**params, image_size=image_size)
+        yield make_bounding_box_loader(**params, spatial_size=spatial_size)


 make_bounding_boxes = from_loaders(make_bounding_box_loaders)
@@ -475,7 +475,7 @@ class MaskLoader(TensorLoader):

 def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8):
     # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects
-    size = _parse_image_size(size)
+    size = _parse_spatial_size(size)
     num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects

     def fn(shape, dtype, device):
@@ -489,7 +489,7 @@ def fn(shape, dtype, device):

 def make_detection_mask_loaders(
-    sizes=DEFAULT_IMAGE_SIZES,
+    sizes=DEFAULT_SPATIAL_SIZES,
     num_objects=(1, 0, "random"),
     extra_dims=DEFAULT_EXTRA_DIMS,
     dtypes=(torch.uint8,),
@@ -503,7 +503,7 @@ def make_detection_mask_loaders(

 def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8):
     # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values
-    size = _parse_image_size(size)
+    size = _parse_spatial_size(size)
     num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories

     def fn(shape, dtype, device):
@@ -518,7 +518,7 @@ def fn(shape, dtype, device):

 def make_segmentation_mask_loaders(
     *,
-    sizes=DEFAULT_IMAGE_SIZES,
+    sizes=DEFAULT_SPATIAL_SIZES,
     num_categories=(1, 2, "random"),
     extra_dims=DEFAULT_EXTRA_DIMS,
     dtypes=(torch.uint8,),
@@ -532,7 +532,7 @@ def make_segmentation_mask_loaders(

 def make_mask_loaders(
     *,
-    sizes=DEFAULT_IMAGE_SIZES,
+    sizes=DEFAULT_SPATIAL_SIZES,
     num_objects=(1, 0, "random"),
     num_categories=(1, 2, "random"),
     extra_dims=DEFAULT_EXTRA_DIMS,
@@ -559,7 +559,7 @@ def make_video_loader(
     extra_dims=(),
     dtype=torch.uint8,
 ):
-    size = _parse_image_size(size)
+    size = _parse_spatial_size(size)
     num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames

     def fn(shape, dtype, device):
@@ -576,7 +576,7 @@ def fn(shape, dtype, device):

 def make_video_loaders(
     *,
-    sizes=DEFAULT_IMAGE_SIZES,
+    sizes=DEFAULT_SPATIAL_SIZES,
     color_spaces=(
         features.ColorSpace.GRAY,
         features.ColorSpace.RGB,
diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py
index 62721c90f4a..27832859f07 100644
--- a/test/prototype_transforms_kernel_infos.py
+++ b/test/prototype_transforms_kernel_infos.py
@@ -145,7 +145,7 @@ def sample_inputs_horizontal_flip_bounding_box():
         formats=[features.BoundingBoxFormat.XYXY], dtypes=[torch.float32]
     ):
         yield ArgsKwargs(
-            bounding_box_loader, format=bounding_box_loader.format, image_size=bounding_box_loader.spatial_size
+            bounding_box_loader, format=bounding_box_loader.format, spatial_size=bounding_box_loader.spatial_size
         )
@@ -185,9 +185,9 @@ def sample_inputs_horizontal_flip_video():
     )


-def _get_resize_sizes(image_size):
-    height, width = image_size
-    length = max(image_size)
+def _get_resize_sizes(spatial_size):
+    height, width = spatial_size
+    length = max(spatial_size)
     yield length
     yield [length]
     yield (length,)
@@ -252,7 +252,7 @@ def reference_inputs_resize_image_tensor():
 def sample_inputs_resize_bounding_box():
     for bounding_box_loader in make_bounding_box_loaders():
         for size in _get_resize_sizes(bounding_box_loader.spatial_size):
-            yield ArgsKwargs(bounding_box_loader, size=size, image_size=bounding_box_loader.spatial_size)
+            yield ArgsKwargs(bounding_box_loader, size=size, spatial_size=bounding_box_loader.spatial_size)


 def sample_inputs_resize_mask():
@@ -394,7 +394,7 @@ def sample_inputs_affine_bounding_box():
         yield ArgsKwargs(
             bounding_box_loader,
             format=bounding_box_loader.format,
-            image_size=bounding_box_loader.spatial_size,
+            spatial_size=bounding_box_loader.spatial_size,
             **affine_params,
         )
@@ -422,9 +422,9 @@ def _compute_affine_matrix(angle, translate, scale, shear, center):
     return true_matrix


-def reference_affine_bounding_box(bounding_box, *, format, image_size, angle, translate, scale, shear, center=None):
+def reference_affine_bounding_box(bounding_box, *, format, spatial_size, angle, translate, scale, shear, center=None):
     if center is None:
-        center = [s * 0.5 for s in image_size[::-1]]
+        center = [s * 0.5 for s in spatial_size[::-1]]

     def transform(bbox):
         affine_matrix = _compute_affine_matrix(angle, translate, scale, shear, center)
@@ -473,7 +473,7 @@ def reference_inputs_affine_bounding_box():
         yield ArgsKwargs(
             bounding_box_loader,
             format=bounding_box_loader.format,
-            image_size=bounding_box_loader.spatial_size,
+            spatial_size=bounding_box_loader.spatial_size,
             **affine_kwargs,
         )
@@ -650,7 +650,7 @@ def sample_inputs_vertical_flip_bounding_box():
         formats=[features.BoundingBoxFormat.XYXY], dtypes=[torch.float32]
     ):
         yield ArgsKwargs(
-            bounding_box_loader, format=bounding_box_loader.format, image_size=bounding_box_loader.spatial_size
+            bounding_box_loader, format=bounding_box_loader.format, spatial_size=bounding_box_loader.spatial_size
         )
@@ -729,7 +729,7 @@ def sample_inputs_rotate_bounding_box():
         yield ArgsKwargs(
             bounding_box_loader,
             format=bounding_box_loader.format,
-            image_size=bounding_box_loader.spatial_size,
+            spatial_size=bounding_box_loader.spatial_size,
             angle=_ROTATE_ANGLES[0],
         )
@@ -1001,7 +1001,7 @@ def sample_inputs_pad_bounding_box():
         yield ArgsKwargs(
             bounding_box_loader,
             format=bounding_box_loader.format,
-            image_size=bounding_box_loader.spatial_size,
+            spatial_size=bounding_box_loader.spatial_size,
             padding=padding,
             padding_mode="constant",
         )
@@ -1131,8 +1131,8 @@ def sample_inputs_perspective_video():
     )


-def _get_elastic_displacement(image_size):
-    return torch.rand(1, *image_size, 2)
+def _get_elastic_displacement(spatial_size):
+    return torch.rand(1, *spatial_size, 2)


 def sample_inputs_elastic_image_tensor():
@@ -1212,7 +1212,7 @@ def sample_inputs_elastic_video():
     )


-_CENTER_CROP_IMAGE_SIZES = [(16, 16), (7, 33), (31, 9)]
+_CENTER_CROP_SPATIAL_SIZES = [(16, 16), (7, 33), (31, 9)]
 _CENTER_CROP_OUTPUT_SIZES = [[4, 3], [42, 70], [4], 3, (5, 2), (6,)]
@@ -1231,7 +1231,7 @@ def sample_inputs_center_crop_image_tensor():

 def reference_inputs_center_crop_image_tensor():
     for image_loader, output_size in itertools.product(
-        make_image_loaders(sizes=_CENTER_CROP_IMAGE_SIZES, extra_dims=[()]), _CENTER_CROP_OUTPUT_SIZES
+        make_image_loaders(sizes=_CENTER_CROP_SPATIAL_SIZES, extra_dims=[()]), _CENTER_CROP_OUTPUT_SIZES
     ):
         yield ArgsKwargs(image_loader, output_size=output_size)
@@ -1241,7 +1241,7 @@ def sample_inputs_center_crop_bounding_box():
         yield ArgsKwargs(
             bounding_box_loader,
             format=bounding_box_loader.format,
-            image_size=bounding_box_loader.spatial_size,
+            spatial_size=bounding_box_loader.spatial_size,
             output_size=output_size,
         )
@@ -1254,7 +1254,7 @@ def sample_inputs_center_crop_mask():

 def reference_inputs_center_crop_mask():
     for mask_loader, output_size in itertools.product(
-        make_mask_loaders(sizes=_CENTER_CROP_IMAGE_SIZES, extra_dims=[()], num_objects=[1]), _CENTER_CROP_OUTPUT_SIZES
+        make_mask_loaders(sizes=_CENTER_CROP_SPATIAL_SIZES, extra_dims=[()], num_objects=[1]), _CENTER_CROP_OUTPUT_SIZES
     ):
         yield ArgsKwargs(mask_loader, output_size=output_size)
@@ -1820,7 +1820,7 @@ def sample_inputs_adjust_saturation_video():
 def sample_inputs_clamp_bounding_box():
     for bounding_box_loader in make_bounding_box_loaders():
         yield ArgsKwargs(
-            bounding_box_loader, format=bounding_box_loader.format, image_size=bounding_box_loader.spatial_size
+            bounding_box_loader, format=bounding_box_loader.format, spatial_size=bounding_box_loader.spatial_size
         )
@@ -1834,7 +1834,7 @@ def sample_inputs_clamp_bounding_box():
 _FIVE_TEN_CROP_SIZES = [7, (6,), [5], (6, 5), [7, 6]]


-def _get_five_ten_crop_image_size(size):
+def _get_five_ten_crop_spatial_size(size):
     if isinstance(size, int):
         crop_height = crop_width = size
     elif len(size) == 1:
@@ -1847,28 +1847,28 @@ def sample_inputs_five_crop_image_tensor():
     for size in _FIVE_TEN_CROP_SIZES:
         for image_loader in make_image_loaders(
-            sizes=[_get_five_ten_crop_image_size(size)], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32]
+            sizes=[_get_five_ten_crop_spatial_size(size)], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32]
         ):
             yield ArgsKwargs(image_loader, size=size)


 def reference_inputs_five_crop_image_tensor():
     for size in _FIVE_TEN_CROP_SIZES:
-        for image_loader in make_image_loaders(sizes=[_get_five_ten_crop_image_size(size)], extra_dims=[()]):
+        for image_loader in make_image_loaders(sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()]):
             yield ArgsKwargs(image_loader, size=size)


 def sample_inputs_ten_crop_image_tensor():
     for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]):
         for image_loader in make_image_loaders(
-            sizes=[_get_five_ten_crop_image_size(size)], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32]
+            sizes=[_get_five_ten_crop_spatial_size(size)], color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32]
         ):
             yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip)


 def reference_inputs_ten_crop_image_tensor():
     for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]):
-        for image_loader in make_image_loaders(sizes=[_get_five_ten_crop_image_size(size)], extra_dims=[()]):
+        for image_loader in make_image_loaders(sizes=[_get_five_ten_crop_spatial_size(size)], extra_dims=[()]):
             yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip)
diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
index d5e49078259..2c095fa6e81 100644
--- a/test/test_prototype_transforms.py
+++ b/test/test_prototype_transforms.py
@@ -559,7 +559,7 @@ def test__transform(self, degrees, expand, fill, center, mocker):

     @pytest.mark.parametrize("angle", [34, -87])
     @pytest.mark.parametrize("expand", [False, True])
-    def test_boundingbox_image_size(self, angle, expand):
+    def test_boundingbox_spatial_size(self, angle, expand):
         # Specific test for BoundingBox.rotate
         bbox = features.BoundingBox(
             torch.tensor([1, 2, 3, 4]), format=features.BoundingBoxFormat.XYXY, spatial_size=(32, 32)
@@ -1281,7 +1281,7 @@ def test__transform(self, mocker):
         transform = transforms.RandomIoUCrop()

         image = features.Image(torch.rand(3, 32, 24))
-        bboxes = make_bounding_box(format="XYXY", image_size=(32, 24), extra_dims=(6,))
+        bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), extra_dims=(6,))
         label = features.Label(torch.randint(0, 10, size=(6,)))
         ohe_label = features.OneHotLabel(torch.zeros(6, 10).scatter_(1, label.unsqueeze(1), 1))
         masks = make_detection_mask((32, 24), num_objects=6)
@@ -1329,12 +1329,12 @@ def test__transform(self, mocker):

 class TestScaleJitter:
     def test__get_params(self, mocker):
-        image_size = (24, 32)
+        spatial_size = (24, 32)
         target_size = (16, 12)
         scale_range = (0.5, 1.5)

         transform = transforms.ScaleJitter(target_size=target_size, scale_range=scale_range)
-        sample = mocker.MagicMock(spec=features.Image, num_channels=3, image_size=image_size)
+        sample = mocker.MagicMock(spec=features.Image, num_channels=3, spatial_size=spatial_size)

         n_samples = 5
         for _ in range(n_samples):
@@ -1347,11 +1347,11 @@ def test__get_params(self, mocker):
             assert isinstance(size, tuple) and len(size) == 2
             height, width = size

-            r_min = min(target_size[1] / image_size[0], target_size[0] / image_size[1]) * scale_range[0]
-            r_max = min(target_size[1] / image_size[0], target_size[0] / image_size[1]) * scale_range[1]
+            r_min = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[0]
+            r_max = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[1]

-            assert int(image_size[0] * r_min) <= height <= int(image_size[0] * r_max)
-            assert int(image_size[1] * r_min) <= width <= int(image_size[1] * r_max)
+            assert int(spatial_size[0] * r_min) <= height <= int(spatial_size[0] * r_max)
+            assert int(spatial_size[1] * r_min) <= width <= int(spatial_size[1] * r_max)

     def test__transform(self, mocker):
         interpolation_sentinel = mocker.MagicMock()
@@ -1379,13 +1379,13 @@ def test__transform(self, mocker):

 class TestRandomShortestSize:
     def test__get_params(self, mocker):
-        image_size = (3, 10)
+        spatial_size = (3, 10)
         min_size = [5, 9]
         max_size = 20

         transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size)

-        sample = mocker.MagicMock(spec=features.Image, num_channels=3, image_size=image_size)
+        sample = mocker.MagicMock(spec=features.Image, num_channels=3, spatial_size=spatial_size)
         params = transform._get_params(sample)

         assert "size" in params
@@ -1550,14 +1550,14 @@ class TestFixedSizeCrop:
     def test__get_params(self, mocker):
         crop_size = (7, 7)
         batch_shape = (10,)
-        image_size = (11, 5)
+        spatial_size = (11, 5)

         transform = transforms.FixedSizeCrop(size=crop_size)

         sample = dict(
-            image=make_image(size=image_size, color_space=features.ColorSpace.RGB),
+            image=make_image(size=spatial_size, color_space=features.ColorSpace.RGB),
             bounding_boxes=make_bounding_box(
-                format=features.BoundingBoxFormat.XYXY, image_size=image_size, extra_dims=batch_shape
+                format=features.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape
             ),
         )
         params = transform._get_params(sample)
@@ -1638,7 +1638,7 @@ def test__transform(self, mocker, needs):

     def test__transform_culling(self, mocker):
         batch_size = 10
-        image_size = (10, 10)
+        spatial_size = (10, 10)

         is_valid = torch.randint(0, 2, (batch_size,), dtype=torch.bool)
         mocker.patch(
@@ -1647,17 +1647,17 @@ def test__transform_culling(self, mocker):
                 needs_crop=True,
                 top=0,
                 left=0,
-                height=image_size[0],
-                width=image_size[1],
+                height=spatial_size[0],
+                width=spatial_size[1],
                 is_valid=is_valid,
                 needs_pad=False,
             ),
         )

         bounding_boxes = make_bounding_box(
-            format=features.BoundingBoxFormat.XYXY, image_size=image_size, extra_dims=(batch_size,)
+            format=features.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
         )
-        masks = make_detection_mask(size=image_size, extra_dims=(batch_size,))
+        masks = make_detection_mask(size=spatial_size, extra_dims=(batch_size,))
         labels = make_label(extra_dims=(batch_size,))

         transform = transforms.FixedSizeCrop((-1, -1))
@@ -1678,7 +1678,7 @@ def test__transform_culling(self, mocker):

     def test__transform_bounding_box_clamping(self, mocker):
         batch_size = 3
-        image_size = (10, 10)
+        spatial_size = (10, 10)

         mocker.patch(
             "torchvision.prototype.transforms._geometry.FixedSizeCrop._get_params",
@@ -1686,15 +1686,15 @@ def test__transform_bounding_box_clamping(self, mocker):
                 needs_crop=True,
                 top=0,
                 left=0,
-                height=image_size[0],
-                width=image_size[1],
+                height=spatial_size[0],
+                width=spatial_size[1],
                 is_valid=torch.full((batch_size,), fill_value=True),
                 needs_pad=False,
             ),
         )

         bounding_box = make_bounding_box(
-            format=features.BoundingBoxFormat.XYXY, image_size=image_size, extra_dims=(batch_size,)
+            format=features.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
         )

         mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box")
diff --git a/test/test_prototype_transforms_consistency.py b/test/test_prototype_transforms_consistency.py
index c8debe1e293..8d5d05dc142 100644
--- a/test/test_prototype_transforms_consistency.py
+++ b/test/test_prototype_transforms_consistency.py
@@ -871,7 +871,7 @@ def make_datapoints(self, with_mask=True):
             pil_image = to_image_pil(make_image(size=size, color_space=features.ColorSpace.RGB))
             target = {
-                "boxes": make_bounding_box(image_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+                "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
                 "labels": make_label(extra_dims=(num_objects,), categories=80),
             }
             if with_mask:
@@ -881,7 +881,7 @@ def make_datapoints(self, with_mask=True):
             tensor_image = torch.Tensor(make_image(size=size, color_space=features.ColorSpace.RGB))
             target = {
-                "boxes": make_bounding_box(image_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+                "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
                 "labels": make_label(extra_dims=(num_objects,), categories=80),
             }
             if with_mask:
@@ -891,7 +891,7 @@ def make_datapoints(self, with_mask=True):
             feature_image = make_image(size=size, color_space=features.ColorSpace.RGB)
             target = {
-                "boxes": make_bounding_box(image_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+                "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
                 "labels": make_label(extra_dims=(num_objects,), categories=80),
             }
             if with_mask:
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index b1b06b6288b..be34547b618 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -333,16 +333,16 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_):
 @pytest.mark.parametrize("device", cpu_and_gpu())
 def test_correctness_affine_bounding_box_on_fixed_input(device):
     # Check transformation against known expected output
-    image_size = (64, 64)
+    spatial_size = (64, 64)
     # xyxy format
     in_boxes = [
         [20, 25, 35, 45],
         [50, 5, 70, 22],
-        [image_size[1] // 2 - 10, image_size[0] // 2 - 10, image_size[1] // 2 + 10, image_size[0] // 2 + 10],
+        [spatial_size[1] // 2 - 10, spatial_size[0] // 2 - 10, spatial_size[1] // 2 + 10, spatial_size[0] // 2 + 10],
         [1, 1, 5, 5],
     ]
     in_boxes = features.BoundingBox(
-        in_boxes, format=features.BoundingBoxFormat.XYXY, spatial_size=image_size, dtype=torch.float64, device=device
+        in_boxes, format=features.BoundingBoxFormat.XYXY, spatial_size=spatial_size, dtype=torch.float64, device=device
     )
     # Tested parameters
     angle = 63
@@ -355,9 +355,9 @@ def test_correctness_affine_bounding_box_on_fixed_input(device):
     # from albumentations.augmentations.geometric.functional import normalize_bbox, denormalize_bbox
     # expected_bboxes = []
     # for in_box in in_boxes:
-    #     n_in_box = normalize_bbox(in_box, *image_size)
-    #     n_out_box = bbox_shift_scale_rotate(n_in_box, -angle, scale, dx, dy, *image_size)
-    #     out_box = denormalize_bbox(n_out_box, *image_size)
+    #     n_in_box = normalize_bbox(in_box, *spatial_size)
+    #     n_out_box = bbox_shift_scale_rotate(n_in_box, -angle, scale, dx, dy, *spatial_size)
+    #     out_box = denormalize_bbox(n_out_box, *spatial_size)
     #     expected_bboxes.append(out_box)
     expected_bboxes = [
         (24.522435977922218, 34.375689508290854, 46.443125279998114, 54.3516575015695),
@@ -371,7 +371,7 @@ def test_correctness_affine_bounding_box_on_fixed_input(device):
         in_boxes.format,
         in_boxes.spatial_size,
         angle,
-        (dx * image_size[1], dy * image_size[0]),
+        (dx * spatial_size[1], dy * spatial_size[0]),
         scale,
         shear=(0, 0),
     )
@@ -455,16 +455,16 @@ def _compute_expected_bbox(bbox, angle_, expand_, center_):
             (height, width),
         )

-    image_size = (32, 38)
+    spatial_size = (32, 38)

-    for bboxes in make_bounding_boxes(image_size=image_size, extra_dims=((4,),)):
+    for bboxes in make_bounding_boxes(spatial_size=spatial_size, extra_dims=((4,),)):
         bboxes_format = bboxes.format
-        bboxes_image_size = bboxes.spatial_size
+        bboxes_spatial_size = bboxes.spatial_size

-        output_bboxes, output_image_size = F.rotate_bounding_box(
+        output_bboxes, output_spatial_size = F.rotate_bounding_box(
             bboxes,
             bboxes_format,
-            image_size=bboxes_image_size,
+            spatial_size=bboxes_spatial_size,
             angle=angle,
             expand=expand,
             center=center,
@@ -472,38 +472,38 @@ def _compute_expected_bbox(bbox, angle_, expand_, center_):

         center_ = center
         if center_ is None:
-            center_ = [s * 0.5 for s in bboxes_image_size[::-1]]
+            center_ = [s * 0.5 for s in bboxes_spatial_size[::-1]]

         if bboxes.ndim < 2:
             bboxes = [bboxes]

         expected_bboxes = []
         for bbox in bboxes:
-            bbox = features.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_image_size)
-            expected_bbox, expected_image_size = _compute_expected_bbox(bbox, -angle, expand, center_)
+            bbox = features.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size)
+            expected_bbox, expected_spatial_size = _compute_expected_bbox(bbox, -angle, expand, center_)
             expected_bboxes.append(expected_bbox)
         if len(expected_bboxes) > 1:
             expected_bboxes = torch.stack(expected_bboxes)
         else:
             expected_bboxes = expected_bboxes[0]
         torch.testing.assert_close(output_bboxes, expected_bboxes, atol=1, rtol=0)
-        torch.testing.assert_close(output_image_size, expected_image_size, atol=1, rtol=0)
+        torch.testing.assert_close(output_spatial_size, expected_spatial_size, atol=1, rtol=0)


 @pytest.mark.parametrize("device", cpu_and_gpu())
 @pytest.mark.parametrize("expand", [False])  # expand=True does not match D2
 def test_correctness_rotate_bounding_box_on_fixed_input(device, expand):
     # Check transformation against known expected output
-    image_size = (64, 64)
+    spatial_size = (64, 64)
     # xyxy format
     in_boxes = [
         [1, 1, 5, 5],
-        [1, image_size[0] - 6, 5, image_size[0] - 2],
-        [image_size[1] - 6, image_size[0] - 6, image_size[1] - 2, image_size[0] - 2],
-        [image_size[1] // 2 - 10, image_size[0] // 2 - 10, image_size[1] // 2 + 10, image_size[0] // 2 + 10],
+        [1, spatial_size[0] - 6, 5, spatial_size[0] - 2],
+        [spatial_size[1] - 6, spatial_size[0] - 6, spatial_size[1] - 2, spatial_size[0] - 2],
+        [spatial_size[1] // 2 - 10, spatial_size[0] // 2 - 10, spatial_size[1] // 2 + 10, spatial_size[0] // 2 + 10],
     ]
     in_boxes = features.BoundingBox(
-        in_boxes, format=features.BoundingBoxFormat.XYXY, spatial_size=image_size, dtype=torch.float64, device=device
+        in_boxes, format=features.BoundingBoxFormat.XYXY, spatial_size=spatial_size, dtype=torch.float64, device=device
     )
     # Tested parameters
     angle = 45
@@ -597,7 +597,7 @@ def test_correctness_crop_bounding_box(device, format, top, left, height, width,
     if format != features.BoundingBoxFormat.XYXY:
         in_boxes = convert_format_bounding_box(in_boxes, features.BoundingBoxFormat.XYXY, format)

-    output_boxes, output_image_size = F.crop_bounding_box(
+    output_boxes, output_spatial_size = F.crop_bounding_box(
         in_boxes,
         format,
         top,
@@ -610,7 +610,7 @@ def test_correctness_crop_bounding_box(device, format, top, left, height, width,
         output_boxes = convert_format_bounding_box(output_boxes, format, features.BoundingBoxFormat.XYXY)

     torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
-    torch.testing.assert_close(output_image_size, size)
+    torch.testing.assert_close(output_spatial_size, size)


 @pytest.mark.parametrize("device", cpu_and_gpu())
@@ -658,7 +658,7 @@ def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_):
         bbox[3] = (bbox[3] - top_) * size_[0] / height_
         return bbox

-    image_size = (100, 100)
+    spatial_size = (100, 100)
     # xyxy format
     in_boxes = [
         [10.0, 10.0, 20.0, 20.0],
@@ -670,18 +670,18 @@ def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_):
     expected_bboxes = torch.tensor(expected_bboxes, device=device)

     in_boxes = features.BoundingBox(
-        in_boxes, format=features.BoundingBoxFormat.XYXY, spatial_size=image_size, device=device
+        in_boxes, format=features.BoundingBoxFormat.XYXY, spatial_size=spatial_size, device=device
     )
     if format != features.BoundingBoxFormat.XYXY:
         in_boxes = convert_format_bounding_box(in_boxes, features.BoundingBoxFormat.XYXY, format)

-    output_boxes, output_image_size = F.resized_crop_bounding_box(in_boxes, format, top, left, height, width, size)
+    output_boxes, output_spatial_size = F.resized_crop_bounding_box(in_boxes, format, top, left, height, width, size)

     if format != features.BoundingBoxFormat.XYXY:
         output_boxes = convert_format_bounding_box(output_boxes, format, features.BoundingBoxFormat.XYXY)

     torch.testing.assert_close(output_boxes, expected_bboxes)
-    torch.testing.assert_close(output_image_size, size)
+    torch.testing.assert_close(output_spatial_size, size)


 def _parse_padding(padding):
@@ -718,7 +718,7 @@ def _compute_expected_bbox(bbox, padding_):
         bbox = bbox.to(bbox_dtype)
         return bbox

-    def _compute_expected_image_size(bbox, padding_):
+    def _compute_expected_spatial_size(bbox, padding_):
         pad_left, pad_up, pad_right, pad_down = _parse_padding(padding_)
         height, width = bbox.spatial_size
         return height + pad_up + pad_down, width + pad_left + pad_right
@@ -726,20 +726,20 @@ def _compute_expected_image_size(bbox, padding_):
     for bboxes in make_bounding_boxes():
         bboxes = bboxes.to(device)
         bboxes_format = bboxes.format
-        bboxes_image_size = bboxes.spatial_size
+        bboxes_spatial_size = bboxes.spatial_size

-        output_boxes, output_image_size = F.pad_bounding_box(
-            bboxes, format=bboxes_format, image_size=bboxes_image_size, padding=padding
+        output_boxes, output_spatial_size = F.pad_bounding_box(
+            bboxes, format=bboxes_format, spatial_size=bboxes_spatial_size, padding=padding
         )

-        torch.testing.assert_close(output_image_size, _compute_expected_image_size(bboxes, padding))
+        torch.testing.assert_close(output_spatial_size, _compute_expected_spatial_size(bboxes, padding))

         if bboxes.ndim < 2 or bboxes.shape[0] == 0:
             bboxes = [bboxes]

         expected_bboxes = []
         for bbox in bboxes:
-            bbox = features.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_image_size)
+            bbox = features.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size)
             expected_bboxes.append(_compute_expected_bbox(bbox, padding))

         if len(expected_bboxes) > 1:
@@ -815,15 +815,15 @@ def _compute_expected_bbox(bbox, pcoeffs_):
             out_bbox, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox.format, copy=False
         )

-    image_size = (32, 38)
+    spatial_size = (32, 38)

     pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
     inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints)

-    for bboxes in make_bounding_boxes(image_size=image_size, extra_dims=((4,),)):
+    for bboxes in make_bounding_boxes(spatial_size=spatial_size, extra_dims=((4,),)):
         bboxes = bboxes.to(device)
         bboxes_format = bboxes.format
-        bboxes_image_size = bboxes.spatial_size
+        bboxes_spatial_size = bboxes.spatial_size

         output_bboxes = F.perspective_bounding_box(
             bboxes,
@@ -836,7 +836,7 @@ def _compute_expected_bbox(bbox, pcoeffs_):

         expected_bboxes = []
         for bbox in bboxes:
-            bbox = features.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_image_size)
+            bbox = features.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size)
             expected_bboxes.append(_compute_expected_bbox(bbox, inv_pcoeffs))
         if len(expected_bboxes) > 1:
             expected_bboxes = torch.stack(expected_bboxes)
@@ -853,14 +853,14 @@ def _compute_expected_bbox(bbox, pcoeffs_):
 def test_correctness_center_crop_bounding_box(device, output_size):
     def _compute_expected_bbox(bbox, output_size_):
         format_ = bbox.format
-        image_size_ = bbox.spatial_size
+        spatial_size_ = bbox.spatial_size
         bbox = convert_format_bounding_box(bbox, format_, features.BoundingBoxFormat.XYWH)

         if len(output_size_) == 1:
             output_size_.append(output_size_[-1])

-        cy = int(round((image_size_[0] - output_size_[0]) * 0.5))
-        cx = int(round((image_size_[1] - output_size_[1]) * 0.5))
+        cy = int(round((spatial_size_[0] - output_size_[0]) * 0.5))
+        cx = int(round((spatial_size_[1] - output_size_[1]) * 0.5))
         out_bbox = [
             bbox[0].item() - cx,
             bbox[1].item() - cy,
@@ -879,10 +879,10 @@ def _compute_expected_bbox(bbox, output_size_):
     for bboxes in make_bounding_boxes(extra_dims=((4,),)):
         bboxes = bboxes.to(device)
         bboxes_format = bboxes.format
-        bboxes_image_size = bboxes.spatial_size
+        bboxes_spatial_size = bboxes.spatial_size

-        output_boxes, output_image_size = F.center_crop_bounding_box(
-            bboxes, bboxes_format, bboxes_image_size, output_size
+        output_boxes, output_spatial_size = F.center_crop_bounding_box(
+            bboxes, bboxes_format, bboxes_spatial_size, output_size
         )

         if bboxes.ndim < 2:
@@ -890,7 +890,7 @@ def _compute_expected_bbox(bbox, output_size_):

         expected_bboxes = []
         for bbox in bboxes:
-            bbox = features.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_image_size)
+            bbox = features.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size)
             expected_bboxes.append(_compute_expected_bbox(bbox, output_size))

         if len(expected_bboxes) > 1:
@@ -898,7 +898,7 @@ def _compute_expected_bbox(bbox, output_size_):
         else:
             expected_bboxes = expected_bboxes[0]
         torch.testing.assert_close(output_boxes, expected_bboxes)
-        torch.testing.assert_close(output_image_size, output_size)
+        torch.testing.assert_close(output_spatial_size, output_size)


 @pytest.mark.parametrize("device", cpu_and_gpu())
@@ -926,11 +926,11 @@ def _compute_expected_mask(mask, output_size):

 # Copied from test/test_functional_tensor.py
 @pytest.mark.parametrize("device", cpu_and_gpu())
-@pytest.mark.parametrize("image_size", ("small", "large"))
+@pytest.mark.parametrize("spatial_size", ("small", "large"))
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)])
 @pytest.mark.parametrize("sigma", [[0.5, 0.5], (0.5, 0.5), (0.8, 0.8), (1.7, 1.7)])
-def test_correctness_gaussian_blur_image_tensor(device, image_size, dt, ksize, sigma):
+def test_correctness_gaussian_blur_image_tensor(device, spatial_size, dt, ksize, sigma):
     fn = F.gaussian_blur_image_tensor

     # true_cv2_results = {
@@ -950,7 +950,7 @@ def test_correctness_gaussian_blur_image_tensor(device, image_size, dt, ksize, s
     p = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "gaussian_blur_opencv_results.pt")
     true_cv2_results = torch.load(p)

-    if image_size == "small":
+    if spatial_size == "small":
         tensor = (
             torch.from_numpy(np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3))).permute(2, 0, 1).to(device)
         )
diff --git a/test/test_prototype_transforms_utils.py b/test/test_prototype_transforms_utils.py
index 5301559999a..3d5960c9625 100644
--- a/test/test_prototype_transforms_utils.py
+++ b/test/test_prototype_transforms_utils.py
@@ -11,7 +11,7 @@

 IMAGE = make_image(color_space=features.ColorSpace.RGB)
-BOUNDING_BOX = make_bounding_box(format=features.BoundingBoxFormat.XYXY, image_size=IMAGE.spatial_size)
+BOUNDING_BOX = make_bounding_box(format=features.BoundingBoxFormat.XYXY, spatial_size=IMAGE.spatial_size)
 MASK = make_detection_mask(size=IMAGE.spatial_size)
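For reference, a minimal usage sketch (not part of the diff) of the renamed test helpers after this change. It assumes the helpers are importable from `test/prototype_common_utils.py` (i.e. the `test/` directory is on `sys.path`); the concrete sizes are illustrative only.

```python
# Illustrative sketch only: exercises the renamed `spatial_size` keyword
# (formerly `image_size`) of the test helpers touched by this diff.
from prototype_common_utils import make_bounding_box, make_image  # assumes test/ is on sys.path
from torchvision.prototype import features

spatial_size = (32, 24)  # (height, width)

# Image helper still takes `size`; its spatial metadata is exposed as `.spatial_size`.
image = make_image(size=spatial_size, color_space=features.ColorSpace.RGB)

# Bounding box helper now takes `spatial_size=` instead of `image_size=`.
boxes = make_bounding_box(
    format=features.BoundingBoxFormat.XYXY,
    spatial_size=spatial_size,
    extra_dims=(6,),
)

# Both features carry the same spatial metadata after the rename.
assert tuple(image.spatial_size) == tuple(boxes.spatial_size) == spatial_size
```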