From 2583987b24cc98dd7b691d93b65aeb507ac6599a Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Jul 2023 13:16:03 +0200 Subject: [PATCH 01/17] extract make_* functions out of make_*_loader --- test/common_utils.py | 46 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index abefd07c43d..28d36855d63 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -492,6 +492,34 @@ def get_num_channels(color_space): return num_channels +def make_image( + spatial_size, + *, + color_space="RGB", + batch_dims=(), + dtype=torch.float32, + device="cpu", + constant_alpha=True, + memory_format=torch.contiguous_format, +): + spatial_size = _parse_spatial_size(spatial_size) + num_channels = get_num_channels(color_space) + max_value = get_max_value(dtype) + + data = torch.testing.make_tensor( + (*batch_dims, num_channels, *spatial_size), + low=0, + high=max_value, + dtype=dtype, + device=device, + memory_format=memory_format, + ) + if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha: + data[..., -1, :, :] = max_value + + return datapoints.Image(data) + + def make_image_loader( size="random", *, @@ -505,20 +533,20 @@ def make_image_loader( num_channels = get_num_channels(color_space) def fn(shape, dtype, device, memory_format): - max_value = get_max_value(dtype) - data = torch.testing.make_tensor( - shape, low=0, high=max_value, dtype=dtype, device=device, memory_format=memory_format + *batch_dims, _, spatial_size = shape + return make_image( + spatial_size, + color_space=color_space, + batch_dims=batch_dims, + dtype=dtype, + device=device, + constant_alpha=constant_alpha, + memory_format=memory_format, ) - if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha: - data[..., -1, :, :] = max_value - return datapoints.Image(data) return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format) -make_image = from_loader(make_image_loader) - - def make_image_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, From 093e6d6d58c5a971ce5b732c776313f36879f1c5 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Jul 2023 13:35:42 +0200 Subject: [PATCH 02/17] fix arg name --- test/common_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 28d36855d63..cf7c8623ace 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -493,7 +493,7 @@ def get_num_channels(color_space): def make_image( - spatial_size, + size, *, color_space="RGB", batch_dims=(), @@ -502,7 +502,7 @@ def make_image( constant_alpha=True, memory_format=torch.contiguous_format, ): - spatial_size = _parse_spatial_size(spatial_size) + spatial_size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) max_value = get_max_value(dtype) From c1850ac64b7b621de1e53e762b6ed6a3956f6e5f Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Jul 2023 13:59:50 +0200 Subject: [PATCH 03/17] port remaining helpers --- test/common_utils.py | 149 ++++++++++++++++++++++++++----------------- 1 file changed, 91 insertions(+), 58 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index cf7c8623ace..f6cea90c3b0 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -496,10 +496,10 @@ def make_image( size, *, color_space="RGB", + constant_alpha=True, batch_dims=(), dtype=torch.float32, device="cpu", - constant_alpha=True, memory_format=torch.contiguous_format, ): spatial_size = 
_parse_spatial_size(size) @@ -629,59 +629,64 @@ def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): ).reshape(low.shape) +def make_bounding_box( + format=datapoints.BoundingBoxFormat.XYXY, spatial_size="random", batch_dims=(), dtype=torch.float32, device="cpu" +): + if isinstance(format, str): + format = datapoints.BoundingBoxFormat[format] + + spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") + + if any(dim == 0 for dim in batch_dims): + return datapoints.BoundingBox( + torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size + ) + + height, width = spatial_size + if format == datapoints.BoundingBoxFormat.XYXY: + x1 = torch.randint(0, width // 2, batch_dims) + y1 = torch.randint(0, height // 2, batch_dims) + x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1 + y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1 + parts = (x1, y1, x2, y2) + elif format == datapoints.BoundingBoxFormat.XYWH: + x = torch.randint(0, width // 2, batch_dims) + y = torch.randint(0, height // 2, batch_dims) + w = randint_with_tensor_bounds(1, width - x) + h = randint_with_tensor_bounds(1, height - y) + parts = (x, y, w, h) + elif format == datapoints.BoundingBoxFormat.CXCYWH: + cx = torch.randint(1, width - 1, batch_dims) + cy = torch.randint(1, height - 1, batch_dims) + w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1) + h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1) + parts = (cx, cy, w, h) + else: + raise ValueError(f"Can't make bounding box in format {format}") + + return datapoints.BoundingBox( + torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size + ) + + def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32): if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] - if format not in { - datapoints.BoundingBoxFormat.XYXY, - datapoints.BoundingBoxFormat.XYWH, - datapoints.BoundingBoxFormat.CXCYWH, - }: - raise pytest.UsageError(f"Can't make bounding box in format {format}") spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") def fn(shape, dtype, device): - *extra_dims, num_coordinates = shape + *batch_dims, num_coordinates = shape if num_coordinates != 4: raise pytest.UsageError() - if any(dim == 0 for dim in extra_dims): - return datapoints.BoundingBox( - torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size - ) - - height, width = spatial_size - - if format == datapoints.BoundingBoxFormat.XYXY: - x1 = torch.randint(0, width // 2, extra_dims) - y1 = torch.randint(0, height // 2, extra_dims) - x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1 - y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1 - parts = (x1, y1, x2, y2) - elif format == datapoints.BoundingBoxFormat.XYWH: - x = torch.randint(0, width // 2, extra_dims) - y = torch.randint(0, height // 2, extra_dims) - w = randint_with_tensor_bounds(1, width - x) - h = randint_with_tensor_bounds(1, height - y) - parts = (x, y, w, h) - else: # format == features.BoundingBoxFormat.CXCYWH: - cx = torch.randint(1, width - 1, extra_dims) - cy = torch.randint(1, height - 1, extra_dims) - w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1) - h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1) - parts = (cx, cy, w, h) - - return datapoints.BoundingBox( - torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, 
spatial_size=spatial_size + return make_bounding_box( + format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device ) return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size) -make_bounding_box = from_loader(make_bounding_box_loader) - - def make_bounding_box_loaders( *, extra_dims=DEFAULT_EXTRA_DIMS, @@ -700,21 +705,31 @@ class MaskLoader(TensorLoader): pass +def make_detection_mask(size, *, num_objects="random", batch_dims=(), dtype=torch.bool, device="cpu"): + """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" + spatial_size = _parse_spatial_size(size) + num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects + + data = torch.testing.make_tensor( + (*batch_dims, num_objects, *spatial_size), low=0, high=2, dtype=dtype, device=device + ) + return datapoints.Mask(data) + + def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8): # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects size = _parse_spatial_size(size) num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device) - return datapoints.Mask(data) + *batch_dims, num_objects, height, width = shape + return make_detection_mask( + (height, width), num_objects=num_objects, batch_dims=batch_dims, dtype=dtype, device=device + ) return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype) -make_detection_mask = from_loader(make_detection_mask_loader) - - def make_detection_mask_loaders( sizes=DEFAULT_SPATIAL_SIZES, num_objects=(1, 0, "random"), @@ -728,19 +743,28 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8): - # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values - size = _parse_spatial_size(size) +def make_segmentation_mask(size, *, num_categories="random", batch_dims=(), dtype=torch.uint8, device="cpu"): + """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value""" + spatial_size = _parse_spatial_size(size) num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories - def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device) - return datapoints.Mask(data) + data = torch.testing.make_tensor( + (*batch_dims, *spatial_size), low=0, high=num_categories, dtype=dtype, device=device + ) + return datapoints.Mask(data) - return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype) +def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8): + # This produces "segmentation" masks, i.e. 
`(*, H, W)`, where the category is encoded in the values + spatial_size = _parse_spatial_size(size) + + def fn(shape, dtype, device): + *batch_dims, height, width = shape + return make_segmentation_mask( + (height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device + ) -make_segmentation_mask = from_loader(make_segmentation_mask_loader) + return MaskLoader(fn, shape=(*extra_dims, *spatial_size), dtype=dtype) def make_segmentation_mask_loaders( @@ -778,6 +802,12 @@ class VideoLoader(ImageLoader): pass +def make_video(size, *, num_frames="random", batch_dims=(), **kwargs): + num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames + + return datapoints.Video(make_image(size=size, batch_dims=(*batch_dims, num_frames), **kwargs)) + + def make_video_loader( size="random", *, @@ -790,17 +820,20 @@ def make_video_loader( num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames def fn(shape, dtype, device, memory_format): - video = make_image( - size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device, memory_format=memory_format + *batch_dims, num_frames, _, height, width = shape + return make_video( + (height, width), + num_frames=num_frames, + color_space=color_space, + batch_dims=batch_dims, + dtype=dtype, + device=device, + memory_format=memory_format, ) - return datapoints.Video(video) return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype) -make_video = from_loader(make_video_loader) - - def make_video_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, From ec7e472839693f9c78a6e02a5a806af7b0d6d72b Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Jul 2023 21:15:47 +0200 Subject: [PATCH 04/17] update --- test/common_utils.py | 22 +++++++++++--------- test/test_transforms_v2.py | 19 ++++++++--------- test/test_transforms_v2_consistency.py | 17 +++++++++++----- test/test_transforms_v2_refactored.py | 28 +++++++++----------------- test/test_transforms_v2_utils.py | 4 ++-- 5 files changed, 46 insertions(+), 44 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index f6cea90c3b0..05d8325db3b 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -496,14 +496,14 @@ def make_image( size, *, color_space="RGB", - constant_alpha=True, batch_dims=(), - dtype=torch.float32, + dtype=None, device="cpu", memory_format=torch.contiguous_format, ): spatial_size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) + dtype = dtype or torch.uint8 max_value = get_max_value(dtype) data = torch.testing.make_tensor( @@ -514,7 +514,7 @@ def make_image( device=device, memory_format=memory_format, ) - if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha: + if color_space in {"GRAY_ALPHA", "RGBA"}: data[..., -1, :, :] = max_value return datapoints.Image(data) @@ -529,18 +529,19 @@ def make_image_loader( constant_alpha=True, memory_format=torch.contiguous_format, ): + if not constant_alpha: + raise ValueError("This should never happen") size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) def fn(shape, dtype, device, memory_format): - *batch_dims, _, spatial_size = shape + *batch_dims, _, height, width = shape return make_image( - spatial_size, + (height, width), color_space=color_space, batch_dims=batch_dims, dtype=dtype, device=device, - constant_alpha=constant_alpha, memory_format=memory_format, ) @@ -630,12 +631,13 @@ def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): def 
make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size="random", batch_dims=(), dtype=torch.float32, device="cpu" + format=datapoints.BoundingBoxFormat.XYXY, spatial_size="random", batch_dims=(), dtype=None, device="cpu" ): if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") + dtype = dtype or torch.float32 if any(dim == 0 for dim in batch_dims): return datapoints.BoundingBox( @@ -705,10 +707,11 @@ class MaskLoader(TensorLoader): pass -def make_detection_mask(size, *, num_objects="random", batch_dims=(), dtype=torch.bool, device="cpu"): +def make_detection_mask(size, *, num_objects="random", batch_dims=(), dtype=None, device="cpu"): """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" spatial_size = _parse_spatial_size(size) num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects + dtype = dtype or torch.bool data = torch.testing.make_tensor( (*batch_dims, num_objects, *spatial_size), low=0, high=2, dtype=dtype, device=device @@ -743,10 +746,11 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask(size, *, num_categories="random", batch_dims=(), dtype=torch.uint8, device="cpu"): +def make_segmentation_mask(size, *, num_categories="random", batch_dims=(), dtype=None, device="cpu"): """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value""" spatial_size = _parse_spatial_size(size) num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories + dtype = dtype or torch.uint8 data = torch.testing.make_tensor( (*batch_dims, *spatial_size), low=0, high=num_categories, dtype=dtype, device=device diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 093c378aa72..db225429262 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -17,6 +17,7 @@ assert_equal, assert_run_python_script, cpu_and_cuda, + DEFAULT_PORTRAIT_SPATIAL_SIZE, make_bounding_box, make_bounding_boxes, make_detection_mask, @@ -167,8 +168,8 @@ class TestSmoke: @pytest.mark.parametrize( "image_or_video", [ - make_image(), - make_video(), + make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_video(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), next(make_pil_images(color_spaces=["RGB"])), next(make_vanilla_tensor_images()), ], @@ -182,13 +183,13 @@ def test_common(self, transform, adapter, container_type, image_or_video, device video_datapoint=make_video(size=spatial_size), image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])), bounding_box_xyxy=make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(3,) + format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,) ), bounding_box_xywh=make_bounding_box( - format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, extra_dims=(4,) + format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, batch_dims=(4,) ), bounding_box_cxcywh=make_bounding_box( - format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, extra_dims=(5,) + format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, batch_dims=(5,) ), bounding_box_degenerate_xyxy=datapoints.BoundingBox( [ @@ -352,7 +353,7 @@ def test_random_resized_crop(self, transform, input): next(make_vanilla_tensor_images()), 
next(make_vanilla_tensor_images()), next(make_pil_images()), - make_image(), + make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), next(make_videos()), ], 3, @@ -1124,7 +1125,7 @@ def test__transform(self, mocker): transform = transforms.RandomIoUCrop() image = datapoints.Image(torch.rand(3, 32, 24)) - bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), extra_dims=(6,)) + bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), batch_dims=(6,)) masks = make_detection_mask((32, 24), num_objects=6) sample = [image, bboxes, masks] @@ -1346,8 +1347,8 @@ class TestToDtype: ) def test_call(self, dtype, expected_dtypes): sample = dict( - video=make_video(dtype=torch.int64), - image=make_image(dtype=torch.uint8), + video=make_video(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.int64), + image=make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.uint8), bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, dtype=torch.float32), str="str", int=0, diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index f035dde45ed..ccaa973d8d2 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -17,6 +17,7 @@ ArgsKwargs, assert_close, assert_equal, + DEFAULT_PORTRAIT_SPATIAL_SIZE, make_bounding_box, make_detection_mask, make_image, @@ -708,8 +709,14 @@ def test_call_consistency(config, args_kwargs): id=transform_cls.__name__, ) for transform_cls, get_params_args_kwargs in [ - (v2_transforms.RandomResizedCrop, ArgsKwargs(make_image(), scale=[0.3, 0.7], ratio=[0.5, 1.5])), - (v2_transforms.RandomErasing, ArgsKwargs(make_image(), scale=(0.3, 0.7), ratio=(0.5, 1.5))), + ( + v2_transforms.RandomResizedCrop, + ArgsKwargs(make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=[0.3, 0.7], ratio=[0.5, 1.5]), + ), + ( + v2_transforms.RandomErasing, + ArgsKwargs(make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=(0.3, 0.7), ratio=(0.5, 1.5)), + ), (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)), (v2_transforms.ElasticTransform, ArgsKwargs(alpha=[15.3, 27.2], sigma=[2.5, 3.9], size=[17, 31])), (v2_transforms.GaussianBlur, ArgsKwargs(0.3, 1.4)), @@ -1090,7 +1097,7 @@ def make_label(extra_dims, categories): pil_image = to_image_pil(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1100,7 +1107,7 @@ def make_label(extra_dims, categories): tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1110,7 +1117,7 @@ def make_label(extra_dims, categories): datapoint_image = make_image(size=size, color_space="RGB") target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: diff --git 
a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 2130a8cf50a..43dc9e466ec 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -310,34 +310,24 @@ def wrapper(input, *args, **kwargs): def make_input(input_type, *, dtype=None, device="cpu", spatial_size=(17, 11), mask_type="segmentation", **kwargs): if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}: - input = make_image(size=spatial_size, dtype=dtype or torch.uint8, device=device, **kwargs) + input = make_image(size=spatial_size, dtype=dtype, device=device, **kwargs) if input_type is torch.Tensor: input = input.as_subclass(torch.Tensor) elif input_type is PIL.Image.Image: input = F.to_image_pil(input) elif input_type is datapoints.BoundingBox: - kwargs.setdefault("format", datapoints.BoundingBoxFormat.XYXY) - input = make_bounding_box( - dtype=dtype or torch.float32, - device=device, - spatial_size=spatial_size, - **kwargs, - ) + input = make_bounding_box(dtype=dtype, device=device, spatial_size=spatial_size, **kwargs) elif input_type is datapoints.Mask: - if mask_type == "segmentation": - make_mask = make_segmentation_mask - default_dtype = torch.uint8 - elif mask_type == "detection": - make_mask = make_detection_mask - default_dtype = torch.bool - else: - raise ValueError(f"`mask_type` can be `'segmentation'` or `'detection'`, but got {mask_type}.") - input = make_mask(size=spatial_size, dtype=dtype or default_dtype, device=device, **kwargs) + make_mask = { + "segmentation": make_segmentation_mask, + "detection": make_detection_mask, + }[mask_type] + input = make_mask(size=spatial_size, dtype=dtype, device=device, **kwargs) elif input_type is datapoints.Video: - input = make_video(size=spatial_size, dtype=dtype or torch.uint8, device=device, **kwargs) + input = make_video(size=spatial_size, dtype=dtype, device=device, **kwargs) else: raise TypeError( - f"Input can either be a plain tensor, any TorchVision datapoint, or a PIL image, " + f"Input type can either be torch.Tensor, PIL.Image.Image, or any TorchVision datapoint class, " f"but got {input_type} instead." 
) diff --git a/test/test_transforms_v2_utils.py b/test/test_transforms_v2_utils.py index 198ab39a475..98271b893d6 100644 --- a/test/test_transforms_v2_utils.py +++ b/test/test_transforms_v2_utils.py @@ -4,14 +4,14 @@ import torch import torchvision.transforms.v2.utils -from common_utils import make_bounding_box, make_detection_mask, make_image +from common_utils import DEFAULT_PORTRAIT_SPATIAL_SIZE, make_bounding_box, make_detection_mask, make_image from torchvision import datapoints from torchvision.transforms.v2.functional import to_image_pil from torchvision.transforms.v2.utils import has_all, has_any -IMAGE = make_image(color_space="RGB") +IMAGE = make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, color_space="RGB") BOUNDING_BOX = make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, spatial_size=IMAGE.spatial_size) MASK = make_detection_mask(size=IMAGE.spatial_size) From 5b56570762cf27befd8168077b70b6d19e92ade0 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Jul 2023 21:33:42 +0200 Subject: [PATCH 05/17] size -> spatial_size --- test/common_utils.py | 23 ++++++++--------------- test/test_prototype_transforms.py | 16 ++++++++-------- test/test_transforms_v2.py | 18 +++++++++--------- test/test_transforms_v2_consistency.py | 22 +++++++++++----------- test/test_transforms_v2_refactored.py | 10 +++++----- test/test_transforms_v2_utils.py | 4 ++-- 6 files changed, 43 insertions(+), 50 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 05d8325db3b..feae7717ecb 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -493,15 +493,9 @@ def get_num_channels(color_space): def make_image( - size, - *, - color_space="RGB", - batch_dims=(), - dtype=None, - device="cpu", - memory_format=torch.contiguous_format, + spatial_size, *, color_space="RGB", batch_dims=(), dtype=None, device="cpu", memory_format=torch.contiguous_format ): - spatial_size = _parse_spatial_size(size) + spatial_size = _parse_spatial_size(spatial_size) num_channels = get_num_channels(color_space) dtype = dtype or torch.uint8 max_value = get_max_value(dtype) @@ -707,9 +701,9 @@ class MaskLoader(TensorLoader): pass -def make_detection_mask(size, *, num_objects="random", batch_dims=(), dtype=None, device="cpu"): +def make_detection_mask(spatial_size, *, num_objects="random", batch_dims=(), dtype=None, device="cpu"): """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" - spatial_size = _parse_spatial_size(size) + spatial_size = _parse_spatial_size(spatial_size) num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects dtype = dtype or torch.bool @@ -746,9 +740,9 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask(size, *, num_categories="random", batch_dims=(), dtype=None, device="cpu"): +def make_segmentation_mask(spatial_size, *, num_categories="random", batch_dims=(), dtype=None, device="cpu"): """Make a "segmentation" mask, i.e. 
(*, H, W), where the category is encoded as pixel value""" - spatial_size = _parse_spatial_size(size) + spatial_size = _parse_spatial_size(spatial_size) num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories dtype = dtype or torch.uint8 @@ -806,10 +800,9 @@ class VideoLoader(ImageLoader): pass -def make_video(size, *, num_frames="random", batch_dims=(), **kwargs): +def make_video(spatial_size, *, num_frames="random", batch_dims=(), **kwargs): num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames - - return datapoints.Video(make_image(size=size, batch_dims=(*batch_dims, num_frames), **kwargs)) + return datapoints.Video(make_image(spatial_size, batch_dims=(*batch_dims, num_frames), **kwargs)) def make_video_loader( diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index 255c3b5c32f..52b572c7628 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -215,7 +215,7 @@ def test__get_params(self, mocker): transform = transforms.FixedSizeCrop(size=crop_size) flat_inputs = [ - make_image(size=spatial_size, color_space="RGB"), + make_image(spatial_size=spatial_size, color_space="RGB"), make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape), ] params = transform._get_params(flat_inputs) @@ -314,7 +314,7 @@ def test__transform_culling(self, mocker): bounding_boxes = make_bounding_box( format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,) ) - masks = make_detection_mask(size=spatial_size, extra_dims=(batch_size,)) + masks = make_detection_mask(spatial_size=spatial_size) labels = make_label(extra_dims=(batch_size,)) transform = transforms.FixedSizeCrop((-1, -1)) @@ -494,29 +494,29 @@ def make_datapoints(): size = (600, 800) num_objects = 22 - pil_image = to_image_pil(make_image(size=size, color_space="RGB")) + pil_image = to_image_pil(make_image(spatial_size=size, color_space="RGB")) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), - "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), + "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), } yield (pil_image, target) - tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) + tensor_image = torch.Tensor(make_image(spatial_size=size, color_space="RGB")) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), - "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), + "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), } yield (tensor_image, target) - datapoint_image = make_image(size=size, color_space="RGB") + datapoint_image = make_image(spatial_size=size, color_space="RGB") target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), - "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), + "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), } yield (datapoint_image, target) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 
db225429262..9cc95312a06 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -168,8 +168,8 @@ class TestSmoke: @pytest.mark.parametrize( "image_or_video", [ - make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), - make_video(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_video(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), next(make_pil_images(color_spaces=["RGB"])), next(make_vanilla_tensor_images()), ], @@ -179,8 +179,8 @@ def test_common(self, transform, adapter, container_type, image_or_video, device spatial_size = F.get_spatial_size(image_or_video) input = dict( image_or_video=image_or_video, - image_datapoint=make_image(size=spatial_size), - video_datapoint=make_video(size=spatial_size), + image_datapoint=make_image(spatial_size=spatial_size), + video_datapoint=make_video(spatial_size=spatial_size), image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])), bounding_box_xyxy=make_bounding_box( format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,) @@ -227,8 +227,8 @@ def test_common(self, transform, adapter, container_type, image_or_video, device format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, ), - detection_mask=make_detection_mask(size=spatial_size), - segmentation_mask=make_segmentation_mask(size=spatial_size), + detection_mask=make_detection_mask(spatial_size=spatial_size), + segmentation_mask=make_segmentation_mask(spatial_size=spatial_size), int=0, float=0.0, bool=True, @@ -353,7 +353,7 @@ def test_random_resized_crop(self, transform, input): next(make_vanilla_tensor_images()), next(make_vanilla_tensor_images()), next(make_pil_images()), - make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), next(make_videos()), ], 3, @@ -1347,8 +1347,8 @@ class TestToDtype: ) def test_call(self, dtype, expected_dtypes): sample = dict( - video=make_video(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.int64), - image=make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.uint8), + video=make_video(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.int64), + image=make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.uint8), bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, dtype=torch.float32), str="str", int=0, diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index ccaa973d8d2..cf1b6b6b52c 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -711,11 +711,11 @@ def test_call_consistency(config, args_kwargs): for transform_cls, get_params_args_kwargs in [ ( v2_transforms.RandomResizedCrop, - ArgsKwargs(make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=[0.3, 0.7], ratio=[0.5, 1.5]), + ArgsKwargs(make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=[0.3, 0.7], ratio=[0.5, 1.5]), ), ( v2_transforms.RandomErasing, - ArgsKwargs(make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=(0.3, 0.7), ratio=(0.5, 1.5)), + ArgsKwargs(make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=(0.3, 0.7), ratio=(0.5, 1.5)), ), (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)), (v2_transforms.ElasticTransform, ArgsKwargs(alpha=[15.3, 27.2], sigma=[2.5, 3.9], size=[17, 31])), @@ -724,7 +724,7 @@ def test_call_consistency(config, args_kwargs): v2_transforms.RandomAffine, ArgsKwargs(degrees=[-20.0, 10.0], translate=None, 
scale_ranges=None, shears=None, img_size=[15, 29]), ), - (v2_transforms.RandomCrop, ArgsKwargs(make_image(size=(61, 47)), output_size=(19, 25))), + (v2_transforms.RandomCrop, ArgsKwargs(make_image(spatial_size=(61, 47)), output_size=(19, 25))), (v2_transforms.RandomPerspective, ArgsKwargs(23, 17, 0.5)), (v2_transforms.RandomRotation, ArgsKwargs(degrees=[-20.0, 10.0])), (v2_transforms.AutoAugment, ArgsKwargs(5)), @@ -1095,33 +1095,33 @@ def make_datapoints(self, with_mask=True): def make_label(extra_dims, categories): return torch.randint(categories, extra_dims, dtype=torch.int64) - pil_image = to_image_pil(make_image(size=size, color_space="RGB")) + pil_image = to_image_pil(make_image(spatial_size=size, color_space="RGB")) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: - target["masks"] = make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long) + target["masks"] = make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long) yield (pil_image, target) - tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) + tensor_image = torch.Tensor(make_image(spatial_size=size, color_space="RGB")) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: - target["masks"] = make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long) + target["masks"] = make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long) yield (tensor_image, target) - datapoint_image = make_image(size=size, color_space="RGB") + datapoint_image = make_image(spatial_size=size, color_space="RGB") target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: - target["masks"] = make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long) + target["masks"] = make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long) yield (datapoint_image, target) @@ -1203,8 +1203,8 @@ def make_datapoints(self, supports_pil=True, image_dtype=torch.uint8): conv_fns.extend([torch.Tensor, lambda x: x]) for conv_fn in conv_fns: - datapoint_image = make_image(size=size, color_space="RGB", dtype=image_dtype) - datapoint_mask = make_segmentation_mask(size=size, num_categories=num_categories, dtype=torch.uint8) + datapoint_image = make_image(spatial_size=size, color_space="RGB", dtype=image_dtype) + datapoint_mask = make_segmentation_mask(spatial_size=size, num_categories=num_categories, dtype=torch.uint8) dp = (conv_fn(datapoint_image), datapoint_mask) dp_ref = ( diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 43dc9e466ec..be768900ece 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -308,23 +308,23 @@ def wrapper(input, *args, **kwargs): return wrapper -def make_input(input_type, *, dtype=None, device="cpu", spatial_size=(17, 11), mask_type="segmentation", **kwargs): +def make_input(input_type, *, spatial_size=(17, 11), mask_type="segmentation", **kwargs): if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}: - input = make_image(size=spatial_size, dtype=dtype, device=device, **kwargs) + input = 
make_image(spatial_size=spatial_size, **kwargs) if input_type is torch.Tensor: input = input.as_subclass(torch.Tensor) elif input_type is PIL.Image.Image: input = F.to_image_pil(input) elif input_type is datapoints.BoundingBox: - input = make_bounding_box(dtype=dtype, device=device, spatial_size=spatial_size, **kwargs) + input = make_bounding_box(spatial_size=spatial_size) elif input_type is datapoints.Mask: make_mask = { "segmentation": make_segmentation_mask, "detection": make_detection_mask, }[mask_type] - input = make_mask(size=spatial_size, dtype=dtype, device=device, **kwargs) + input = make_mask(spatial_size, **kwargs) elif input_type is datapoints.Video: - input = make_video(size=spatial_size, dtype=dtype, device=device, **kwargs) + input = make_video(spatial_size, **kwargs) else: raise TypeError( f"Input type can either be torch.Tensor, PIL.Image.Image, or any TorchVision datapoint class, " diff --git a/test/test_transforms_v2_utils.py b/test/test_transforms_v2_utils.py index 98271b893d6..ad30c223530 100644 --- a/test/test_transforms_v2_utils.py +++ b/test/test_transforms_v2_utils.py @@ -11,9 +11,9 @@ from torchvision.transforms.v2.utils import has_all, has_any -IMAGE = make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, color_space="RGB") +IMAGE = make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, color_space="RGB") BOUNDING_BOX = make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, spatial_size=IMAGE.spatial_size) -MASK = make_detection_mask(size=IMAGE.spatial_size) +MASK = make_detection_mask(spatial_size=IMAGE.spatial_size) @pytest.mark.parametrize( From 9b620c619596bb5942575d490a319015fbe3d40e Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Jul 2023 21:41:43 +0200 Subject: [PATCH 06/17] fix prototype tests --- test/test_prototype_transforms.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index 52b572c7628..80e5162ed30 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -9,6 +9,7 @@ from common_utils import ( assert_equal, DEFAULT_EXTRA_DIMS, + DEFAULT_PORTRAIT_SPATIAL_SIZE, make_bounding_box, make_detection_mask, make_image, @@ -79,8 +80,8 @@ def test_mixup_cutmix(transform, input): for unsup_data in [ make_label(), make_bounding_box(format="XYXY"), - make_detection_mask(), - make_segmentation_mask(), + make_detection_mask(DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_segmentation_mask(DEFAULT_PORTRAIT_SPATIAL_SIZE), ]: input_copy["unsupported"] = unsup_data with pytest.raises(TypeError, match=err_msg): @@ -216,7 +217,7 @@ def test__get_params(self, mocker): flat_inputs = [ make_image(spatial_size=spatial_size, color_space="RGB"), - make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape), + make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape), ] params = transform._get_params(flat_inputs) @@ -312,9 +313,9 @@ def test__transform_culling(self, mocker): ) bounding_boxes = make_bounding_box( - format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,) + format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,) ) - masks = make_detection_mask(spatial_size=spatial_size) + masks = make_detection_mask(spatial_size=spatial_size, batch_dims=(batch_size,)) labels = make_label(extra_dims=(batch_size,)) transform = transforms.FixedSizeCrop((-1, -1)) @@ -350,7 +351,7 @@ def 
test__transform_bounding_box_clamping(self, mocker): ) bounding_box = make_bounding_box( - format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,) + format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,) ) mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box") @@ -389,9 +390,9 @@ class TestPermuteDimensions: ) def test_call(self, dims, inverse_dims): sample = dict( - image=make_image(), - bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY), - video=make_video(), + image=make_image(DEFAULT_PORTRAIT_SPATIAL_SIZE), + bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + video=make_video(DEFAULT_PORTRAIT_SPATIAL_SIZE), str="str", int=0, ) @@ -433,9 +434,9 @@ class TestTransposeDimensions: ) def test_call(self, dims): sample = dict( - image=make_image(), - bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY), - video=make_video(), + image=make_image(DEFAULT_PORTRAIT_SPATIAL_SIZE), + bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + video=make_video(DEFAULT_PORTRAIT_SPATIAL_SIZE), str="str", int=0, ) @@ -496,7 +497,7 @@ def make_datapoints(): pil_image = to_image_pil(make_image(spatial_size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), } @@ -505,7 +506,7 @@ def make_datapoints(): tensor_image = torch.Tensor(make_image(spatial_size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), } @@ -514,7 +515,7 @@ def make_datapoints(): datapoint_image = make_image(spatial_size=size, color_space="RGB") target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), } From e14986d8eefbfd1c6796ae5dde4fb9b0f3a676eb Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 3 Jul 2023 23:33:44 +0200 Subject: [PATCH 07/17] fix reference consistency test --- test/test_transforms_v2_consistency.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index cf1b6b6b52c..f65a3407b7c 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -1105,7 +1105,7 @@ def make_label(extra_dims, categories): yield (pil_image, target) - tensor_image = torch.Tensor(make_image(spatial_size=size, color_space="RGB")) + tensor_image = torch.Tensor(make_image(spatial_size=size, color_space="RGB", dtype=torch.float32)) target = { "boxes": 
make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), @@ -1115,7 +1115,7 @@ def make_label(extra_dims, categories): yield (tensor_image, target) - datapoint_image = make_image(spatial_size=size, color_space="RGB") + datapoint_image = make_image(spatial_size=size, color_space="RGB", dtype=torch.float32) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), From 53e13cc39eb9b51cd652940d5f588c7f179ba1e7 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 4 Jul 2023 14:33:35 +0200 Subject: [PATCH 08/17] remove random parameters --- test/common_utils.py | 47 ++++++++------- test/test_transforms_v2.py | 2 +- test/transforms_v2_kernel_infos.py | 93 ++++++++++++++++-------------- 3 files changed, 73 insertions(+), 69 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index feae7717ecb..2656449cf8c 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -399,6 +399,9 @@ def load(self, device="cpu"): ) +# new v2 default +DEFAULT_SPATIAL_SIZE = (17, 11) +# old v2 defaults DEFAULT_SQUARE_SPATIAL_SIZE = 15 DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9) @@ -406,13 +409,12 @@ def load(self, device="cpu"): DEFAULT_LANDSCAPE_SPATIAL_SIZE, DEFAULT_PORTRAIT_SPATIAL_SIZE, DEFAULT_SQUARE_SPATIAL_SIZE, - "random", ) def _parse_spatial_size(size, *, name="size"): if size == "random": - return tuple(torch.randint(15, 33, (2,)).tolist()) + raise ValueError("This should never happen") elif isinstance(size, int) and size > 0: return (size, size) elif ( @@ -515,7 +517,7 @@ def make_image( def make_image_loader( - size="random", + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, color_space="RGB", extra_dims=(), @@ -563,7 +565,7 @@ def make_image_loaders( def make_image_loader_for_interpolation( - size="random", *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format + size=(233, 147), *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format ): size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) @@ -625,7 +627,7 @@ def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): def make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size="random", batch_dims=(), dtype=None, device="cpu" + format=datapoints.BoundingBoxFormat.XYXY, spatial_size=DEFAULT_SPATIAL_SIZE, batch_dims=(), dtype=None, device="cpu" ): if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] @@ -665,7 +667,7 @@ def make_bounding_box( ) -def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32): +def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32): if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] @@ -687,7 +689,7 @@ def make_bounding_box_loaders( *, extra_dims=DEFAULT_EXTRA_DIMS, formats=tuple(datapoints.BoundingBoxFormat), - spatial_size="random", + spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtypes=(torch.float32, torch.float64, torch.int64), ): for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): @@ -701,10 +703,9 @@ class MaskLoader(TensorLoader): pass -def make_detection_mask(spatial_size, *, num_objects="random", batch_dims=(), dtype=None, device="cpu"): +def make_detection_mask(spatial_size, 
*, num_objects=5, batch_dims=(), dtype=None, device="cpu"): """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" spatial_size = _parse_spatial_size(spatial_size) - num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects dtype = dtype or torch.bool data = torch.testing.make_tensor( @@ -713,10 +714,9 @@ def make_detection_mask(spatial_size, *, num_objects="random", batch_dims=(), dt return datapoints.Mask(data) -def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8): +def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8): # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects size = _parse_spatial_size(size) - num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects def fn(shape, dtype, device): *batch_dims, num_objects, height, width = shape @@ -729,7 +729,7 @@ def fn(shape, dtype, device): def make_detection_mask_loaders( sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), + num_objects=(1, 0, 5), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -740,10 +740,9 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask(spatial_size, *, num_categories="random", batch_dims=(), dtype=None, device="cpu"): +def make_segmentation_mask(spatial_size, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value""" spatial_size = _parse_spatial_size(spatial_size) - num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories dtype = dtype or torch.uint8 data = torch.testing.make_tensor( @@ -752,7 +751,9 @@ def make_segmentation_mask(spatial_size, *, num_categories="random", batch_dims= return datapoints.Mask(data) -def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8): +def make_segmentation_mask_loader( + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_categories=10, extra_dims=(), dtype=torch.uint8 +): # This produces "segmentation" masks, i.e. 
`(*, H, W)`, where the category is encoded in the values spatial_size = _parse_spatial_size(size) @@ -768,7 +769,7 @@ def fn(shape, dtype, device): def make_segmentation_mask_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, - num_categories=(1, 2, "random"), + num_categories=(1, 2, 10), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -782,8 +783,8 @@ def make_segmentation_mask_loaders( def make_mask_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), - num_categories=(1, 2, "random"), + num_objects=(1, 0, 5), + num_categories=(1, 2, 10), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -800,21 +801,19 @@ class VideoLoader(ImageLoader): pass -def make_video(spatial_size, *, num_frames="random", batch_dims=(), **kwargs): - num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames +def make_video(spatial_size, *, num_frames=3, batch_dims=(), **kwargs): return datapoints.Video(make_image(spatial_size, batch_dims=(*batch_dims, num_frames), **kwargs)) def make_video_loader( - size="random", + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, color_space="RGB", - num_frames="random", + num_frames=3, extra_dims=(), dtype=torch.uint8, ): size = _parse_spatial_size(size) - num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames def fn(shape, dtype, device, memory_format): *batch_dims, num_frames, _, height, width = shape @@ -838,7 +837,7 @@ def make_video_loaders( "GRAY", "RGB", ), - num_frames=(1, 0, "random"), + num_frames=(1, 0, 3), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8, torch.float32, torch.float64), ): diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 9cc95312a06..1c6ee23c95f 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -290,7 +290,7 @@ def test_common(self, transform, adapter, container_type, image_or_video, device ], dtypes=[torch.uint8], extra_dims=[(), (4,)], - **(dict(num_frames=["random"]) if fn is make_videos else dict()), + **(dict(num_frames=[3]) if fn is make_videos else dict()), ) for fn in [ make_images, diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index cae8d3157e9..d9533cbb725 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -11,6 +11,7 @@ from common_utils import ( ArgsKwargs, combinations_grid, + DEFAULT_PORTRAIT_SPATIAL_SIZE, get_num_channels, ImageLoader, InfoBase, @@ -296,7 +297,7 @@ def sample_inputs_crop_bounding_box(): def sample_inputs_crop_mask(): - for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=["random"], num_objects=["random"]): + for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=[10], num_objects=[5]): yield ArgsKwargs(mask_loader, top=4, left=3, height=7, width=8) @@ -306,7 +307,7 @@ def reference_inputs_crop_mask(): def sample_inputs_crop_video(): - for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=[3]): yield ArgsKwargs(video_loader, top=4, left=3, height=7, width=8) @@ -415,7 +416,7 @@ def sample_inputs_resized_crop_mask(): def sample_inputs_resized_crop_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0]) @@ -457,7 +458,7 @@ def sample_inputs_resized_crop_video(): def sample_inputs_pad_image_tensor(): 
make_pad_image_loaders = functools.partial( - make_image_loaders, sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32] + make_image_loaders, sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32] ) for image_loader, padding in itertools.product( @@ -512,7 +513,7 @@ def sample_inputs_pad_bounding_box(): def sample_inputs_pad_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]): yield ArgsKwargs(mask_loader, padding=[1]) @@ -524,7 +525,7 @@ def reference_inputs_pad_mask(): def sample_inputs_pad_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, padding=[1]) @@ -620,7 +621,7 @@ def pad_xfail_jit_fill_condition(args_kwargs): def sample_inputs_perspective_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): yield ArgsKwargs( image_loader, startpoints=None, endpoints=None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0] @@ -672,7 +673,7 @@ def sample_inputs_perspective_bounding_box(): def sample_inputs_perspective_mask(): - for mask_loader in make_mask_loaders(sizes=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): yield ArgsKwargs(mask_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0]) yield ArgsKwargs(make_detection_mask_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS) @@ -686,7 +687,7 @@ def reference_inputs_perspective_mask(): def sample_inputs_perspective_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0]) yield ArgsKwargs(make_video_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS) @@ -745,7 +746,7 @@ def _get_elastic_displacement(spatial_size): def sample_inputs_elastic_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): displacement = _get_elastic_displacement(image_loader.spatial_size) for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): yield ArgsKwargs(image_loader, displacement=displacement, fill=fill) @@ -777,13 +778,13 @@ def sample_inputs_elastic_bounding_box(): def sample_inputs_elastic_mask(): - for mask_loader in make_mask_loaders(sizes=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): displacement = _get_elastic_displacement(mask_loader.shape[-2:]) yield ArgsKwargs(mask_loader, displacement=displacement) def sample_inputs_elastic_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): displacement = _get_elastic_displacement(video_loader.shape[-2:]) yield ArgsKwargs(video_loader, displacement=displacement) @@ -854,7 +855,7 @@ def sample_inputs_center_crop_bounding_box(): def 
sample_inputs_center_crop_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]): height, width = mask_loader.shape[-2:] yield ArgsKwargs(mask_loader, output_size=(height // 2, width // 2)) @@ -867,7 +868,7 @@ def reference_inputs_center_crop_mask(): def sample_inputs_center_crop_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): height, width = video_loader.shape[-2:] yield ArgsKwargs(video_loader, output_size=(height // 2, width // 2)) @@ -947,7 +948,7 @@ def sample_inputs_gaussian_blur_video(): def sample_inputs_equalize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1008,7 +1009,7 @@ def make_beta_distributed_image(shape, dtype, device, *, alpha, beta, memory_for def sample_inputs_equalize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1031,7 +1032,7 @@ def sample_inputs_equalize_video(): def sample_inputs_invert_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1041,7 +1042,7 @@ def reference_inputs_invert_image_tensor(): def sample_inputs_invert_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1067,7 +1068,7 @@ def sample_inputs_invert_video(): def sample_inputs_posterize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, bits=_POSTERIZE_BITS[0]) @@ -1080,7 +1081,7 @@ def reference_inputs_posterize_image_tensor(): def sample_inputs_posterize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, bits=_POSTERIZE_BITS[0]) @@ -1110,7 +1111,7 @@ def _get_solarize_thresholds(dtype): def sample_inputs_solarize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype))) @@ -1125,7 +1126,7 @@ def uint8_to_float32_threshold_adapter(other_args, kwargs): def sample_inputs_solarize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, 
threshold=next(_get_solarize_thresholds(video_loader.dtype))) @@ -1149,7 +1150,7 @@ def sample_inputs_solarize_video(): def sample_inputs_autocontrast_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1159,7 +1160,7 @@ def reference_inputs_autocontrast_image_tensor(): def sample_inputs_autocontrast_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1189,7 +1190,7 @@ def sample_inputs_autocontrast_video(): def sample_inputs_adjust_sharpness_image_tensor(): for image_loader in make_image_loaders( - sizes=["random", (2, 2)], + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE, (2, 2)], color_spaces=("GRAY", "RGB"), ): yield ArgsKwargs(image_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) @@ -1204,7 +1205,7 @@ def reference_inputs_adjust_sharpness_image_tensor(): def sample_inputs_adjust_sharpness_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) @@ -1228,7 +1229,7 @@ def sample_inputs_adjust_sharpness_video(): def sample_inputs_erase_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): # FIXME: make the parameters more diverse h, w = 6, 7 v = torch.rand(image_loader.num_channels, h, w) @@ -1236,7 +1237,7 @@ def sample_inputs_erase_image_tensor(): def sample_inputs_erase_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): # FIXME: make the parameters more diverse h, w = 6, 7 v = torch.rand(video_loader.num_channels, h, w) @@ -1261,7 +1262,7 @@ def sample_inputs_erase_video(): def sample_inputs_adjust_brightness_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0]) @@ -1274,7 +1275,7 @@ def reference_inputs_adjust_brightness_image_tensor(): def sample_inputs_adjust_brightness_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0]) @@ -1301,7 +1302,7 @@ def sample_inputs_adjust_brightness_video(): def sample_inputs_adjust_contrast_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0]) @@ -1314,7 +1315,7 @@ def reference_inputs_adjust_contrast_image_tensor(): def sample_inputs_adjust_contrast_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in 
make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0]) @@ -1353,7 +1354,7 @@ def sample_inputs_adjust_contrast_video(): def sample_inputs_adjust_gamma_image_tensor(): gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0] - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, gamma=gamma, gain=gain) @@ -1367,7 +1368,7 @@ def reference_inputs_adjust_gamma_image_tensor(): def sample_inputs_adjust_gamma_video(): gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0] - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, gamma=gamma, gain=gain) @@ -1397,7 +1398,7 @@ def sample_inputs_adjust_gamma_video(): def sample_inputs_adjust_hue_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, hue_factor=_ADJUST_HUE_FACTORS[0]) @@ -1410,7 +1411,7 @@ def reference_inputs_adjust_hue_image_tensor(): def sample_inputs_adjust_hue_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, hue_factor=_ADJUST_HUE_FACTORS[0]) @@ -1439,7 +1440,7 @@ def sample_inputs_adjust_hue_video(): def sample_inputs_adjust_saturation_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0]) @@ -1452,7 +1453,7 @@ def reference_inputs_adjust_saturation_image_tensor(): def sample_inputs_adjust_saturation_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0]) @@ -1612,7 +1613,7 @@ def wrapper(input_tensor, *other_args, **kwargs): def sample_inputs_normalize_image_tensor(): for image_loader, (mean, std) in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]), + make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]), _NORMALIZE_MEANS_STDS, ): yield ArgsKwargs(image_loader, mean=mean, std=std) @@ -1637,7 +1638,7 @@ def reference_inputs_normalize_image_tensor(): def sample_inputs_normalize_video(): mean, std = _NORMALIZE_MEANS_STDS[0] for video_loader in make_video_loaders( - sizes=["random"], color_spaces=["RGB"], num_frames=["random"], dtypes=[torch.float32] + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[3], dtypes=[torch.float32] ): yield ArgsKwargs(video_loader, mean=mean, std=std) @@ -1671,7 +1672,9 @@ def sample_inputs_convert_dtype_image_tensor(): # conversion cannot be performed safely continue - for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[input_dtype]): + for 
image_loader in make_image_loaders( + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[input_dtype] + ): yield ArgsKwargs(image_loader, dtype=output_dtype) @@ -1736,7 +1739,7 @@ def reference_inputs_convert_dtype_image_tensor(): def sample_inputs_convert_dtype_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1781,7 +1784,7 @@ def sample_inputs_convert_dtype_video(): def sample_inputs_uniform_temporal_subsample_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=[4]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[4]): yield ArgsKwargs(video_loader, num_samples=2) @@ -1797,7 +1800,9 @@ def reference_uniform_temporal_subsample_video(x, num_samples): def reference_inputs_uniform_temporal_subsample_video(): - for video_loader in make_video_loaders(sizes=["random"], color_spaces=["RGB"], num_frames=[10]): + for video_loader in make_video_loaders( + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[10] + ): for num_samples in range(1, video_loader.shape[-4] + 1): yield ArgsKwargs(video_loader, num_samples) From cdb7e17351e18f19caabfa06dc0667da5304712f Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 4 Jul 2023 14:38:10 +0200 Subject: [PATCH 09/17] move default spatial size into low level functions --- test/common_utils.py | 60 ++++++++++++++++----------- test/test_transforms_v2_refactored.py | 10 ++--- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 2656449cf8c..837f7e1f5b9 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -495,18 +495,20 @@ def get_num_channels(color_space): def make_image( - spatial_size, *, color_space="RGB", batch_dims=(), dtype=None, device="cpu", memory_format=torch.contiguous_format + spatial_size=DEFAULT_SPATIAL_SIZE, + *, + color_space="RGB", + batch_dims=(), + dtype=None, + device="cpu", + memory_format=torch.contiguous_format, ): - spatial_size = _parse_spatial_size(spatial_size) - num_channels = get_num_channels(color_space) - dtype = dtype or torch.uint8 max_value = get_max_value(dtype) - data = torch.testing.make_tensor( - (*batch_dims, num_channels, *spatial_size), + (*batch_dims, get_num_channels(color_space), *spatial_size), low=0, high=max_value, - dtype=dtype, + dtype=dtype or torch.uint8, device=device, memory_format=memory_format, ) @@ -627,12 +629,16 @@ def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): def make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size=DEFAULT_SPATIAL_SIZE, batch_dims=(), dtype=None, device="cpu" + spatial_size=DEFAULT_SPATIAL_SIZE, + *, + format=datapoints.BoundingBoxFormat.XYXY, + batch_dims=(), + dtype=None, + device="cpu", ): if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] - spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") dtype = dtype or torch.float32 if any(dim == 0 for dim in batch_dims): @@ -703,15 +709,17 @@ class MaskLoader(TensorLoader): pass -def make_detection_mask(spatial_size, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): +def make_detection_mask(spatial_size=DEFAULT_SPATIAL_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): """Make a "detection" mask, i.e. 
(*, N, H, W), where each object is encoded as one of N boolean masks""" - spatial_size = _parse_spatial_size(spatial_size) - dtype = dtype or torch.bool - - data = torch.testing.make_tensor( - (*batch_dims, num_objects, *spatial_size), low=0, high=2, dtype=dtype, device=device + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, num_objects, *spatial_size), + low=0, + high=2, + dtype=dtype or torch.bool, + device=device, + ) ) - return datapoints.Mask(data) def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8): @@ -740,15 +748,19 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask(spatial_size, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): +def make_segmentation_mask( + spatial_size=DEFAULT_SPATIAL_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu" +): """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value""" - spatial_size = _parse_spatial_size(spatial_size) - dtype = dtype or torch.uint8 - - data = torch.testing.make_tensor( - (*batch_dims, *spatial_size), low=0, high=num_categories, dtype=dtype, device=device + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, *spatial_size), + low=0, + high=num_categories, + dtype=dtype or torch.uint8, + device=device, + ) ) - return datapoints.Mask(data) def make_segmentation_mask_loader( @@ -801,7 +813,7 @@ class VideoLoader(ImageLoader): pass -def make_video(spatial_size, *, num_frames=3, batch_dims=(), **kwargs): +def make_video(spatial_size=DEFAULT_SPATIAL_SIZE, *, num_frames=3, batch_dims=(), **kwargs): return datapoints.Video(make_image(spatial_size, batch_dims=(*batch_dims, num_frames), **kwargs)) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index be768900ece..57fce82f254 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -308,23 +308,23 @@ def wrapper(input, *args, **kwargs): return wrapper -def make_input(input_type, *, spatial_size=(17, 11), mask_type="segmentation", **kwargs): +def make_input(input_type, *, mask_type="segmentation", **kwargs): if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}: - input = make_image(spatial_size=spatial_size, **kwargs) + input = make_image(**kwargs) if input_type is torch.Tensor: input = input.as_subclass(torch.Tensor) elif input_type is PIL.Image.Image: input = F.to_image_pil(input) elif input_type is datapoints.BoundingBox: - input = make_bounding_box(spatial_size=spatial_size) + input = make_bounding_box() elif input_type is datapoints.Mask: make_mask = { "segmentation": make_segmentation_mask, "detection": make_detection_mask, }[mask_type] - input = make_mask(spatial_size, **kwargs) + input = make_mask(**kwargs) elif input_type is datapoints.Video: - input = make_video(spatial_size, **kwargs) + input = make_video(**kwargs) else: raise TypeError( f"Input type can either be torch.Tensor, PIL.Image.Image, or any TorchVision datapoint class, " From 9b3073196b5b10b36e985559c9a4c6e371eb03ea Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 Jul 2023 11:47:43 +0200 Subject: [PATCH 10/17] add size parameter to make_bounding_box --- test/common_utils.py | 66 ++++++++++++++++-------------- test/transforms_v2_kernel_infos.py | 3 ++ 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 
837f7e1f5b9..ca4dc78811b 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -616,29 +616,32 @@ class BoundingBoxLoader(TensorLoader): spatial_size: Tuple[int, int] -def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): - low, high = torch.broadcast_tensors( - *[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))] - ) - return torch.stack( - [ - torch.randint(low_scalar, high_scalar, (), **kwargs) - for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist()) - ] - ).reshape(low.shape) - - def make_bounding_box( - spatial_size=DEFAULT_SPATIAL_SIZE, + size=None, *, format=datapoints.BoundingBoxFormat.XYXY, + spatial_size=None, batch_dims=(), dtype=None, device="cpu", ): + def sample_position(values, max_value): + # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high. + # However, if we have batch_dims, we need tensors as limits. + return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape) + if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] + if spatial_size is None: + if size is None: + spatial_size = DEFAULT_SPATIAL_SIZE + else: + height, width = size + height_margin, width_margin = torch.randint(10, (2,)).tolist() + spatial_size = (height + height_margin, width + width_margin) + spatial_height, spatial_width = spatial_size + dtype = dtype or torch.float32 if any(dim == 0 for dim in batch_dims): @@ -646,27 +649,28 @@ def make_bounding_box( torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size ) - height, width = spatial_size - if format == datapoints.BoundingBoxFormat.XYXY: - x1 = torch.randint(0, width // 2, batch_dims) - y1 = torch.randint(0, height // 2, batch_dims) - x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1 - y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1 - parts = (x1, y1, x2, y2) - elif format == datapoints.BoundingBoxFormat.XYWH: - x = torch.randint(0, width // 2, batch_dims) - y = torch.randint(0, height // 2, batch_dims) - w = randint_with_tensor_bounds(1, width - x) - h = randint_with_tensor_bounds(1, height - y) + if size is None: + h = torch.randint(1, spatial_height - 1, batch_dims) + w = torch.randint(1, spatial_width - 1, batch_dims) + else: + h, w = [torch.full(batch_dims, v, dtype=torch.int) for v in size] + + y = sample_position(h, spatial_height) + x = sample_position(w, spatial_width) + + if format is datapoints.BoundingBoxFormat.XYWH: parts = (x, y, w, h) - elif format == datapoints.BoundingBoxFormat.CXCYWH: - cx = torch.randint(1, width - 1, batch_dims) - cy = torch.randint(1, height - 1, batch_dims) - w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1) - h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1) + elif format is datapoints.BoundingBoxFormat.XYXY: + x1, y1 = x, y + x2 = x1 + w + y2 = y1 + h + parts = (x1, y1, x2, y2) + elif format is datapoints.BoundingBoxFormat.CXCYWH: + cx = x + w / 2 + cy = y + h / 2 parts = (cx, cy, w, h) else: - raise ValueError(f"Can't make bounding box in format {format}") + raise ValueError(f"Format {format} is not supported") return datapoints.BoundingBox( torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index d9533cbb725..dc04fbfc7a9 100644 --- a/test/transforms_v2_kernel_infos.py +++ 
b/test/transforms_v2_kernel_infos.py @@ -261,6 +261,9 @@ def reference_inputs_convert_format_bounding_box(): reference_fn=reference_convert_format_bounding_box, reference_inputs_fn=reference_inputs_convert_format_bounding_box, logs_usage=True, + closeness_kwargs={ + (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0), + }, ), ) From 38b589e7222ca6d0f39ceb84107c097aa7e9f684 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 Jul 2023 13:09:44 +0200 Subject: [PATCH 11/17] spatial_size -> size --- test/common_utils.py | 24 ++++++++---------- test/test_prototype_transforms.py | 16 ++++++------ test/test_transforms_v2.py | 18 ++++++------- test/test_transforms_v2_consistency.py | 22 ++++++++-------- test/test_transforms_v2_refactored.py | 35 +++++++++++++------------- test/test_transforms_v2_utils.py | 4 +-- 6 files changed, 59 insertions(+), 60 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index ca4dc78811b..6c813f47c03 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -400,7 +400,7 @@ def load(self, device="cpu"): # new v2 default -DEFAULT_SPATIAL_SIZE = (17, 11) +DEFAULT_SIZE = (17, 11) # old v2 defaults DEFAULT_SQUARE_SPATIAL_SIZE = 15 DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) @@ -495,7 +495,7 @@ def get_num_channels(color_space): def make_image( - spatial_size=DEFAULT_SPATIAL_SIZE, + size=DEFAULT_SIZE, *, color_space="RGB", batch_dims=(), @@ -505,7 +505,7 @@ def make_image( ): max_value = get_max_value(dtype) data = torch.testing.make_tensor( - (*batch_dims, get_num_channels(color_space), *spatial_size), + (*batch_dims, get_num_channels(color_space), *size), low=0, high=max_value, dtype=dtype or torch.uint8, @@ -635,7 +635,7 @@ def sample_position(values, max_value): if spatial_size is None: if size is None: - spatial_size = DEFAULT_SPATIAL_SIZE + spatial_size = DEFAULT_SIZE else: height, width = size height_margin, width_margin = torch.randint(10, (2,)).tolist() @@ -713,11 +713,11 @@ class MaskLoader(TensorLoader): pass -def make_detection_mask(spatial_size=DEFAULT_SPATIAL_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): +def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" return datapoints.Mask( torch.testing.make_tensor( - (*batch_dims, num_objects, *spatial_size), + (*batch_dims, num_objects, *size), low=0, high=2, dtype=dtype or torch.bool, @@ -752,13 +752,11 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask( - spatial_size=DEFAULT_SPATIAL_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu" -): +def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): """Make a "segmentation" mask, i.e. 
(*, H, W), where the category is encoded as pixel value""" return datapoints.Mask( torch.testing.make_tensor( - (*batch_dims, *spatial_size), + (*batch_dims, *size), low=0, high=num_categories, dtype=dtype or torch.uint8, @@ -817,8 +815,8 @@ class VideoLoader(ImageLoader): pass -def make_video(spatial_size=DEFAULT_SPATIAL_SIZE, *, num_frames=3, batch_dims=(), **kwargs): - return datapoints.Video(make_image(spatial_size, batch_dims=(*batch_dims, num_frames), **kwargs)) +def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs): + return datapoints.Video(make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs)) def make_video_loader( @@ -836,8 +834,8 @@ def fn(shape, dtype, device, memory_format): return make_video( (height, width), num_frames=num_frames, - color_space=color_space, batch_dims=batch_dims, + color_space=color_space, dtype=dtype, device=device, memory_format=memory_format, diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index 80e5162ed30..cfbcc7c0557 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -216,7 +216,7 @@ def test__get_params(self, mocker): transform = transforms.FixedSizeCrop(size=crop_size) flat_inputs = [ - make_image(spatial_size=spatial_size, color_space="RGB"), + make_image(size=spatial_size, color_space="RGB"), make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape), ] params = transform._get_params(flat_inputs) @@ -315,7 +315,7 @@ def test__transform_culling(self, mocker): bounding_boxes = make_bounding_box( format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,) ) - masks = make_detection_mask(spatial_size=spatial_size, batch_dims=(batch_size,)) + masks = make_detection_mask(size=spatial_size, batch_dims=(batch_size,)) labels = make_label(extra_dims=(batch_size,)) transform = transforms.FixedSizeCrop((-1, -1)) @@ -495,29 +495,29 @@ def make_datapoints(): size = (600, 800) num_objects = 22 - pil_image = to_image_pil(make_image(spatial_size=size, color_space="RGB")) + pil_image = to_image_pil(make_image(size=size, color_space="RGB")) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), - "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), + "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } yield (pil_image, target) - tensor_image = torch.Tensor(make_image(spatial_size=size, color_space="RGB")) + tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), - "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), + "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } yield (tensor_image, target) - datapoint_image = make_image(spatial_size=size, color_space="RGB") + datapoint_image = make_image(size=size, color_space="RGB") target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), - "masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long), + "masks": 
make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } yield (datapoint_image, target) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 1c6ee23c95f..0f6c4cc3b2e 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -168,8 +168,8 @@ class TestSmoke: @pytest.mark.parametrize( "image_or_video", [ - make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), - make_video(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_video(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), next(make_pil_images(color_spaces=["RGB"])), next(make_vanilla_tensor_images()), ], @@ -179,8 +179,8 @@ def test_common(self, transform, adapter, container_type, image_or_video, device spatial_size = F.get_spatial_size(image_or_video) input = dict( image_or_video=image_or_video, - image_datapoint=make_image(spatial_size=spatial_size), - video_datapoint=make_video(spatial_size=spatial_size), + image_datapoint=make_image(size=spatial_size), + video_datapoint=make_video(size=spatial_size), image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])), bounding_box_xyxy=make_bounding_box( format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,) @@ -227,8 +227,8 @@ def test_common(self, transform, adapter, container_type, image_or_video, device format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, ), - detection_mask=make_detection_mask(spatial_size=spatial_size), - segmentation_mask=make_segmentation_mask(spatial_size=spatial_size), + detection_mask=make_detection_mask(size=spatial_size), + segmentation_mask=make_segmentation_mask(size=spatial_size), int=0, float=0.0, bool=True, @@ -353,7 +353,7 @@ def test_random_resized_crop(self, transform, input): next(make_vanilla_tensor_images()), next(make_vanilla_tensor_images()), next(make_pil_images()), - make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), next(make_videos()), ], 3, @@ -1347,8 +1347,8 @@ class TestToDtype: ) def test_call(self, dtype, expected_dtypes): sample = dict( - video=make_video(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.int64), - image=make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.uint8), + video=make_video(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.int64), + image=make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.uint8), bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, dtype=torch.float32), str="str", int=0, diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index f65a3407b7c..3f631d7ac94 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -711,11 +711,11 @@ def test_call_consistency(config, args_kwargs): for transform_cls, get_params_args_kwargs in [ ( v2_transforms.RandomResizedCrop, - ArgsKwargs(make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=[0.3, 0.7], ratio=[0.5, 1.5]), + ArgsKwargs(make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=[0.3, 0.7], ratio=[0.5, 1.5]), ), ( v2_transforms.RandomErasing, - ArgsKwargs(make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=(0.3, 0.7), ratio=(0.5, 1.5)), + ArgsKwargs(make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=(0.3, 0.7), ratio=(0.5, 1.5)), ), (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)), (v2_transforms.ElasticTransform, ArgsKwargs(alpha=[15.3, 27.2], 
sigma=[2.5, 3.9], size=[17, 31])), @@ -724,7 +724,7 @@ def test_call_consistency(config, args_kwargs): v2_transforms.RandomAffine, ArgsKwargs(degrees=[-20.0, 10.0], translate=None, scale_ranges=None, shears=None, img_size=[15, 29]), ), - (v2_transforms.RandomCrop, ArgsKwargs(make_image(spatial_size=(61, 47)), output_size=(19, 25))), + (v2_transforms.RandomCrop, ArgsKwargs(make_image(size=(61, 47)), output_size=(19, 25))), (v2_transforms.RandomPerspective, ArgsKwargs(23, 17, 0.5)), (v2_transforms.RandomRotation, ArgsKwargs(degrees=[-20.0, 10.0])), (v2_transforms.AutoAugment, ArgsKwargs(5)), @@ -1095,33 +1095,33 @@ def make_datapoints(self, with_mask=True): def make_label(extra_dims, categories): return torch.randint(categories, extra_dims, dtype=torch.int64) - pil_image = to_image_pil(make_image(spatial_size=size, color_space="RGB")) + pil_image = to_image_pil(make_image(size=size, color_space="RGB")) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: - target["masks"] = make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long) + target["masks"] = make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long) yield (pil_image, target) - tensor_image = torch.Tensor(make_image(spatial_size=size, color_space="RGB", dtype=torch.float32)) + tensor_image = torch.Tensor(make_image(size=size, color_space="RGB", dtype=torch.float32)) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: - target["masks"] = make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long) + target["masks"] = make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long) yield (tensor_image, target) - datapoint_image = make_image(spatial_size=size, color_space="RGB", dtype=torch.float32) + datapoint_image = make_image(size=size, color_space="RGB", dtype=torch.float32) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: - target["masks"] = make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long) + target["masks"] = make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long) yield (datapoint_image, target) @@ -1203,8 +1203,8 @@ def make_datapoints(self, supports_pil=True, image_dtype=torch.uint8): conv_fns.extend([torch.Tensor, lambda x: x]) for conv_fn in conv_fns: - datapoint_image = make_image(spatial_size=size, color_space="RGB", dtype=image_dtype) - datapoint_mask = make_segmentation_mask(spatial_size=size, num_categories=num_categories, dtype=torch.uint8) + datapoint_image = make_image(size=size, color_space="RGB", dtype=image_dtype) + datapoint_mask = make_segmentation_mask(size=size, num_categories=num_categories, dtype=torch.uint8) dp = (conv_fn(datapoint_image), datapoint_mask) dp_ref = ( diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 57fce82f254..e44cd5e9264 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -316,7 +316,7 @@ def make_input(input_type, *, mask_type="segmentation", **kwargs): elif input_type is PIL.Image.Image: input = F.to_image_pil(input) elif input_type is 
datapoints.BoundingBox: - input = make_bounding_box() + input = make_bounding_box(**kwargs) elif input_type is datapoints.Mask: make_mask = { "segmentation": make_segmentation_mask, @@ -506,7 +506,7 @@ def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, check_kernel( F.resize_image_tensor, - make_input(datapoints.Image, dtype=dtype, device=device, spatial_size=self.INPUT_SIZE), + make_input(datapoints.Image, dtype=dtype, device=device, size=self.INPUT_SIZE), size=size, interpolation=interpolation, **max_size_kwarg, @@ -540,14 +540,14 @@ def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device): def test_kernel_mask(self, mask_type): check_kernel( F.resize_mask, - make_input(datapoints.Mask, spatial_size=self.INPUT_SIZE, mask_type=mask_type), + make_input(datapoints.Mask, size=self.INPUT_SIZE, mask_type=mask_type), size=self.OUTPUT_SIZES[-1], ) def test_kernel_video(self): check_kernel( F.resize_video, - make_input(datapoints.Video, spatial_size=self.INPUT_SIZE), + make_input(datapoints.Video, size=self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1], antialias=True, ) @@ -568,7 +568,7 @@ def test_dispatcher(self, size, input_type, kernel): check_dispatcher( F.resize, kernel, - make_input(input_type, spatial_size=self.INPUT_SIZE), + make_input(input_type, size=self.INPUT_SIZE), size=size, antialias=True, check_scripted_smoke=not isinstance(size, int), @@ -595,7 +595,7 @@ def test_dispatcher_signature(self, kernel, input_type): [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], ) def test_transform(self, size, device, input_type): - input = make_input(input_type, device=device, spatial_size=self.INPUT_SIZE) + input = make_input(input_type, device=device, size=self.INPUT_SIZE) check_transform( transforms.Resize, @@ -619,7 +619,7 @@ def test_image_correctness(self, size, interpolation, use_max_size, fn): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu", spatial_size=self.INPUT_SIZE) + image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu", size=self.INPUT_SIZE) actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True) expected = F.to_image_tensor( @@ -662,7 +662,7 @@ def test_bounding_box_correctness(self, format, size, use_max_size, fn): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_box = make_input(datapoints.BoundingBox, spatial_size=self.INPUT_SIZE) + bounding_box = make_input(datapoints.BoundingBox, size=self.INPUT_SIZE) actual = fn(bounding_box, size=size, **max_size_kwarg) expected = self._reference_resize_bounding_box(bounding_box, size=size, **max_size_kwarg) @@ -676,7 +676,7 @@ def test_bounding_box_correctness(self, format, size, use_max_size, fn): [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video], ) def test_pil_interpolation_compat_smoke(self, interpolation, input_type): - input = make_input(input_type, spatial_size=self.INPUT_SIZE) + input = make_input(input_type, size=self.INPUT_SIZE) with ( contextlib.nullcontext() @@ -692,9 +692,7 @@ def test_pil_interpolation_compat_smoke(self, interpolation, input_type): def test_dispatcher_pil_antialias_warning(self): with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"): - F.resize( - make_input(PIL.Image.Image, spatial_size=self.INPUT_SIZE), 
size=self.OUTPUT_SIZES[0], antialias=False - ) + F.resize(make_input(PIL.Image.Image, size=self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False) @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize( @@ -711,7 +709,7 @@ def test_max_size_error(self, size, input_type): match = "size should be an int or a sequence of length 1" with pytest.raises(ValueError, match=match): - F.resize(make_input(input_type, spatial_size=self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) + F.resize(make_input(input_type, size=self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) @pytest.mark.parametrize( @@ -725,7 +723,7 @@ def test_antialias_warning(self, interpolation, input_type): else assert_no_warnings() ): F.resize( - make_input(input_type, spatial_size=self.INPUT_SIZE), + make_input(input_type, size=self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], interpolation=interpolation, ) @@ -742,7 +740,7 @@ def test_interpolation_int(self, interpolation, input_type): if issubclass(input_type, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: return - input = make_input(input_type, spatial_size=self.INPUT_SIZE) + input = make_input(input_type, size=self.INPUT_SIZE) expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True) actual = F.resize( @@ -763,7 +761,10 @@ def test_transform_unknown_size_error(self): [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], ) def test_noop(self, size, input_type): - input = make_input(input_type, spatial_size=self.INPUT_SIZE) + input = make_input( + input_type, + **{"spatial_size" if issubclass(input_type, datapoints.BoundingBox) else "size": self.INPUT_SIZE}, + ) output = F.resize(input, size=size, antialias=True) @@ -785,7 +786,7 @@ def test_no_regression_5405(self, input_type): # Checks that `max_size` is not ignored if `size == small_edge_size` # See https://github.com/pytorch/vision/issues/5405 - input = make_input(input_type, spatial_size=self.INPUT_SIZE) + input = make_input(input_type, size=self.INPUT_SIZE) size = min(F.get_spatial_size(input)) max_size = size + 1 diff --git a/test/test_transforms_v2_utils.py b/test/test_transforms_v2_utils.py index ad30c223530..98271b893d6 100644 --- a/test/test_transforms_v2_utils.py +++ b/test/test_transforms_v2_utils.py @@ -11,9 +11,9 @@ from torchvision.transforms.v2.utils import has_all, has_any -IMAGE = make_image(spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, color_space="RGB") +IMAGE = make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, color_space="RGB") BOUNDING_BOX = make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, spatial_size=IMAGE.spatial_size) -MASK = make_detection_mask(spatial_size=IMAGE.spatial_size) +MASK = make_detection_mask(size=IMAGE.spatial_size) @pytest.mark.parametrize( From d07343d43594250b23045015ddba54ac4f559866 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 Jul 2023 13:42:27 +0200 Subject: [PATCH 12/17] [PoC] remove make_input uses from TestResize --- test/common_utils.py | 10 +- test/test_transforms_v2_refactored.py | 166 ++++++++++++++------------ 2 files changed, 101 insertions(+), 75 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 6c813f47c03..009d9dfb21a 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -27,7 +27,7 @@ from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, 
TensorLikePair from torchvision import datapoints, io from torchvision.transforms._functional_tensor import _max_value as get_max_value -from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_tensor +from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_pil, to_image_tensor IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"]) @@ -518,6 +518,14 @@ def make_image( return datapoints.Image(data) +def make_image_tensor(*args, **kwargs): + return make_image(*args, **kwargs).as_subclass(torch.Tensor) + + +def make_image_pil(*args, **kwargs): + return to_image_pil(make_image(*args, **kwargs)) + + def make_image_loader( size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index e44cd5e9264..ad2ddd6e051 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -20,6 +20,8 @@ make_bounding_box, make_detection_mask, make_image, + make_image_pil, + make_image_tensor, make_segmentation_mask, make_video, set_rng_seed, @@ -506,7 +508,7 @@ def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, check_kernel( F.resize_image_tensor, - make_input(datapoints.Image, dtype=dtype, device=device, size=self.INPUT_SIZE), + make_image(self.INPUT_SIZE, dtype=dtype, device=device), size=size, interpolation=interpolation, **max_size_kwarg, @@ -524,8 +526,11 @@ def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_box = make_input( - datapoints.BoundingBox, dtype=dtype, device=device, format=format, spatial_size=self.INPUT_SIZE + bounding_box = make_bounding_box( + format=format, + spatial_size=self.INPUT_SIZE, + dtype=dtype, + device=device, ) check_kernel( F.resize_bounding_box, @@ -536,53 +541,44 @@ def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device): check_scripted_vs_eager=not isinstance(size, int), ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - check_kernel( - F.resize_mask, - make_input(datapoints.Mask, size=self.INPUT_SIZE, mask_type=mask_type), - size=self.OUTPUT_SIZES[-1], - ) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.resize_mask, make_mask(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1]) def test_kernel_video(self): - check_kernel( - F.resize_video, - make_input(datapoints.Video, size=self.INPUT_SIZE), - size=self.OUTPUT_SIZES[-1], - antialias=True, - ) + check_kernel(F.resize_video, make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1], antialias=True) @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.resize_image_tensor), - (PIL.Image.Image, F.resize_image_pil), - (datapoints.Image, F.resize_image_tensor), - (datapoints.BoundingBox, F.resize_bounding_box), - (datapoints.Mask, F.resize_mask), - (datapoints.Video, F.resize_video), + (F.resize_image_tensor, make_image_tensor), + (F.resize_image_pil, make_image_pil), + (F.resize_image_tensor, make_image), + (F.resize_bounding_box, make_bounding_box), + (F.resize_mask, make_segmentation_mask), + (F.resize_video, make_video), ], ) - def test_dispatcher(self, size, input_type, kernel): + def 
test_dispatcher(self, size, kernel, make_input): check_dispatcher( F.resize, kernel, - make_input(input_type, size=self.INPUT_SIZE), + make_input(self.INPUT_SIZE), size=size, antialias=True, check_scripted_smoke=not isinstance(size, int), ) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.resize_image_tensor), - (PIL.Image.Image, F.resize_image_pil), - (datapoints.Image, F.resize_image_tensor), - (datapoints.BoundingBox, F.resize_bounding_box), - (datapoints.Mask, F.resize_mask), - (datapoints.Video, F.resize_video), + (F.resize_image_tensor, torch.Tensor), + (F.resize_image_pil, PIL.Image.Image), + (F.resize_image_tensor, datapoints.Image), + (F.resize_bounding_box, datapoints.BoundingBox), + (F.resize_mask, datapoints.Mask), + (F.resize_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): @@ -591,18 +587,19 @@ def test_dispatcher_signature(self, kernel, input_type): @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], ) - def test_transform(self, size, device, input_type): - input = make_input(input_type, device=device, size=self.INPUT_SIZE) - - check_transform( - transforms.Resize, - input, - size=size, - antialias=True, - ) + def test_transform(self, size, device, make_input): + check_transform(transforms.Resize, make_input(self.INPUT_SIZE, device=device), size=size, antialias=True) def _check_output_size(self, input, output, *, size, max_size): assert tuple(F.get_spatial_size(output)) == self._compute_output_size( @@ -619,7 +616,7 @@ def test_image_correctness(self, size, interpolation, use_max_size, fn): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu", size=self.INPUT_SIZE) + image = make_image(self.INPUT_SIZE, dtype=torch.uint8) actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True) expected = F.to_image_tensor( @@ -662,7 +659,7 @@ def test_bounding_box_correctness(self, format, size, use_max_size, fn): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_box = make_input(datapoints.BoundingBox, size=self.INPUT_SIZE) + bounding_box = make_bounding_box(format=format, spatial_size=self.INPUT_SIZE) actual = fn(bounding_box, size=size, **max_size_kwarg) expected = self._reference_resize_bounding_box(bounding_box, size=size, **max_size_kwarg) @@ -672,11 +669,11 @@ def test_bounding_box_correctness(self, format, size, use_max_size, fn): @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES)) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], ) - def test_pil_interpolation_compat_smoke(self, interpolation, input_type): - input = make_input(input_type, size=self.INPUT_SIZE) + def test_pil_interpolation_compat_smoke(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) with ( contextlib.nullcontext() @@ -696,10 +693,18 @@ def 
test_dispatcher_pil_antialias_warning(self): @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], ) - def test_max_size_error(self, size, input_type): + def test_max_size_error(self, size, make_input): if isinstance(size, int) or len(size) == 1: max_size = (size if isinstance(size, int) else size[0]) - 1 match = "must be strictly greater than the requested size" @@ -709,39 +714,39 @@ def test_max_size_error(self, size, input_type): match = "size should be an int or a sequence of length 1" with pytest.raises(ValueError, match=match): - F.resize(make_input(input_type, size=self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) + F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, datapoints.Image, datapoints.Video], + "make_input", + [make_image_tensor, make_image, make_video], ) - def test_antialias_warning(self, interpolation, input_type): + def test_antialias_warning(self, interpolation, make_input): with ( assert_warns_antialias_default_value() if interpolation in {transforms.InterpolationMode.BILINEAR, transforms.InterpolationMode.BICUBIC} else assert_no_warnings() ): F.resize( - make_input(input_type, size=self.INPUT_SIZE), + make_input(self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], interpolation=interpolation, ) @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], ) - def test_interpolation_int(self, interpolation, input_type): + def test_interpolation_int(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) + # `InterpolationMode.NEAREST_EXACT` has no proper corresponding integer equivalent. Internally, we map it to # `0` to be the same as `InterpolationMode.NEAREST` for PIL. However, for the tensor backend there is a # difference and thus we don't test it here. 
- if issubclass(input_type, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: + if isinstance(input, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: return - input = make_input(input_type, size=self.INPUT_SIZE) - expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True) actual = F.resize( input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True @@ -757,14 +762,19 @@ def test_transform_unknown_size_error(self): "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)] ) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], ) - def test_noop(self, size, input_type): - input = make_input( - input_type, - **{"spatial_size" if issubclass(input_type, datapoints.BoundingBox) else "size": self.INPUT_SIZE}, - ) + def test_noop(self, size, make_input): + input = make_input(**{"spatial_size" if make_input is make_bounding_box else "size": self.INPUT_SIZE}) output = F.resize(input, size=size, antialias=True) @@ -779,14 +789,22 @@ def test_noop(self, size, input_type): assert output is input @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], ) - def test_no_regression_5405(self, input_type): + def test_no_regression_5405(self, make_input): # Checks that `max_size` is not ignored if `size == small_edge_size` # See https://github.com/pytorch/vision/issues/5405 - input = make_input(input_type, size=self.INPUT_SIZE) + input = make_input(self.INPUT_SIZE) size = min(F.get_spatial_size(input)) max_size = size + 1 From df2f87115715a956c75f31dbf33fec3f4b08a9c3 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 Jul 2023 14:07:11 +0200 Subject: [PATCH 13/17] remove make_input --- test/test_transforms_v2_refactored.py | 288 ++++++++++++-------------- 1 file changed, 129 insertions(+), 159 deletions(-) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index ad2ddd6e051..00d395336f4 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -310,32 +310,6 @@ def wrapper(input, *args, **kwargs): return wrapper -def make_input(input_type, *, mask_type="segmentation", **kwargs): - if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}: - input = make_image(**kwargs) - if input_type is torch.Tensor: - input = input.as_subclass(torch.Tensor) - elif input_type is PIL.Image.Image: - input = F.to_image_pil(input) - elif input_type is datapoints.BoundingBox: - input = make_bounding_box(**kwargs) - elif input_type is datapoints.Mask: - make_mask = { - "segmentation": make_segmentation_mask, - "detection": make_detection_mask, - }[mask_type] - input = make_mask(**kwargs) - elif input_type is datapoints.Video: - input = make_video(**kwargs) - else: - raise TypeError( - f"Input type can either be torch.Tensor, PIL.Image.Image, or any TorchVision datapoint class, " - f"but got {input_type} instead." 
- ) - - return input - - def param_value_parametrization(**kwargs): """Helper function to turn @@ -689,7 +663,7 @@ def test_pil_interpolation_compat_smoke(self, interpolation, make_input): def test_dispatcher_pil_antialias_warning(self): with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"): - F.resize(make_input(PIL.Image.Image, size=self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False) + F.resize(make_image_pil(self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False) @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize( @@ -817,13 +791,13 @@ class TestHorizontalFlip: @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_image_tensor(self, dtype, device): - check_kernel(F.horizontal_flip_image_tensor, make_input(torch.Tensor, dtype=dtype, device=device)) + check_kernel(F.horizontal_flip_image_tensor, make_image(dtype=dtype, device=device)) @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_box(self, format, dtype, device): - bounding_box = make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format) + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) check_kernel( F.horizontal_flip_bounding_box, bounding_box, @@ -831,56 +805,54 @@ def test_kernel_bounding_box(self, format, dtype, device): spatial_size=bounding_box.spatial_size, ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - check_kernel(F.horizontal_flip_mask, make_input(datapoints.Mask, mask_type=mask_type)) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.horizontal_flip_mask, make_mask()) def test_kernel_video(self): - check_kernel(F.horizontal_flip_video, make_input(datapoints.Video)) + check_kernel(F.horizontal_flip_video, make_video()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.horizontal_flip_image_tensor), - (PIL.Image.Image, F.horizontal_flip_image_pil), - (datapoints.Image, F.horizontal_flip_image_tensor), - (datapoints.BoundingBox, F.horizontal_flip_bounding_box), - (datapoints.Mask, F.horizontal_flip_mask), - (datapoints.Video, F.horizontal_flip_video), + (F.horizontal_flip_image_tensor, make_image_tensor), + (F.horizontal_flip_image_pil, make_image_pil), + (F.horizontal_flip_image_tensor, make_image), + (F.horizontal_flip_bounding_box, make_bounding_box), + (F.horizontal_flip_mask, make_segmentation_mask), + (F.horizontal_flip_video, make_video), ], ) - def test_dispatcher(self, kernel, input_type): - check_dispatcher(F.horizontal_flip, kernel, make_input(input_type)) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.horizontal_flip, kernel, make_input()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.horizontal_flip_image_tensor), - (PIL.Image.Image, F.horizontal_flip_image_pil), - (datapoints.Image, F.horizontal_flip_image_tensor), - (datapoints.BoundingBox, F.horizontal_flip_bounding_box), - (datapoints.Mask, F.horizontal_flip_mask), - (datapoints.Video, F.horizontal_flip_video), + (F.horizontal_flip_image_tensor, torch.Tensor), + (F.horizontal_flip_image_pil, PIL.Image.Image), + 
(F.horizontal_flip_image_tensor, datapoints.Image), + (F.horizontal_flip_bounding_box, datapoints.BoundingBox), + (F.horizontal_flip_mask, datapoints.Mask), + (F.horizontal_flip_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): check_dispatcher_signatures_match(F.horizontal_flip, kernel=kernel, input_type=input_type) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform(self, input_type, device): - input = make_input(input_type, device=device) - - check_transform(transforms.RandomHorizontalFlip, input, p=1) + def test_transform(self, make_input, device): + check_transform(transforms.RandomHorizontalFlip, make_input(device=device), p=1) @pytest.mark.parametrize( "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] ) def test_image_correctness(self, fn): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") actual = fn(image) expected = F.to_image_tensor(F.horizontal_flip(F.to_image_pil(image))) @@ -910,7 +882,7 @@ def _reference_horizontal_flip_bounding_box(self, bounding_box): "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] ) def test_bounding_box_correctness(self, format, fn): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) actual = fn(bounding_box) expected = self._reference_horizontal_flip_bounding_box(bounding_box) @@ -918,12 +890,12 @@ def test_bounding_box_correctness(self, format, fn): torch.testing.assert_close(actual, expected) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform_noop(self, input_type, device): - input = make_input(input_type, device=device) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) transform = transforms.RandomHorizontalFlip(p=0) @@ -988,7 +960,7 @@ def test_kernel_image_tensor(self, param, value, dtype, device): value = adapt_fill(value, dtype=dtype) self._check_kernel( F.affine_image_tensor, - make_input(torch.Tensor, dtype=dtype, device=device), + make_image(dtype=dtype, device=device), **{param: value}, check_scripted_vs_eager=not (param in {"shear", "fill"} and isinstance(value, (int, float))), check_cuda_vs_cpu=dict(atol=1, rtol=0) @@ -1006,58 +978,58 @@ def test_kernel_image_tensor(self, param, value, dtype, device): @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_box(self, param, value, format, dtype, device): - bounding_box = make_input(datapoints.BoundingBox, format=format, dtype=dtype, device=device) + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) self._check_kernel( F.affine_bounding_box, - make_input(datapoints.BoundingBox, format=format, dtype=dtype, device=device), + bounding_box, format=format, spatial_size=bounding_box.spatial_size, **{param: 
value}, check_scripted_vs_eager=not (param == "shear" and isinstance(value, (int, float))), ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - self._check_kernel(F.affine_mask, make_input(datapoints.Mask, mask_type=mask_type)) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + self._check_kernel(F.affine_mask, make_mask()) def test_kernel_video(self): - self._check_kernel(F.affine_video, make_input(datapoints.Video)) + self._check_kernel(F.affine_video, make_video()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.affine_image_tensor), - (PIL.Image.Image, F.affine_image_pil), - (datapoints.Image, F.affine_image_tensor), - (datapoints.BoundingBox, F.affine_bounding_box), - (datapoints.Mask, F.affine_mask), - (datapoints.Video, F.affine_video), + (F.affine_image_tensor, make_image_tensor), + (F.affine_image_pil, make_image_pil), + (F.affine_image_tensor, make_image), + (F.affine_bounding_box, make_bounding_box), + (F.affine_mask, make_segmentation_mask), + (F.affine_video, make_video), ], ) - def test_dispatcher(self, kernel, input_type): - check_dispatcher(F.affine, kernel, make_input(input_type), **self._MINIMAL_AFFINE_KWARGS) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.affine, kernel, make_input(), **self._MINIMAL_AFFINE_KWARGS) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.affine_image_tensor), - (PIL.Image.Image, F.affine_image_pil), - (datapoints.Image, F.affine_image_tensor), - (datapoints.BoundingBox, F.affine_bounding_box), - (datapoints.Mask, F.affine_mask), - (datapoints.Video, F.affine_video), + (F.affine_image_tensor, torch.Tensor), + (F.affine_image_pil, PIL.Image.Image), + (F.affine_image_tensor, datapoints.Image), + (F.affine_bounding_box, datapoints.BoundingBox), + (F.affine_mask, datapoints.Mask), + (F.affine_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): check_dispatcher_signatures_match(F.affine, kernel=kernel, input_type=input_type) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform(self, input_type, device): - input = make_input(input_type, device=device) + def test_transform(self, make_input, device): + input = make_input(device=device) check_transform(transforms.RandomAffine, input, **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES) @@ -1071,7 +1043,7 @@ def test_transform(self, input_type, device): ) @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) def test_functional_image_correctness(self, angle, translate, scale, shear, center, interpolation, fill): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") fill = adapt_fill(fill, dtype=torch.uint8) @@ -1108,7 +1080,7 @@ def test_functional_image_correctness(self, angle, translate, scale, shear, cent @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_image_correctness(self, center, interpolation, fill, seed): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + 
image = make_image(dtype=torch.uint8, device="cpu") fill = adapt_fill(fill, dtype=torch.uint8) @@ -1172,7 +1144,7 @@ def _reference_affine_bounding_box(self, bounding_box, *, angle, translate, scal @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) def test_functional_bounding_box_correctness(self, format, angle, translate, scale, shear, center): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) actual = F.affine( bounding_box, @@ -1197,7 +1169,7 @@ def test_functional_bounding_box_correctness(self, format, angle, translate, sca @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_bounding_box_correctness(self, format, center, seed): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) transform = transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center) @@ -1217,7 +1189,7 @@ def test_transform_bounding_box_correctness(self, format, center, seed): @pytest.mark.parametrize("shear", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["shear"]) @pytest.mark.parametrize("seed", list(range(10))) def test_transform_get_params_bounds(self, degrees, translate, scale, shear, seed): - image = make_input(torch.Tensor) + image = make_image() height, width = F.get_spatial_size(image) transform = transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale, shear=shear) @@ -1298,13 +1270,13 @@ class TestVerticalFlip: @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_image_tensor(self, dtype, device): - check_kernel(F.vertical_flip_image_tensor, make_input(torch.Tensor, dtype=dtype, device=device)) + check_kernel(F.vertical_flip_image_tensor, make_image(dtype=dtype, device=device)) @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_box(self, format, dtype, device): - bounding_box = make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format) + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) check_kernel( F.vertical_flip_bounding_box, bounding_box, @@ -1312,54 +1284,52 @@ def test_kernel_bounding_box(self, format, dtype, device): spatial_size=bounding_box.spatial_size, ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - check_kernel(F.vertical_flip_mask, make_input(datapoints.Mask, mask_type=mask_type)) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.vertical_flip_mask, make_mask()) def test_kernel_video(self): - check_kernel(F.vertical_flip_video, make_input(datapoints.Video)) + check_kernel(F.vertical_flip_video, make_video()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.vertical_flip_image_tensor), - (PIL.Image.Image, F.vertical_flip_image_pil), - (datapoints.Image, F.vertical_flip_image_tensor), - (datapoints.BoundingBox, F.vertical_flip_bounding_box), - (datapoints.Mask, F.vertical_flip_mask), - (datapoints.Video, F.vertical_flip_video), + (F.vertical_flip_image_tensor, 
make_image_tensor), + (F.vertical_flip_image_pil, make_image_pil), + (F.vertical_flip_image_tensor, make_image), + (F.vertical_flip_bounding_box, make_bounding_box), + (F.vertical_flip_mask, make_segmentation_mask), + (F.vertical_flip_video, make_video), ], ) - def test_dispatcher(self, kernel, input_type): - check_dispatcher(F.vertical_flip, kernel, make_input(input_type)) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.vertical_flip, kernel, make_input()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.vertical_flip_image_tensor), - (PIL.Image.Image, F.vertical_flip_image_pil), - (datapoints.Image, F.vertical_flip_image_tensor), - (datapoints.BoundingBox, F.vertical_flip_bounding_box), - (datapoints.Mask, F.vertical_flip_mask), - (datapoints.Video, F.vertical_flip_video), + (F.vertical_flip_image_tensor, torch.Tensor), + (F.vertical_flip_image_pil, PIL.Image.Image), + (F.vertical_flip_image_tensor, datapoints.Image), + (F.vertical_flip_bounding_box, datapoints.BoundingBox), + (F.vertical_flip_mask, datapoints.Mask), + (F.vertical_flip_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): check_dispatcher_signatures_match(F.vertical_flip, kernel=kernel, input_type=input_type) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform(self, input_type, device): - input = make_input(input_type, device=device) - - check_transform(transforms.RandomVerticalFlip, input, p=1) + def test_transform(self, make_input, device): + check_transform(transforms.RandomVerticalFlip, make_input(device=device), p=1) @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) def test_image_correctness(self, fn): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") actual = fn(image) expected = F.to_image_tensor(F.vertical_flip(F.to_image_pil(image))) @@ -1387,7 +1357,7 @@ def _reference_vertical_flip_bounding_box(self, bounding_box): @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) def test_bounding_box_correctness(self, format, fn): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) actual = fn(bounding_box) expected = self._reference_vertical_flip_bounding_box(bounding_box) @@ -1395,12 +1365,12 @@ def test_bounding_box_correctness(self, format, fn): torch.testing.assert_close(actual, expected) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform_noop(self, input_type, device): - input = make_input(input_type, device=device) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) transform = transforms.RandomVerticalFlip(p=0) @@ -1443,7 +1413,7 @@ def 
test_kernel_image_tensor(self, param, value, dtype, device): kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] check_kernel( F.rotate_image_tensor, - make_input(torch.Tensor, dtype=dtype, device=device), + make_image(dtype=dtype, device=device), **kwargs, check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))), ) @@ -1461,7 +1431,7 @@ def test_kernel_bounding_box(self, param, value, format, dtype, device): if param != "angle": kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] - bounding_box = make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format) + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) check_kernel( F.rotate_bounding_box, @@ -1471,50 +1441,50 @@ def test_kernel_bounding_box(self, param, value, format, dtype, device): **kwargs, ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - check_kernel(F.rotate_mask, make_input(datapoints.Mask, mask_type=mask_type), **self._MINIMAL_AFFINE_KWARGS) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.rotate_mask, make_mask(), **self._MINIMAL_AFFINE_KWARGS) def test_kernel_video(self): - check_kernel(F.rotate_video, make_input(datapoints.Video), **self._MINIMAL_AFFINE_KWARGS) + check_kernel(F.rotate_video, make_video(), **self._MINIMAL_AFFINE_KWARGS) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.rotate_image_tensor), - (PIL.Image.Image, F.rotate_image_pil), - (datapoints.Image, F.rotate_image_tensor), - (datapoints.BoundingBox, F.rotate_bounding_box), - (datapoints.Mask, F.rotate_mask), - (datapoints.Video, F.rotate_video), + (F.rotate_image_tensor, make_image_tensor), + (F.rotate_image_pil, make_image_pil), + (F.rotate_image_tensor, make_image), + (F.rotate_bounding_box, make_bounding_box), + (F.rotate_mask, make_segmentation_mask), + (F.rotate_video, make_video), ], ) - def test_dispatcher(self, kernel, input_type): - check_dispatcher(F.rotate, kernel, make_input(input_type), **self._MINIMAL_AFFINE_KWARGS) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.rotate, kernel, make_input(), **self._MINIMAL_AFFINE_KWARGS) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.rotate_image_tensor), - (PIL.Image.Image, F.rotate_image_pil), - (datapoints.Image, F.rotate_image_tensor), - (datapoints.BoundingBox, F.rotate_bounding_box), - (datapoints.Mask, F.rotate_mask), - (datapoints.Video, F.rotate_video), + (F.rotate_image_tensor, torch.Tensor), + (F.rotate_image_pil, PIL.Image.Image), + (F.rotate_image_tensor, datapoints.Image), + (F.rotate_bounding_box, datapoints.BoundingBox), + (F.rotate_mask, datapoints.Mask), + (F.rotate_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): check_dispatcher_signatures_match(F.rotate, kernel=kernel, input_type=input_type) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform(self, input_type, device): - input = make_input(input_type, device=device) - - check_transform(transforms.RandomRotation, input, 
**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES) + def test_transform(self, make_input, device): + check_transform( + transforms.RandomRotation, make_input(device=device), **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES + ) @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @@ -1524,7 +1494,7 @@ def test_transform(self, input_type, device): @pytest.mark.parametrize("expand", [False, True]) @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) def test_functional_image_correctness(self, angle, center, interpolation, expand, fill): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") fill = adapt_fill(fill, dtype=torch.uint8) @@ -1546,7 +1516,7 @@ def test_functional_image_correctness(self, angle, center, interpolation, expand @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_image_correctness(self, center, interpolation, expand, fill, seed): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") fill = adapt_fill(fill, dtype=torch.uint8) @@ -1602,7 +1572,7 @@ def _reference_rotate_bounding_box(self, bounding_box, *, angle, expand, center) @pytest.mark.parametrize("expand", [False]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) def test_functional_bounding_box_correctness(self, format, angle, expand, center): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) actual = F.rotate(bounding_box, angle=angle, expand=expand, center=center) expected = self._reference_rotate_bounding_box(bounding_box, angle=angle, expand=expand, center=center) @@ -1615,7 +1585,7 @@ def test_functional_bounding_box_correctness(self, format, angle, expand, center @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_bounding_box_correctness(self, format, expand, center, seed): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center) From 181ac2a27441b32ee40d52d249f846c545015429 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 Jul 2023 14:22:16 +0200 Subject: [PATCH 14/17] refactor TestResize::test_noop --- test/test_transforms_v2_refactored.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 00d395336f4..69180b99dbc 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -748,9 +748,9 @@ def test_transform_unknown_size_error(self): ], ) def test_noop(self, size, make_input): - input = make_input(**{"spatial_size" if make_input is make_bounding_box else "size": self.INPUT_SIZE}) + input = make_input(self.INPUT_SIZE) - output = F.resize(input, size=size, antialias=True) + output = F.resize(input, size=F.get_spatial_size(input), antialias=True) # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there # is a good reason to break this, feel free to downgrade to an equality check. 
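
A note on the refactored TestResize.test_noop above: it relies on the behavior called out in the inline comment, namely that resizing an input to its own spatial size currently hands the input back unchanged. Below is a minimal sketch of that property, assuming only the v2 functional API already used in this series (F.resize, F.get_spatial_size) and a plain image tensor rather than a datapoint wrapper:

.. code::

    import torch
    from torchvision.transforms.v2 import functional as F

    # Any (C, H, W) image tensor will do for this illustration.
    image = torch.rand(3, 17, 11)

    # Ask for the input's current spatial size, i.e. a no-op resize.
    output = F.resize(image, size=F.get_spatial_size(image), antialias=True)

    # Equality is the guaranteed property ...
    assert torch.equal(output, image)
    # ... and, as the comment in the patch notes, the current kernels additionally
    # return the very same object for plain tensors. That is behavior, not contract.
    assert output is image

For wrapped inputs such as datapoints.Image, the datapoint object may be recreated around the same underlying data even when nothing changes, so an equality (or data-pointer) check is the safer assertion there; the identity check in the test only pins down today's behavior for plain tensors.
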
From ee1754a9591b66610af8758261cf47976bf068f8 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 Jul 2023 14:35:12 +0200 Subject: [PATCH 15/17] cleanup --- test/test_prototype_transforms.py | 17 ++++++++--------- test/test_transforms_v2.py | 11 +++++------ test/test_transforms_v2_consistency.py | 11 ++--------- test/test_transforms_v2_utils.py | 4 ++-- 4 files changed, 17 insertions(+), 26 deletions(-) diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index cfbcc7c0557..c574979e22c 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -9,7 +9,6 @@ from common_utils import ( assert_equal, DEFAULT_EXTRA_DIMS, - DEFAULT_PORTRAIT_SPATIAL_SIZE, make_bounding_box, make_detection_mask, make_image, @@ -80,8 +79,8 @@ def test_mixup_cutmix(transform, input): for unsup_data in [ make_label(), make_bounding_box(format="XYXY"), - make_detection_mask(DEFAULT_PORTRAIT_SPATIAL_SIZE), - make_segmentation_mask(DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_detection_mask(), + make_segmentation_mask(), ]: input_copy["unsupported"] = unsup_data with pytest.raises(TypeError, match=err_msg): @@ -390,9 +389,9 @@ class TestPermuteDimensions: ) def test_call(self, dims, inverse_dims): sample = dict( - image=make_image(DEFAULT_PORTRAIT_SPATIAL_SIZE), - bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), - video=make_video(DEFAULT_PORTRAIT_SPATIAL_SIZE), + image=make_image(), + bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY), + video=make_video(), str="str", int=0, ) @@ -434,9 +433,9 @@ class TestTransposeDimensions: ) def test_call(self, dims): sample = dict( - image=make_image(DEFAULT_PORTRAIT_SPATIAL_SIZE), - bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE), - video=make_video(DEFAULT_PORTRAIT_SPATIAL_SIZE), + image=make_image(), + bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY), + video=make_video(), str="str", int=0, ) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 0f6c4cc3b2e..3743581794f 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -17,7 +17,6 @@ assert_equal, assert_run_python_script, cpu_and_cuda, - DEFAULT_PORTRAIT_SPATIAL_SIZE, make_bounding_box, make_bounding_boxes, make_detection_mask, @@ -168,8 +167,8 @@ class TestSmoke: @pytest.mark.parametrize( "image_or_video", [ - make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), - make_video(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_image(), + make_video(), next(make_pil_images(color_spaces=["RGB"])), next(make_vanilla_tensor_images()), ], @@ -353,7 +352,7 @@ def test_random_resized_crop(self, transform, input): next(make_vanilla_tensor_images()), next(make_vanilla_tensor_images()), next(make_pil_images()), - make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), + make_image(), next(make_videos()), ], 3, @@ -1347,8 +1346,8 @@ class TestToDtype: ) def test_call(self, dtype, expected_dtypes): sample = dict( - video=make_video(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.int64), - image=make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.uint8), + video=make_video(dtype=torch.int64), + image=make_image(dtype=torch.uint8), bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, dtype=torch.float32), str="str", int=0, diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index 3f631d7ac94..bf297473bc2 100644 --- 
a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -17,7 +17,6 @@ ArgsKwargs, assert_close, assert_equal, - DEFAULT_PORTRAIT_SPATIAL_SIZE, make_bounding_box, make_detection_mask, make_image, @@ -709,14 +708,8 @@ def test_call_consistency(config, args_kwargs): id=transform_cls.__name__, ) for transform_cls, get_params_args_kwargs in [ - ( - v2_transforms.RandomResizedCrop, - ArgsKwargs(make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=[0.3, 0.7], ratio=[0.5, 1.5]), - ), - ( - v2_transforms.RandomErasing, - ArgsKwargs(make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE), scale=(0.3, 0.7), ratio=(0.5, 1.5)), - ), + (v2_transforms.RandomResizedCrop, ArgsKwargs(make_image(), scale=[0.3, 0.7], ratio=[0.5, 1.5])), + (v2_transforms.RandomErasing, ArgsKwargs(make_image(), scale=(0.3, 0.7), ratio=(0.5, 1.5))), (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)), (v2_transforms.ElasticTransform, ArgsKwargs(alpha=[15.3, 27.2], sigma=[2.5, 3.9], size=[17, 31])), (v2_transforms.GaussianBlur, ArgsKwargs(0.3, 1.4)), diff --git a/test/test_transforms_v2_utils.py b/test/test_transforms_v2_utils.py index 98271b893d6..198ab39a475 100644 --- a/test/test_transforms_v2_utils.py +++ b/test/test_transforms_v2_utils.py @@ -4,14 +4,14 @@ import torch import torchvision.transforms.v2.utils -from common_utils import DEFAULT_PORTRAIT_SPATIAL_SIZE, make_bounding_box, make_detection_mask, make_image +from common_utils import make_bounding_box, make_detection_mask, make_image from torchvision import datapoints from torchvision.transforms.v2.functional import to_image_pil from torchvision.transforms.v2.utils import has_all, has_any -IMAGE = make_image(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, color_space="RGB") +IMAGE = make_image(color_space="RGB") BOUNDING_BOX = make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, spatial_size=IMAGE.spatial_size) MASK = make_detection_mask(size=IMAGE.spatial_size) From ba94e48d2c99a01643691b3f9154ce92e62184a9 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 Jul 2023 16:22:00 +0200 Subject: [PATCH 16/17] add comment --- test/common_utils.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 009d9dfb21a..72ecf104301 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -633,6 +633,32 @@ def make_bounding_box( dtype=None, device="cpu", ): + """ + size: Size of the actual bounding box, i.e. + - (box[3] - box[1], box[2] - box[0]) for XYXY + - (H, W) for XYWH and CXCYWH + spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on + returned datapoints.BoundingBox + + To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker + functions, e.g. + + .. code:: + + image = make_image=(size=size) + bounding_box = make_bounding_box(spatial_size=size) + assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image) + + For convenience, if both size and spatial_size are omitted, spatial_size defaults to the same value as size for all + other maker functions, e.g. + + .. code:: + + image = make_image=() + bounding_box = make_bounding_box() + assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image) + """ + def sample_position(values, max_value): # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high. 
# However, if we have batch_dims, we need tensors as limits. @@ -648,7 +674,6 @@ def sample_position(values, max_value): height, width = size height_margin, width_margin = torch.randint(10, (2,)).tolist() spatial_size = (height + height_margin, width + width_margin) - spatial_height, spatial_width = spatial_size dtype = dtype or torch.float32 @@ -658,13 +683,12 @@ def sample_position(values, max_value): ) if size is None: - h = torch.randint(1, spatial_height - 1, batch_dims) - w = torch.randint(1, spatial_width - 1, batch_dims) + h, w = [torch.randint(1, s, batch_dims) for s in spatial_size] else: - h, w = [torch.full(batch_dims, v, dtype=torch.int) for v in size] + h, w = [torch.full(batch_dims, s, dtype=torch.int) for s in size] - y = sample_position(h, spatial_height) - x = sample_position(w, spatial_width) + y = sample_position(h, spatial_size[0]) + x = sample_position(w, spatial_size[1]) if format is datapoints.BoundingBoxFormat.XYWH: parts = (x, y, w, h) From 519f0fad9af28e8dc95206df449ffb40cd5ac104 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 5 Jul 2023 20:39:13 +0200 Subject: [PATCH 17/17] remove obsolete tests --- test/test_transforms_v2_functional.py | 157 -------------------------- 1 file changed, 157 deletions(-) diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index 79ea20d854e..465cc227107 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ -665,163 +665,6 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_): return true_matrix -@pytest.mark.parametrize("angle", range(-90, 90, 56)) -@pytest.mark.parametrize("expand, center", [(True, None), (False, None), (False, (12, 14))]) -def test_correctness_rotate_bounding_box(angle, expand, center): - def _compute_expected_bbox(bbox, angle_, expand_, center_): - affine_matrix = _compute_affine_matrix(angle_, [0.0, 0.0], 1.0, [0.0, 0.0], center_) - affine_matrix = affine_matrix[:2, :] - - height, width = bbox.spatial_size - bbox_xyxy = convert_format_bounding_box(bbox, new_format=datapoints.BoundingBoxFormat.XYXY) - points = np.array( - [ - [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0], - [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0], - [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0], - [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0], - # image frame - [0.0, 0.0, 1.0], - [0.0, height, 1.0], - [width, height, 1.0], - [width, 0.0, 1.0], - ] - ) - transformed_points = np.matmul(points, affine_matrix.T) - out_bbox = [ - float(np.min(transformed_points[:4, 0])), - float(np.min(transformed_points[:4, 1])), - float(np.max(transformed_points[:4, 0])), - float(np.max(transformed_points[:4, 1])), - ] - if expand_: - tr_x = np.min(transformed_points[4:, 0]) - tr_y = np.min(transformed_points[4:, 1]) - out_bbox[0] -= tr_x - out_bbox[1] -= tr_y - out_bbox[2] -= tr_x - out_bbox[3] -= tr_y - - height = int(height - 2 * tr_y) - width = int(width - 2 * tr_x) - - out_bbox = datapoints.BoundingBox( - out_bbox, - format=datapoints.BoundingBoxFormat.XYXY, - spatial_size=(height, width), - dtype=bbox.dtype, - device=bbox.device, - ) - out_bbox = clamp_bounding_box(convert_format_bounding_box(out_bbox, new_format=bbox.format)) - return out_bbox, (height, width) - - spatial_size = (32, 38) - - for bboxes in make_bounding_boxes(spatial_size=spatial_size, extra_dims=((4,),)): - bboxes_format = bboxes.format - bboxes_spatial_size = bboxes.spatial_size - - output_bboxes, output_spatial_size = F.rotate_bounding_box( - 
bboxes.as_subclass(torch.Tensor), - format=bboxes_format, - spatial_size=bboxes_spatial_size, - angle=angle, - expand=expand, - center=center, - ) - - center_ = center - if center_ is None: - center_ = [s * 0.5 for s in bboxes_spatial_size[::-1]] - - if bboxes.ndim < 2: - bboxes = [bboxes] - - expected_bboxes = [] - for bbox in bboxes: - bbox = datapoints.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size) - expected_bbox, expected_spatial_size = _compute_expected_bbox(bbox, -angle, expand, center_) - expected_bboxes.append(expected_bbox) - if len(expected_bboxes) > 1: - expected_bboxes = torch.stack(expected_bboxes) - else: - expected_bboxes = expected_bboxes[0] - torch.testing.assert_close(output_bboxes, expected_bboxes, atol=1, rtol=0) - torch.testing.assert_close(output_spatial_size, expected_spatial_size, atol=1, rtol=0) - - -@pytest.mark.parametrize("device", cpu_and_cuda()) -@pytest.mark.parametrize("expand", [False]) # expand=True does not match D2 -def test_correctness_rotate_bounding_box_on_fixed_input(device, expand): - # Check transformation against known expected output - format = datapoints.BoundingBoxFormat.XYXY - spatial_size = (64, 64) - # xyxy format - in_boxes = [ - [1, 1, 5, 5], - [1, spatial_size[0] - 6, 5, spatial_size[0] - 2], - [spatial_size[1] - 6, spatial_size[0] - 6, spatial_size[1] - 2, spatial_size[0] - 2], - [spatial_size[1] // 2 - 10, spatial_size[0] // 2 - 10, spatial_size[1] // 2 + 10, spatial_size[0] // 2 + 10], - ] - in_boxes = torch.tensor(in_boxes, dtype=torch.float64, device=device) - # Tested parameters - angle = 45 - center = None if expand else [12, 23] - - # # Expected bboxes computed using Detectron2: - # from detectron2.data.transforms import RotationTransform, AugmentationList - # from detectron2.data.transforms import AugInput - # import cv2 - # inpt = AugInput(im1, boxes=np.array(in_boxes, dtype="float32")) - # augs = AugmentationList([RotationTransform(*size, angle, expand=expand, center=center, interp=cv2.INTER_NEAREST), ]) - # out = augs(inpt) - # print(inpt.boxes) - if expand: - expected_bboxes = [ - [1.65937957, 42.67157288, 7.31623382, 48.32842712], - [41.96446609, 82.9766594, 47.62132034, 88.63351365], - [82.26955262, 42.67157288, 87.92640687, 48.32842712], - [31.35786438, 31.35786438, 59.64213562, 59.64213562], - ] - else: - expected_bboxes = [ - [-11.33452378, 12.39339828, -5.67766953, 18.05025253], - [28.97056275, 52.69848481, 34.627417, 58.35533906], - [69.27564928, 12.39339828, 74.93250353, 18.05025253], - [18.36396103, 1.07968978, 46.64823228, 29.36396103], - ] - expected_bboxes = clamp_bounding_box( - datapoints.BoundingBox(expected_bboxes, format="XYXY", spatial_size=spatial_size) - ).tolist() - - output_boxes, _ = F.rotate_bounding_box( - in_boxes, - format=format, - spatial_size=spatial_size, - angle=angle, - expand=expand, - center=center, - ) - - torch.testing.assert_close(output_boxes.tolist(), expected_bboxes) - - -@pytest.mark.parametrize("device", cpu_and_cuda()) -def test_correctness_rotate_segmentation_mask_on_fixed_input(device): - # Check transformation against known expected output and CPU/CUDA devices - - # Create a fixed input segmentation mask with 2 square masks - # in top-left, bottom-left corners - mask = torch.zeros(1, 32, 32, dtype=torch.long, device=device) - mask[0, 2:10, 2:10] = 1 - mask[0, 32 - 9 : 32 - 3, 3:9] = 2 - - # Rotate 90 degrees - expected_mask = torch.rot90(mask, k=1, dims=(-2, -1)) - out_mask = F.rotate_mask(mask, 90, expand=False) - torch.testing.assert_close(out_mask, 
expected_mask) - - @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "format",