diff --git a/test/common_utils.py b/test/common_utils.py index abefd07c43d..72ecf104301 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -27,7 +27,7 @@ from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair from torchvision import datapoints, io from torchvision.transforms._functional_tensor import _max_value as get_max_value -from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_tensor +from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_pil, to_image_tensor IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"]) @@ -399,6 +399,9 @@ def load(self, device="cpu"): ) +# new v2 default +DEFAULT_SIZE = (17, 11) +# old v2 defaults DEFAULT_SQUARE_SPATIAL_SIZE = 15 DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9) @@ -406,13 +409,12 @@ def load(self, device="cpu"): DEFAULT_LANDSCAPE_SPATIAL_SIZE, DEFAULT_PORTRAIT_SPATIAL_SIZE, DEFAULT_SQUARE_SPATIAL_SIZE, - "random", ) def _parse_spatial_size(size, *, name="size"): if size == "random": - return tuple(torch.randint(15, 33, (2,)).tolist()) + raise ValueError("This should never happen") elif isinstance(size, int) and size > 0: return (size, size) elif ( @@ -492,8 +494,40 @@ def get_num_channels(color_space): return num_channels +def make_image( + size=DEFAULT_SIZE, + *, + color_space="RGB", + batch_dims=(), + dtype=None, + device="cpu", + memory_format=torch.contiguous_format, +): + max_value = get_max_value(dtype or torch.uint8) + data = torch.testing.make_tensor( + (*batch_dims, get_num_channels(color_space), *size), + low=0, + high=max_value, + dtype=dtype or torch.uint8, + device=device, + memory_format=memory_format, + ) + if color_space in {"GRAY_ALPHA", "RGBA"}: + data[..., -1, :, :] = max_value + + return datapoints.Image(data) + + +def make_image_tensor(*args, **kwargs): + return make_image(*args, **kwargs).as_subclass(torch.Tensor) + + +def make_image_pil(*args, **kwargs): + return to_image_pil(make_image(*args, **kwargs)) + + def make_image_loader( - size="random", + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, color_space="RGB", extra_dims=(), @@ -501,24 +535,25 @@ def make_image_loader( constant_alpha=True, memory_format=torch.contiguous_format, ): + if not constant_alpha: + raise ValueError("This should never happen") size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) def fn(shape, dtype, device, memory_format): - max_value = get_max_value(dtype) - data = torch.testing.make_tensor( - shape, low=0, high=max_value, dtype=dtype, device=device, memory_format=memory_format + *batch_dims, _, height, width = shape + return make_image( + (height, width), + color_space=color_space, + batch_dims=batch_dims, + dtype=dtype, + device=device, + memory_format=memory_format, ) - if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha: - data[..., -1, :, :] = max_value - return datapoints.Image(data) return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format) -make_image = from_loader(make_image_loader) - - def make_image_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, @@ -540,7 +575,7 @@ def make_image_loaders( def make_image_loader_for_interpolation( - size="random", *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format + size=(233, 147), *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format ): size = _parse_spatial_size(size) num_channels =
get_num_channels(color_space) @@ -589,76 +624,114 @@ class BoundingBoxLoader(TensorLoader): spatial_size: Tuple[int, int] -def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): - low, high = torch.broadcast_tensors( - *[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))] +def make_bounding_box( + size=None, + *, + format=datapoints.BoundingBoxFormat.XYXY, + spatial_size=None, + batch_dims=(), + dtype=None, + device="cpu", +): + """ + size: Size of the actual bounding box, i.e. + - (box[3] - box[1], box[2] - box[0]) for XYXY + - (H, W) for XYWH and CXCYWH + spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on + the returned datapoints.BoundingBox + + To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker + functions, e.g. + + .. code:: + + image = make_image(size=size) + bounding_box = make_bounding_box(spatial_size=size) + assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image) + + For convenience, if both size and spatial_size are omitted, spatial_size here defaults to the default size of + the other maker functions, e.g. + + .. code:: + + image = make_image() + bounding_box = make_bounding_box() + assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image) + """ + + def sample_position(values, max_value): + # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high. + # However, if we have batch_dims, we need tensors as limits. + return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape) + + if isinstance(format, str): + format = datapoints.BoundingBoxFormat[format] + + if spatial_size is None: + if size is None: + spatial_size = DEFAULT_SIZE + else: + height, width = size + height_margin, width_margin = torch.randint(10, (2,)).tolist() + spatial_size = (height + height_margin, width + width_margin) + + dtype = dtype or torch.float32 + + if any(dim == 0 for dim in batch_dims): + return datapoints.BoundingBox( + torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size + ) + + if size is None: + h, w = [torch.randint(1, s, batch_dims) for s in spatial_size] + else: + h, w = [torch.full(batch_dims, s, dtype=torch.int) for s in size] + + y = sample_position(h, spatial_size[0]) + x = sample_position(w, spatial_size[1]) + + if format is datapoints.BoundingBoxFormat.XYWH: + parts = (x, y, w, h) + elif format is datapoints.BoundingBoxFormat.XYXY: + x1, y1 = x, y + x2 = x1 + w + y2 = y1 + h + parts = (x1, y1, x2, y2) + elif format is datapoints.BoundingBoxFormat.CXCYWH: + cx = x + w / 2 + cy = y + h / 2 + parts = (cx, cy, w, h) + else: + raise ValueError(f"Format {format} is not supported") + + return datapoints.BoundingBox( + torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size ) - return torch.stack( - [ - torch.randint(low_scalar, high_scalar, (), **kwargs) - for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist()) - ] - ).reshape(low.shape) -def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32): +def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32): if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] - if format not in { - datapoints.BoundingBoxFormat.XYXY, -
datapoints.BoundingBoxFormat.XYWH, - datapoints.BoundingBoxFormat.CXCYWH, - }: - raise pytest.UsageError(f"Can't make bounding box in format {format}") spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") def fn(shape, dtype, device): - *extra_dims, num_coordinates = shape + *batch_dims, num_coordinates = shape if num_coordinates != 4: raise pytest.UsageError() - if any(dim == 0 for dim in extra_dims): - return datapoints.BoundingBox( - torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size - ) - - height, width = spatial_size - - if format == datapoints.BoundingBoxFormat.XYXY: - x1 = torch.randint(0, width // 2, extra_dims) - y1 = torch.randint(0, height // 2, extra_dims) - x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1 - y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1 - parts = (x1, y1, x2, y2) - elif format == datapoints.BoundingBoxFormat.XYWH: - x = torch.randint(0, width // 2, extra_dims) - y = torch.randint(0, height // 2, extra_dims) - w = randint_with_tensor_bounds(1, width - x) - h = randint_with_tensor_bounds(1, height - y) - parts = (x, y, w, h) - else: # format == features.BoundingBoxFormat.CXCYWH: - cx = torch.randint(1, width - 1, extra_dims) - cy = torch.randint(1, height - 1, extra_dims) - w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1) - h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1) - parts = (cx, cy, w, h) - - return datapoints.BoundingBox( - torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size + return make_bounding_box( + format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device ) return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size) -make_bounding_box = from_loader(make_bounding_box_loader) - - def make_bounding_box_loaders( *, extra_dims=DEFAULT_EXTRA_DIMS, formats=tuple(datapoints.BoundingBoxFormat), - spatial_size="random", + spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtypes=(torch.float32, torch.float64, torch.int64), ): for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): @@ -672,24 +745,35 @@ class MaskLoader(TensorLoader): pass -def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8): +def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): + """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, num_objects, *size), + low=0, + high=2, + dtype=dtype or torch.bool, + device=device, + ) + ) + + +def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8): # This produces "detection" masks, i.e. 
`(*, N, H, W)`, where `N` denotes the number of objects size = _parse_spatial_size(size) - num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device) - return datapoints.Mask(data) + *batch_dims, num_objects, height, width = shape + return make_detection_mask( + (height, width), num_objects=num_objects, batch_dims=batch_dims, dtype=dtype, device=device + ) return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype) -make_detection_mask = from_loader(make_detection_mask_loader) - - def make_detection_mask_loaders( sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), + num_objects=(1, 0, 5), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -700,25 +784,38 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8): - # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values - size = _parse_spatial_size(size) - num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories +def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): + """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value""" + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, *size), + low=0, + high=num_categories, + dtype=dtype or torch.uint8, + device=device, + ) + ) - def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device) - return datapoints.Mask(data) - return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype) +def make_segmentation_mask_loader( + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_categories=10, extra_dims=(), dtype=torch.uint8 +): + # This produces "segmentation" masks, i.e. 
`(*, H, W)`, where the category is encoded in the values + spatial_size = _parse_spatial_size(size) + def fn(shape, dtype, device): + *batch_dims, height, width = shape + return make_segmentation_mask( + (height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device + ) -make_segmentation_mask = from_loader(make_segmentation_mask_loader) + return MaskLoader(fn, shape=(*extra_dims, *spatial_size), dtype=dtype) def make_segmentation_mask_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, - num_categories=(1, 2, "random"), + num_categories=(1, 2, 10), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -732,8 +829,8 @@ def make_segmentation_mask_loaders( def make_mask_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), - num_categories=(1, 2, "random"), + num_objects=(1, 0, 5), + num_categories=(1, 2, 10), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -750,29 +847,35 @@ class VideoLoader(ImageLoader): pass +def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs): + return datapoints.Video(make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs)) + + def make_video_loader( - size="random", + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, color_space="RGB", - num_frames="random", + num_frames=3, extra_dims=(), dtype=torch.uint8, ): size = _parse_spatial_size(size) - num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames def fn(shape, dtype, device, memory_format): - video = make_image( - size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device, memory_format=memory_format + *batch_dims, num_frames, _, height, width = shape + return make_video( + (height, width), + num_frames=num_frames, + batch_dims=batch_dims, + color_space=color_space, + dtype=dtype, + device=device, + memory_format=memory_format, ) - return datapoints.Video(video) return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype) -make_video = from_loader(make_video_loader) - - def make_video_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, @@ -780,7 +883,7 @@ def make_video_loaders( "GRAY", "RGB", ), - num_frames=(1, 0, "random"), + num_frames=(1, 0, 3), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8, torch.float32, torch.float64), ): diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index 255c3b5c32f..c574979e22c 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -216,7 +216,7 @@ def test__get_params(self, mocker): flat_inputs = [ make_image(size=spatial_size, color_space="RGB"), - make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape), + make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape), ] params = transform._get_params(flat_inputs) @@ -312,9 +312,9 @@ def test__transform_culling(self, mocker): ) bounding_boxes = make_bounding_box( - format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,) + format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,) ) - masks = make_detection_mask(size=spatial_size, extra_dims=(batch_size,)) + masks = make_detection_mask(size=spatial_size, batch_dims=(batch_size,)) labels = make_label(extra_dims=(batch_size,)) transform = transforms.FixedSizeCrop((-1, -1)) @@ -350,7 +350,7 @@ def test__transform_bounding_box_clamping(self, mocker): ) bounding_box = make_bounding_box( - format=BoundingBoxFormat.XYXY, 
spatial_size=spatial_size, extra_dims=(batch_size,) + format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,) ) mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box") @@ -496,7 +496,7 @@ def make_datapoints(): pil_image = to_image_pil(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } @@ -505,7 +505,7 @@ def make_datapoints(): tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } @@ -514,7 +514,7 @@ def make_datapoints(): datapoint_image = make_image(size=size, color_space="RGB") target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 093c378aa72..3743581794f 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -182,13 +182,13 @@ def test_common(self, transform, adapter, container_type, image_or_video, device video_datapoint=make_video(size=spatial_size), image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])), bounding_box_xyxy=make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(3,) + format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,) ), bounding_box_xywh=make_bounding_box( - format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, extra_dims=(4,) + format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, batch_dims=(4,) ), bounding_box_cxcywh=make_bounding_box( - format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, extra_dims=(5,) + format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, batch_dims=(5,) ), bounding_box_degenerate_xyxy=datapoints.BoundingBox( [ @@ -289,7 +289,7 @@ def test_common(self, transform, adapter, container_type, image_or_video, device ], dtypes=[torch.uint8], extra_dims=[(), (4,)], - **(dict(num_frames=["random"]) if fn is make_videos else dict()), + **(dict(num_frames=[3]) if fn is make_videos else dict()), ) for fn in [ make_images, @@ -1124,7 +1124,7 @@ def test__transform(self, mocker): transform = transforms.RandomIoUCrop() image = datapoints.Image(torch.rand(3, 32, 24)) - bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), extra_dims=(6,)) + bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), batch_dims=(6,)) masks = make_detection_mask((32, 24), num_objects=6) sample = [image, bboxes, masks] diff --git a/test/test_transforms_v2_consistency.py 
b/test/test_transforms_v2_consistency.py index f035dde45ed..bf297473bc2 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -1090,7 +1090,7 @@ def make_label(extra_dims, categories): pil_image = to_image_pil(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1098,9 +1098,9 @@ def make_label(extra_dims, categories): yield (pil_image, target) - tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) + tensor_image = torch.Tensor(make_image(size=size, color_space="RGB", dtype=torch.float32)) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1108,9 +1108,9 @@ def make_label(extra_dims, categories): yield (tensor_image, target) - datapoint_image = make_image(size=size, color_space="RGB") + datapoint_image = make_image(size=size, color_space="RGB", dtype=torch.float32) target = { - "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index 79ea20d854e..465cc227107 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ -665,163 +665,6 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_): return true_matrix -@pytest.mark.parametrize("angle", range(-90, 90, 56)) -@pytest.mark.parametrize("expand, center", [(True, None), (False, None), (False, (12, 14))]) -def test_correctness_rotate_bounding_box(angle, expand, center): - def _compute_expected_bbox(bbox, angle_, expand_, center_): - affine_matrix = _compute_affine_matrix(angle_, [0.0, 0.0], 1.0, [0.0, 0.0], center_) - affine_matrix = affine_matrix[:2, :] - - height, width = bbox.spatial_size - bbox_xyxy = convert_format_bounding_box(bbox, new_format=datapoints.BoundingBoxFormat.XYXY) - points = np.array( - [ - [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0], - [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0], - [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0], - [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0], - # image frame - [0.0, 0.0, 1.0], - [0.0, height, 1.0], - [width, height, 1.0], - [width, 0.0, 1.0], - ] - ) - transformed_points = np.matmul(points, affine_matrix.T) - out_bbox = [ - float(np.min(transformed_points[:4, 0])), - float(np.min(transformed_points[:4, 1])), - float(np.max(transformed_points[:4, 0])), - float(np.max(transformed_points[:4, 1])), - ] - if expand_: - tr_x = np.min(transformed_points[4:, 0]) - tr_y = np.min(transformed_points[4:, 1]) - out_bbox[0] -= tr_x - out_bbox[1] -= tr_y - out_bbox[2] -= tr_x - out_bbox[3] -= tr_y - - height = int(height - 2 * tr_y) - width = int(width - 2 * tr_x) - - out_bbox = datapoints.BoundingBox( - out_bbox, - format=datapoints.BoundingBoxFormat.XYXY, - spatial_size=(height, 
width), - dtype=bbox.dtype, - device=bbox.device, - ) - out_bbox = clamp_bounding_box(convert_format_bounding_box(out_bbox, new_format=bbox.format)) - return out_bbox, (height, width) - - spatial_size = (32, 38) - - for bboxes in make_bounding_boxes(spatial_size=spatial_size, extra_dims=((4,),)): - bboxes_format = bboxes.format - bboxes_spatial_size = bboxes.spatial_size - - output_bboxes, output_spatial_size = F.rotate_bounding_box( - bboxes.as_subclass(torch.Tensor), - format=bboxes_format, - spatial_size=bboxes_spatial_size, - angle=angle, - expand=expand, - center=center, - ) - - center_ = center - if center_ is None: - center_ = [s * 0.5 for s in bboxes_spatial_size[::-1]] - - if bboxes.ndim < 2: - bboxes = [bboxes] - - expected_bboxes = [] - for bbox in bboxes: - bbox = datapoints.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size) - expected_bbox, expected_spatial_size = _compute_expected_bbox(bbox, -angle, expand, center_) - expected_bboxes.append(expected_bbox) - if len(expected_bboxes) > 1: - expected_bboxes = torch.stack(expected_bboxes) - else: - expected_bboxes = expected_bboxes[0] - torch.testing.assert_close(output_bboxes, expected_bboxes, atol=1, rtol=0) - torch.testing.assert_close(output_spatial_size, expected_spatial_size, atol=1, rtol=0) - - -@pytest.mark.parametrize("device", cpu_and_cuda()) -@pytest.mark.parametrize("expand", [False]) # expand=True does not match D2 -def test_correctness_rotate_bounding_box_on_fixed_input(device, expand): - # Check transformation against known expected output - format = datapoints.BoundingBoxFormat.XYXY - spatial_size = (64, 64) - # xyxy format - in_boxes = [ - [1, 1, 5, 5], - [1, spatial_size[0] - 6, 5, spatial_size[0] - 2], - [spatial_size[1] - 6, spatial_size[0] - 6, spatial_size[1] - 2, spatial_size[0] - 2], - [spatial_size[1] // 2 - 10, spatial_size[0] // 2 - 10, spatial_size[1] // 2 + 10, spatial_size[0] // 2 + 10], - ] - in_boxes = torch.tensor(in_boxes, dtype=torch.float64, device=device) - # Tested parameters - angle = 45 - center = None if expand else [12, 23] - - # # Expected bboxes computed using Detectron2: - # from detectron2.data.transforms import RotationTransform, AugmentationList - # from detectron2.data.transforms import AugInput - # import cv2 - # inpt = AugInput(im1, boxes=np.array(in_boxes, dtype="float32")) - # augs = AugmentationList([RotationTransform(*size, angle, expand=expand, center=center, interp=cv2.INTER_NEAREST), ]) - # out = augs(inpt) - # print(inpt.boxes) - if expand: - expected_bboxes = [ - [1.65937957, 42.67157288, 7.31623382, 48.32842712], - [41.96446609, 82.9766594, 47.62132034, 88.63351365], - [82.26955262, 42.67157288, 87.92640687, 48.32842712], - [31.35786438, 31.35786438, 59.64213562, 59.64213562], - ] - else: - expected_bboxes = [ - [-11.33452378, 12.39339828, -5.67766953, 18.05025253], - [28.97056275, 52.69848481, 34.627417, 58.35533906], - [69.27564928, 12.39339828, 74.93250353, 18.05025253], - [18.36396103, 1.07968978, 46.64823228, 29.36396103], - ] - expected_bboxes = clamp_bounding_box( - datapoints.BoundingBox(expected_bboxes, format="XYXY", spatial_size=spatial_size) - ).tolist() - - output_boxes, _ = F.rotate_bounding_box( - in_boxes, - format=format, - spatial_size=spatial_size, - angle=angle, - expand=expand, - center=center, - ) - - torch.testing.assert_close(output_boxes.tolist(), expected_bboxes) - - -@pytest.mark.parametrize("device", cpu_and_cuda()) -def test_correctness_rotate_segmentation_mask_on_fixed_input(device): - # Check transformation against 
known expected output and CPU/CUDA devices - - # Create a fixed input segmentation mask with 2 square masks - # in top-left, bottom-left corners - mask = torch.zeros(1, 32, 32, dtype=torch.long, device=device) - mask[0, 2:10, 2:10] = 1 - mask[0, 32 - 9 : 32 - 3, 3:9] = 2 - - # Rotate 90 degrees - expected_mask = torch.rot90(mask, k=1, dims=(-2, -1)) - out_mask = F.rotate_mask(mask, 90, expand=False) - torch.testing.assert_close(out_mask, expected_mask) - - @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "format", diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 2130a8cf50a..69180b99dbc 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -20,6 +20,8 @@ make_bounding_box, make_detection_mask, make_image, + make_image_pil, + make_image_tensor, make_segmentation_mask, make_video, set_rng_seed, @@ -308,42 +310,6 @@ def wrapper(input, *args, **kwargs): return wrapper -def make_input(input_type, *, dtype=None, device="cpu", spatial_size=(17, 11), mask_type="segmentation", **kwargs): - if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}: - input = make_image(size=spatial_size, dtype=dtype or torch.uint8, device=device, **kwargs) - if input_type is torch.Tensor: - input = input.as_subclass(torch.Tensor) - elif input_type is PIL.Image.Image: - input = F.to_image_pil(input) - elif input_type is datapoints.BoundingBox: - kwargs.setdefault("format", datapoints.BoundingBoxFormat.XYXY) - input = make_bounding_box( - dtype=dtype or torch.float32, - device=device, - spatial_size=spatial_size, - **kwargs, - ) - elif input_type is datapoints.Mask: - if mask_type == "segmentation": - make_mask = make_segmentation_mask - default_dtype = torch.uint8 - elif mask_type == "detection": - make_mask = make_detection_mask - default_dtype = torch.bool - else: - raise ValueError(f"`mask_type` can be `'segmentation'` or `'detection'`, but got {mask_type}.") - input = make_mask(size=spatial_size, dtype=dtype or default_dtype, device=device, **kwargs) - elif input_type is datapoints.Video: - input = make_video(size=spatial_size, dtype=dtype or torch.uint8, device=device, **kwargs) - else: - raise TypeError( - f"Input can either be a plain tensor, any TorchVision datapoint, or a PIL image, " - f"but got {input_type} instead." 
- ) - - return input - - def param_value_parametrization(**kwargs): """Helper function to turn @@ -516,7 +482,7 @@ def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, check_kernel( F.resize_image_tensor, - make_input(datapoints.Image, dtype=dtype, device=device, spatial_size=self.INPUT_SIZE), + make_image(self.INPUT_SIZE, dtype=dtype, device=device), size=size, interpolation=interpolation, **max_size_kwarg, @@ -534,8 +500,11 @@ def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_box = make_input( - datapoints.BoundingBox, dtype=dtype, device=device, format=format, spatial_size=self.INPUT_SIZE + bounding_box = make_bounding_box( + format=format, + spatial_size=self.INPUT_SIZE, + dtype=dtype, + device=device, ) check_kernel( F.resize_bounding_box, @@ -546,53 +515,44 @@ def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device): check_scripted_vs_eager=not isinstance(size, int), ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - check_kernel( - F.resize_mask, - make_input(datapoints.Mask, spatial_size=self.INPUT_SIZE, mask_type=mask_type), - size=self.OUTPUT_SIZES[-1], - ) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.resize_mask, make_mask(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1]) def test_kernel_video(self): - check_kernel( - F.resize_video, - make_input(datapoints.Video, spatial_size=self.INPUT_SIZE), - size=self.OUTPUT_SIZES[-1], - antialias=True, - ) + check_kernel(F.resize_video, make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1], antialias=True) @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.resize_image_tensor), - (PIL.Image.Image, F.resize_image_pil), - (datapoints.Image, F.resize_image_tensor), - (datapoints.BoundingBox, F.resize_bounding_box), - (datapoints.Mask, F.resize_mask), - (datapoints.Video, F.resize_video), + (F.resize_image_tensor, make_image_tensor), + (F.resize_image_pil, make_image_pil), + (F.resize_image_tensor, make_image), + (F.resize_bounding_box, make_bounding_box), + (F.resize_mask, make_segmentation_mask), + (F.resize_video, make_video), ], ) - def test_dispatcher(self, size, input_type, kernel): + def test_dispatcher(self, size, kernel, make_input): check_dispatcher( F.resize, kernel, - make_input(input_type, spatial_size=self.INPUT_SIZE), + make_input(self.INPUT_SIZE), size=size, antialias=True, check_scripted_smoke=not isinstance(size, int), ) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.resize_image_tensor), - (PIL.Image.Image, F.resize_image_pil), - (datapoints.Image, F.resize_image_tensor), - (datapoints.BoundingBox, F.resize_bounding_box), - (datapoints.Mask, F.resize_mask), - (datapoints.Video, F.resize_video), + (F.resize_image_tensor, torch.Tensor), + (F.resize_image_pil, PIL.Image.Image), + (F.resize_image_tensor, datapoints.Image), + (F.resize_bounding_box, datapoints.BoundingBox), + (F.resize_mask, datapoints.Mask), + (F.resize_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): @@ -601,18 +561,19 @@ def test_dispatcher_signature(self, kernel, input_type): @pytest.mark.parametrize("size", OUTPUT_SIZES) 
@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], ) - def test_transform(self, size, device, input_type): - input = make_input(input_type, device=device, spatial_size=self.INPUT_SIZE) - - check_transform( - transforms.Resize, - input, - size=size, - antialias=True, - ) + def test_transform(self, size, device, make_input): + check_transform(transforms.Resize, make_input(self.INPUT_SIZE, device=device), size=size, antialias=True) def _check_output_size(self, input, output, *, size, max_size): assert tuple(F.get_spatial_size(output)) == self._compute_output_size( @@ -629,7 +590,7 @@ def test_image_correctness(self, size, interpolation, use_max_size, fn): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu", spatial_size=self.INPUT_SIZE) + image = make_image(self.INPUT_SIZE, dtype=torch.uint8) actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True) expected = F.to_image_tensor( @@ -672,7 +633,7 @@ def test_bounding_box_correctness(self, format, size, use_max_size, fn): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_box = make_input(datapoints.BoundingBox, spatial_size=self.INPUT_SIZE) + bounding_box = make_bounding_box(format=format, spatial_size=self.INPUT_SIZE) actual = fn(bounding_box, size=size, **max_size_kwarg) expected = self._reference_resize_bounding_box(bounding_box, size=size, **max_size_kwarg) @@ -682,11 +643,11 @@ def test_bounding_box_correctness(self, format, size, use_max_size, fn): @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES)) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], ) - def test_pil_interpolation_compat_smoke(self, interpolation, input_type): - input = make_input(input_type, spatial_size=self.INPUT_SIZE) + def test_pil_interpolation_compat_smoke(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) with ( contextlib.nullcontext() @@ -702,16 +663,22 @@ def test_pil_interpolation_compat_smoke(self, interpolation, input_type): def test_dispatcher_pil_antialias_warning(self): with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"): - F.resize( - make_input(PIL.Image.Image, spatial_size=self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False - ) + F.resize(make_image_pil(self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False) @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], ) - def test_max_size_error(self, size, input_type): + def test_max_size_error(self, size, make_input): if isinstance(size, int) or len(size) == 1: max_size = (size if isinstance(size, int) else size[0]) - 1 
match = "must be strictly greater than the requested size" @@ -721,39 +688,39 @@ def test_max_size_error(self, size, input_type): match = "size should be an int or a sequence of length 1" with pytest.raises(ValueError, match=match): - F.resize(make_input(input_type, spatial_size=self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) + F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, datapoints.Image, datapoints.Video], + "make_input", + [make_image_tensor, make_image, make_video], ) - def test_antialias_warning(self, interpolation, input_type): + def test_antialias_warning(self, interpolation, make_input): with ( assert_warns_antialias_default_value() if interpolation in {transforms.InterpolationMode.BILINEAR, transforms.InterpolationMode.BICUBIC} else assert_no_warnings() ): F.resize( - make_input(input_type, spatial_size=self.INPUT_SIZE), + make_input(self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], interpolation=interpolation, ) @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], ) - def test_interpolation_int(self, interpolation, input_type): + def test_interpolation_int(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) + # `InterpolationMode.NEAREST_EXACT` has no proper corresponding integer equivalent. Internally, we map it to # `0` to be the same as `InterpolationMode.NEAREST` for PIL. However, for the tensor backend there is a # difference and thus we don't test it here. - if issubclass(input_type, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: + if isinstance(input, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: return - input = make_input(input_type, spatial_size=self.INPUT_SIZE) - expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True) actual = F.resize( input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True @@ -769,13 +736,21 @@ def test_transform_unknown_size_error(self): "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)] ) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], ) - def test_noop(self, size, input_type): - input = make_input(input_type, spatial_size=self.INPUT_SIZE) + def test_noop(self, size, make_input): + input = make_input(self.INPUT_SIZE) - output = F.resize(input, size=size, antialias=True) + output = F.resize(input, size=F.get_spatial_size(input), antialias=True) # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there # is a good reason to break this, feel free to downgrade to an equality check. 
@@ -788,14 +763,22 @@ def test_noop(self, size, input_type): assert output is input @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], ) - def test_no_regression_5405(self, input_type): + def test_no_regression_5405(self, make_input): # Checks that `max_size` is not ignored if `size == small_edge_size` # See https://github.com/pytorch/vision/issues/5405 - input = make_input(input_type, spatial_size=self.INPUT_SIZE) + input = make_input(self.INPUT_SIZE) size = min(F.get_spatial_size(input)) max_size = size + 1 @@ -808,13 +791,13 @@ class TestHorizontalFlip: @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_image_tensor(self, dtype, device): - check_kernel(F.horizontal_flip_image_tensor, make_input(torch.Tensor, dtype=dtype, device=device)) + check_kernel(F.horizontal_flip_image_tensor, make_image(dtype=dtype, device=device)) @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_box(self, format, dtype, device): - bounding_box = make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format) + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) check_kernel( F.horizontal_flip_bounding_box, bounding_box, @@ -822,56 +805,54 @@ def test_kernel_bounding_box(self, format, dtype, device): spatial_size=bounding_box.spatial_size, ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - check_kernel(F.horizontal_flip_mask, make_input(datapoints.Mask, mask_type=mask_type)) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.horizontal_flip_mask, make_mask()) def test_kernel_video(self): - check_kernel(F.horizontal_flip_video, make_input(datapoints.Video)) + check_kernel(F.horizontal_flip_video, make_video()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.horizontal_flip_image_tensor), - (PIL.Image.Image, F.horizontal_flip_image_pil), - (datapoints.Image, F.horizontal_flip_image_tensor), - (datapoints.BoundingBox, F.horizontal_flip_bounding_box), - (datapoints.Mask, F.horizontal_flip_mask), - (datapoints.Video, F.horizontal_flip_video), + (F.horizontal_flip_image_tensor, make_image_tensor), + (F.horizontal_flip_image_pil, make_image_pil), + (F.horizontal_flip_image_tensor, make_image), + (F.horizontal_flip_bounding_box, make_bounding_box), + (F.horizontal_flip_mask, make_segmentation_mask), + (F.horizontal_flip_video, make_video), ], ) - def test_dispatcher(self, kernel, input_type): - check_dispatcher(F.horizontal_flip, kernel, make_input(input_type)) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.horizontal_flip, kernel, make_input()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.horizontal_flip_image_tensor), - (PIL.Image.Image, F.horizontal_flip_image_pil), - (datapoints.Image, F.horizontal_flip_image_tensor), - (datapoints.BoundingBox, F.horizontal_flip_bounding_box), - 
(datapoints.Mask, F.horizontal_flip_mask), - (datapoints.Video, F.horizontal_flip_video), + (F.horizontal_flip_image_tensor, torch.Tensor), + (F.horizontal_flip_image_pil, PIL.Image.Image), + (F.horizontal_flip_image_tensor, datapoints.Image), + (F.horizontal_flip_bounding_box, datapoints.BoundingBox), + (F.horizontal_flip_mask, datapoints.Mask), + (F.horizontal_flip_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): check_dispatcher_signatures_match(F.horizontal_flip, kernel=kernel, input_type=input_type) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform(self, input_type, device): - input = make_input(input_type, device=device) - - check_transform(transforms.RandomHorizontalFlip, input, p=1) + def test_transform(self, make_input, device): + check_transform(transforms.RandomHorizontalFlip, make_input(device=device), p=1) @pytest.mark.parametrize( "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] ) def test_image_correctness(self, fn): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") actual = fn(image) expected = F.to_image_tensor(F.horizontal_flip(F.to_image_pil(image))) @@ -901,7 +882,7 @@ def _reference_horizontal_flip_bounding_box(self, bounding_box): "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] ) def test_bounding_box_correctness(self, format, fn): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) actual = fn(bounding_box) expected = self._reference_horizontal_flip_bounding_box(bounding_box) @@ -909,12 +890,12 @@ def test_bounding_box_correctness(self, format, fn): torch.testing.assert_close(actual, expected) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform_noop(self, input_type, device): - input = make_input(input_type, device=device) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) transform = transforms.RandomHorizontalFlip(p=0) @@ -979,7 +960,7 @@ def test_kernel_image_tensor(self, param, value, dtype, device): value = adapt_fill(value, dtype=dtype) self._check_kernel( F.affine_image_tensor, - make_input(torch.Tensor, dtype=dtype, device=device), + make_image(dtype=dtype, device=device), **{param: value}, check_scripted_vs_eager=not (param in {"shear", "fill"} and isinstance(value, (int, float))), check_cuda_vs_cpu=dict(atol=1, rtol=0) @@ -997,58 +978,58 @@ def test_kernel_image_tensor(self, param, value, dtype, device): @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_box(self, param, value, format, dtype, device): - bounding_box = make_input(datapoints.BoundingBox, format=format, dtype=dtype, device=device) + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) 
self._check_kernel( F.affine_bounding_box, - make_input(datapoints.BoundingBox, format=format, dtype=dtype, device=device), + bounding_box, format=format, spatial_size=bounding_box.spatial_size, **{param: value}, check_scripted_vs_eager=not (param == "shear" and isinstance(value, (int, float))), ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - self._check_kernel(F.affine_mask, make_input(datapoints.Mask, mask_type=mask_type)) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + self._check_kernel(F.affine_mask, make_mask()) def test_kernel_video(self): - self._check_kernel(F.affine_video, make_input(datapoints.Video)) + self._check_kernel(F.affine_video, make_video()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.affine_image_tensor), - (PIL.Image.Image, F.affine_image_pil), - (datapoints.Image, F.affine_image_tensor), - (datapoints.BoundingBox, F.affine_bounding_box), - (datapoints.Mask, F.affine_mask), - (datapoints.Video, F.affine_video), + (F.affine_image_tensor, make_image_tensor), + (F.affine_image_pil, make_image_pil), + (F.affine_image_tensor, make_image), + (F.affine_bounding_box, make_bounding_box), + (F.affine_mask, make_segmentation_mask), + (F.affine_video, make_video), ], ) - def test_dispatcher(self, kernel, input_type): - check_dispatcher(F.affine, kernel, make_input(input_type), **self._MINIMAL_AFFINE_KWARGS) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.affine, kernel, make_input(), **self._MINIMAL_AFFINE_KWARGS) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.affine_image_tensor), - (PIL.Image.Image, F.affine_image_pil), - (datapoints.Image, F.affine_image_tensor), - (datapoints.BoundingBox, F.affine_bounding_box), - (datapoints.Mask, F.affine_mask), - (datapoints.Video, F.affine_video), + (F.affine_image_tensor, torch.Tensor), + (F.affine_image_pil, PIL.Image.Image), + (F.affine_image_tensor, datapoints.Image), + (F.affine_bounding_box, datapoints.BoundingBox), + (F.affine_mask, datapoints.Mask), + (F.affine_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): check_dispatcher_signatures_match(F.affine, kernel=kernel, input_type=input_type) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform(self, input_type, device): - input = make_input(input_type, device=device) + def test_transform(self, make_input, device): + input = make_input(device=device) check_transform(transforms.RandomAffine, input, **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES) @@ -1062,7 +1043,7 @@ def test_transform(self, input_type, device): ) @pytest.mark.parametrize("fill", CORRECTNESS_FILLS) def test_functional_image_correctness(self, angle, translate, scale, shear, center, interpolation, fill): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") fill = adapt_fill(fill, dtype=torch.uint8) @@ -1099,7 +1080,7 @@ def test_functional_image_correctness(self, angle, translate, scale, shear, cent @pytest.mark.parametrize("fill", 
CORRECTNESS_FILLS) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_image_correctness(self, center, interpolation, fill, seed): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") fill = adapt_fill(fill, dtype=torch.uint8) @@ -1163,7 +1144,7 @@ def _reference_affine_bounding_box(self, bounding_box, *, angle, translate, scal @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) def test_functional_bounding_box_correctness(self, format, angle, translate, scale, shear, center): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) actual = F.affine( bounding_box, @@ -1188,7 +1169,7 @@ def test_functional_bounding_box_correctness(self, format, angle, translate, sca @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_bounding_box_correctness(self, format, center, seed): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) transform = transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center) @@ -1208,7 +1189,7 @@ def test_transform_bounding_box_correctness(self, format, center, seed): @pytest.mark.parametrize("shear", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["shear"]) @pytest.mark.parametrize("seed", list(range(10))) def test_transform_get_params_bounds(self, degrees, translate, scale, shear, seed): - image = make_input(torch.Tensor) + image = make_image() height, width = F.get_spatial_size(image) transform = transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale, shear=shear) @@ -1289,13 +1270,13 @@ class TestVerticalFlip: @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_image_tensor(self, dtype, device): - check_kernel(F.vertical_flip_image_tensor, make_input(torch.Tensor, dtype=dtype, device=device)) + check_kernel(F.vertical_flip_image_tensor, make_image(dtype=dtype, device=device)) @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_box(self, format, dtype, device): - bounding_box = make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format) + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) check_kernel( F.vertical_flip_bounding_box, bounding_box, @@ -1303,54 +1284,52 @@ def test_kernel_bounding_box(self, format, dtype, device): spatial_size=bounding_box.spatial_size, ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - check_kernel(F.vertical_flip_mask, make_input(datapoints.Mask, mask_type=mask_type)) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.vertical_flip_mask, make_mask()) def test_kernel_video(self): - check_kernel(F.vertical_flip_video, make_input(datapoints.Video)) + check_kernel(F.vertical_flip_video, make_video()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.vertical_flip_image_tensor), - (PIL.Image.Image, F.vertical_flip_image_pil), - 
(datapoints.Image, F.vertical_flip_image_tensor), - (datapoints.BoundingBox, F.vertical_flip_bounding_box), - (datapoints.Mask, F.vertical_flip_mask), - (datapoints.Video, F.vertical_flip_video), + (F.vertical_flip_image_tensor, make_image_tensor), + (F.vertical_flip_image_pil, make_image_pil), + (F.vertical_flip_image_tensor, make_image), + (F.vertical_flip_bounding_box, make_bounding_box), + (F.vertical_flip_mask, make_segmentation_mask), + (F.vertical_flip_video, make_video), ], ) - def test_dispatcher(self, kernel, input_type): - check_dispatcher(F.vertical_flip, kernel, make_input(input_type)) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.vertical_flip, kernel, make_input()) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.vertical_flip_image_tensor), - (PIL.Image.Image, F.vertical_flip_image_pil), - (datapoints.Image, F.vertical_flip_image_tensor), - (datapoints.BoundingBox, F.vertical_flip_bounding_box), - (datapoints.Mask, F.vertical_flip_mask), - (datapoints.Video, F.vertical_flip_video), + (F.vertical_flip_image_tensor, torch.Tensor), + (F.vertical_flip_image_pil, PIL.Image.Image), + (F.vertical_flip_image_tensor, datapoints.Image), + (F.vertical_flip_bounding_box, datapoints.BoundingBox), + (F.vertical_flip_mask, datapoints.Mask), + (F.vertical_flip_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): check_dispatcher_signatures_match(F.vertical_flip, kernel=kernel, input_type=input_type) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform(self, input_type, device): - input = make_input(input_type, device=device) - - check_transform(transforms.RandomVerticalFlip, input, p=1) + def test_transform(self, make_input, device): + check_transform(transforms.RandomVerticalFlip, make_input(device=device), p=1) @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) def test_image_correctness(self, fn): - image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + image = make_image(dtype=torch.uint8, device="cpu") actual = fn(image) expected = F.to_image_tensor(F.vertical_flip(F.to_image_pil(image))) @@ -1378,7 +1357,7 @@ def _reference_vertical_flip_bounding_box(self, bounding_box): @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) def test_bounding_box_correctness(self, format, fn): - bounding_box = make_input(datapoints.BoundingBox, format=format) + bounding_box = make_bounding_box(format=format) actual = fn(bounding_box) expected = self._reference_vertical_flip_bounding_box(bounding_box) @@ -1386,12 +1365,12 @@ def test_bounding_box_correctness(self, format, fn): torch.testing.assert_close(actual, expected) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_transform_noop(self, input_type, device): 
- input = make_input(input_type, device=device) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) transform = transforms.RandomVerticalFlip(p=0) @@ -1434,7 +1413,7 @@ def test_kernel_image_tensor(self, param, value, dtype, device): kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] check_kernel( F.rotate_image_tensor, - make_input(torch.Tensor, dtype=dtype, device=device), + make_image(dtype=dtype, device=device), **kwargs, check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))), ) @@ -1452,7 +1431,7 @@ def test_kernel_bounding_box(self, param, value, format, dtype, device): if param != "angle": kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] - bounding_box = make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format) + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) check_kernel( F.rotate_bounding_box, @@ -1462,50 +1441,50 @@ def test_kernel_bounding_box(self, param, value, format, dtype, device): **kwargs, ) - @pytest.mark.parametrize("mask_type", ["segmentation", "detection"]) - def test_kernel_mask(self, mask_type): - check_kernel(F.rotate_mask, make_input(datapoints.Mask, mask_type=mask_type), **self._MINIMAL_AFFINE_KWARGS) + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.rotate_mask, make_mask(), **self._MINIMAL_AFFINE_KWARGS) def test_kernel_video(self): - check_kernel(F.rotate_video, make_input(datapoints.Video), **self._MINIMAL_AFFINE_KWARGS) + check_kernel(F.rotate_video, make_video(), **self._MINIMAL_AFFINE_KWARGS) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "make_input"), [ - (torch.Tensor, F.rotate_image_tensor), - (PIL.Image.Image, F.rotate_image_pil), - (datapoints.Image, F.rotate_image_tensor), - (datapoints.BoundingBox, F.rotate_bounding_box), - (datapoints.Mask, F.rotate_mask), - (datapoints.Video, F.rotate_video), + (F.rotate_image_tensor, make_image_tensor), + (F.rotate_image_pil, make_image_pil), + (F.rotate_image_tensor, make_image), + (F.rotate_bounding_box, make_bounding_box), + (F.rotate_mask, make_segmentation_mask), + (F.rotate_video, make_video), ], ) - def test_dispatcher(self, kernel, input_type): - check_dispatcher(F.rotate, kernel, make_input(input_type), **self._MINIMAL_AFFINE_KWARGS) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.rotate, kernel, make_input(), **self._MINIMAL_AFFINE_KWARGS) @pytest.mark.parametrize( - ("input_type", "kernel"), + ("kernel", "input_type"), [ - (torch.Tensor, F.rotate_image_tensor), - (PIL.Image.Image, F.rotate_image_pil), - (datapoints.Image, F.rotate_image_tensor), - (datapoints.BoundingBox, F.rotate_bounding_box), - (datapoints.Mask, F.rotate_mask), - (datapoints.Video, F.rotate_video), + (F.rotate_image_tensor, torch.Tensor), + (F.rotate_image_pil, PIL.Image.Image), + (F.rotate_image_tensor, datapoints.Image), + (F.rotate_bounding_box, datapoints.BoundingBox), + (F.rotate_mask, datapoints.Mask), + (F.rotate_video, datapoints.Video), ], ) def test_dispatcher_signature(self, kernel, input_type): check_dispatcher_signatures_match(F.rotate, kernel=kernel, input_type=input_type) @pytest.mark.parametrize( - "input_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], ) 
     @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
@@ -1515,7 +1494,7 @@ def test_transform(self, input_type, device):
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
     def test_functional_image_correctness(self, angle, center, interpolation, expand, fill):
-        image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu")
+        image = make_image(dtype=torch.uint8, device="cpu")
         fill = adapt_fill(fill, dtype=torch.uint8)
@@ -1537,7 +1516,7 @@ def test_functional_image_correctness(self, angle, center, interpolation, expand
     @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
     @pytest.mark.parametrize("seed", list(range(5)))
     def test_transform_image_correctness(self, center, interpolation, expand, fill, seed):
-        image = make_input(torch.Tensor, dtype=torch.uint8, device="cpu")
+        image = make_image(dtype=torch.uint8, device="cpu")
         fill = adapt_fill(fill, dtype=torch.uint8)
@@ -1593,7 +1572,7 @@ def _reference_rotate_bounding_box(self, bounding_box, *, angle, expand, center)
     @pytest.mark.parametrize("expand", [False])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
     def test_functional_bounding_box_correctness(self, format, angle, expand, center):
-        bounding_box = make_input(datapoints.BoundingBox, format=format)
+        bounding_box = make_bounding_box(format=format)
         actual = F.rotate(bounding_box, angle=angle, expand=expand, center=center)
         expected = self._reference_rotate_bounding_box(bounding_box, angle=angle, expand=expand, center=center)
@@ -1606,7 +1585,7 @@ def test_functional_bounding_box_correctness(self, format, angle, expand, center
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
     @pytest.mark.parametrize("seed", list(range(5)))
     def test_transform_bounding_box_correctness(self, format, expand, center, seed):
-        bounding_box = make_input(datapoints.BoundingBox, format=format)
+        bounding_box = make_bounding_box(format=format)
         transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center)
diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py
index cae8d3157e9..dc04fbfc7a9 100644
--- a/test/transforms_v2_kernel_infos.py
+++ b/test/transforms_v2_kernel_infos.py
@@ -11,6 +11,7 @@ from common_utils import (
     ArgsKwargs,
     combinations_grid,
+    DEFAULT_PORTRAIT_SPATIAL_SIZE,
     get_num_channels,
     ImageLoader,
     InfoBase,
@@ -260,6 +261,9 @@ def reference_inputs_convert_format_bounding_box():
         reference_fn=reference_convert_format_bounding_box,
         reference_inputs_fn=reference_inputs_convert_format_bounding_box,
         logs_usage=True,
+        closeness_kwargs={
+            (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0),
+        },
     ),
 )
@@ -296,7 +300,7 @@ def sample_inputs_crop_bounding_box():
 def sample_inputs_crop_mask():
-    for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=["random"], num_objects=["random"]):
+    for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=[10], num_objects=[5]):
         yield ArgsKwargs(mask_loader, top=4, left=3, height=7, width=8)
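Note on the `closeness_kwargs` entry added above: for `torch.int64` boxes on CPU the reference comparison now tolerates `atol=1`. A plausible reason, shown as an illustration only and not as code from this PR, is that converting an integer box with odd width or height through a center-based format produces a fractional center, and two equally valid rounding conventions can land a coordinate one pixel apart:

```python
import torch

xyxy = torch.tensor([3, 3, 8, 8], dtype=torch.int64)  # width = height = 5 -> center at 5.5
cx = (xyxy[0] + xyxy[2]).float() / 2                   # 5.5 is not representable in int64
w = xyxy[2] - xyxy[0]
x1_via_floor = int(torch.floor(cx - w.float() / 2))    # 3
x1_via_round = int(torch.round(cx)) - int(w) // 2      # 6 - 2 = 4
assert abs(x1_via_floor - x1_via_round) == 1           # off by exactly one pixel
```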
@@ -306,7 +310,7 @@ def reference_inputs_crop_mask():
 def sample_inputs_crop_video():
-    for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=[3]):
         yield ArgsKwargs(video_loader, top=4, left=3, height=7, width=8)
@@ -415,7 +419,7 @@ def sample_inputs_resized_crop_mask():
 def sample_inputs_resized_crop_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0])
@@ -457,7 +461,7 @@ def sample_inputs_resized_crop_video():
 def sample_inputs_pad_image_tensor():
     make_pad_image_loaders = functools.partial(
-        make_image_loaders, sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]
+        make_image_loaders, sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]
     )
     for image_loader, padding in itertools.product(
@@ -512,7 +516,7 @@ def sample_inputs_pad_bounding_box():
 def sample_inputs_pad_mask():
-    for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]):
+    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]):
         yield ArgsKwargs(mask_loader, padding=[1])
@@ -524,7 +528,7 @@ def reference_inputs_pad_mask():
 def sample_inputs_pad_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, padding=[1])
@@ -620,7 +624,7 @@ def pad_xfail_jit_fill_condition(args_kwargs):
 def sample_inputs_perspective_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"]):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
         for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(
                 image_loader, startpoints=None, endpoints=None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0]
             )
@@ -672,7 +676,7 @@ def sample_inputs_perspective_bounding_box():
 def sample_inputs_perspective_mask():
-    for mask_loader in make_mask_loaders(sizes=["random"]):
+    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
         yield ArgsKwargs(mask_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0])
     yield ArgsKwargs(make_detection_mask_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS)
@@ -686,7 +690,7 @@ def reference_inputs_perspective_mask():
 def sample_inputs_perspective_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0])
     yield ArgsKwargs(make_video_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS)
@@ -745,7 +749,7 @@ def _get_elastic_displacement(spatial_size):
 def sample_inputs_elastic_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"]):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
         displacement = _get_elastic_displacement(image_loader.spatial_size)
         for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
             yield ArgsKwargs(image_loader, displacement=displacement, fill=fill)
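Note: the recurring change in this file is that the "random" size and count sentinels are gone, so sample inputs are generated with the fixed `DEFAULT_PORTRAIT_SPATIAL_SIZE` and fixed `num_frames`/`num_categories`/`num_objects`, making the generated shapes reproducible across runs. A hedged sketch of the resulting pattern; the kernel-specific function name below is hypothetical, while the loader helpers are the ones from `test/common_utils.py`:

```python
from common_utils import ArgsKwargs, DEFAULT_PORTRAIT_SPATIAL_SIZE, make_video_loaders

def sample_inputs_some_video_kernel():
    # Fixed spatial size and frame count: the generated shapes are identical on
    # every run, instead of being drawn at random as with sizes=["random"].
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader)
```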
@@ -777,13 +781,13 @@ def sample_inputs_elastic_bounding_box():
 def sample_inputs_elastic_mask():
-    for mask_loader in make_mask_loaders(sizes=["random"]):
+    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
         displacement = _get_elastic_displacement(mask_loader.shape[-2:])
         yield ArgsKwargs(mask_loader, displacement=displacement)
 def sample_inputs_elastic_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         displacement = _get_elastic_displacement(video_loader.shape[-2:])
         yield ArgsKwargs(video_loader, displacement=displacement)
@@ -854,7 +858,7 @@ def sample_inputs_center_crop_bounding_box():
 def sample_inputs_center_crop_mask():
-    for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]):
+    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]):
         height, width = mask_loader.shape[-2:]
         yield ArgsKwargs(mask_loader, output_size=(height // 2, width // 2))
@@ -867,7 +871,7 @@ def reference_inputs_center_crop_mask():
 def sample_inputs_center_crop_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         height, width = video_loader.shape[-2:]
         yield ArgsKwargs(video_loader, output_size=(height // 2, width // 2))
@@ -947,7 +951,7 @@ def sample_inputs_gaussian_blur_video():
 def sample_inputs_equalize_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader)
@@ -1008,7 +1012,7 @@ def make_beta_distributed_image(shape, dtype, device, *, alpha, beta, memory_for
 def sample_inputs_equalize_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader)
@@ -1031,7 +1035,7 @@ def sample_inputs_equalize_video():
 def sample_inputs_invert_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader)
@@ -1041,7 +1045,7 @@ def reference_inputs_invert_image_tensor():
 def sample_inputs_invert_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader)
@@ -1067,7 +1071,7 @@ def sample_inputs_invert_video():
 def sample_inputs_posterize_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader, bits=_POSTERIZE_BITS[0])
@@ -1080,7 +1084,7 @@ def reference_inputs_posterize_image_tensor():
 def sample_inputs_posterize_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, bits=_POSTERIZE_BITS[0])
@@ -1110,7 +1114,7 @@ def _get_solarize_thresholds(dtype):
 def sample_inputs_solarize_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype)))
@@ -1125,7 +1129,7 @@ def uint8_to_float32_threshold_adapter(other_args, kwargs):
 def sample_inputs_solarize_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, threshold=next(_get_solarize_thresholds(video_loader.dtype)))
@@ -1149,7 +1153,7 @@ def sample_inputs_solarize_video():
 def sample_inputs_autocontrast_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader)
@@ -1159,7 +1163,7 @@ def reference_inputs_autocontrast_image_tensor():
 def sample_inputs_autocontrast_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader)
@@ -1189,7 +1193,7 @@ def sample_inputs_autocontrast_video():
 def sample_inputs_adjust_sharpness_image_tensor():
     for image_loader in make_image_loaders(
-        sizes=["random", (2, 2)],
+        sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE, (2, 2)],
         color_spaces=("GRAY", "RGB"),
     ):
         yield ArgsKwargs(image_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0])
@@ -1204,7 +1208,7 @@ def reference_inputs_adjust_sharpness_image_tensor():
 def sample_inputs_adjust_sharpness_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0])
@@ -1228,7 +1232,7 @@ def sample_inputs_adjust_sharpness_video():
 def sample_inputs_erase_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"]):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]):
         # FIXME: make the parameters more diverse
         h, w = 6, 7
         v = torch.rand(image_loader.num_channels, h, w)
@@ -1236,7 +1240,7 @@ def sample_inputs_erase_image_tensor():
 def sample_inputs_erase_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         # FIXME: make the parameters more diverse
         h, w = 6, 7
         v = torch.rand(video_loader.num_channels, h, w)
@@ -1261,7 +1265,7 @@ def sample_inputs_erase_video():
 def sample_inputs_adjust_brightness_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0])
@@ -1274,7 +1278,7 @@ def reference_inputs_adjust_brightness_image_tensor():
 def sample_inputs_adjust_brightness_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0])
@@ -1301,7 +1305,7 @@ def sample_inputs_adjust_brightness_video():
 def sample_inputs_adjust_contrast_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0])
@@ -1314,7 +1318,7 @@ def reference_inputs_adjust_contrast_image_tensor():
 def sample_inputs_adjust_contrast_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0])
@@ -1353,7 +1357,7 @@ def sample_inputs_adjust_contrast_video():
 def sample_inputs_adjust_gamma_image_tensor():
     gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0]
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader, gamma=gamma, gain=gain)
@@ -1367,7 +1371,7 @@ def reference_inputs_adjust_gamma_image_tensor():
 def sample_inputs_adjust_gamma_video():
     gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0]
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, gamma=gamma, gain=gain)
@@ -1397,7 +1401,7 @@ def sample_inputs_adjust_gamma_video():
 def sample_inputs_adjust_hue_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader, hue_factor=_ADJUST_HUE_FACTORS[0])
@@ -1410,7 +1414,7 @@ def reference_inputs_adjust_hue_image_tensor():
 def sample_inputs_adjust_hue_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, hue_factor=_ADJUST_HUE_FACTORS[0])
@@ -1439,7 +1443,7 @@ def sample_inputs_adjust_hue_video():
 def sample_inputs_adjust_saturation_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")):
+    for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0])
@@ -1452,7 +1456,7 @@ def reference_inputs_adjust_saturation_image_tensor():
 def sample_inputs_adjust_saturation_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0])
@@ -1612,7 +1616,7 @@ def wrapper(input_tensor, *other_args, **kwargs):
 def sample_inputs_normalize_image_tensor():
     for image_loader, (mean, std) in itertools.product(
-        make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]),
+        make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]),
         _NORMALIZE_MEANS_STDS,
     ):
         yield ArgsKwargs(image_loader, mean=mean, std=std)
@@ -1637,7 +1641,7 @@ def reference_inputs_normalize_image_tensor():
 def sample_inputs_normalize_video():
     mean, std = _NORMALIZE_MEANS_STDS[0]
     for video_loader in make_video_loaders(
-        sizes=["random"], color_spaces=["RGB"], num_frames=["random"], dtypes=[torch.float32]
+        sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[3], dtypes=[torch.float32]
     ):
         yield ArgsKwargs(video_loader, mean=mean, std=std)
@@ -1671,7 +1675,9 @@ def sample_inputs_convert_dtype_image_tensor():
             # conversion cannot be performed safely
             continue
-        for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[input_dtype]):
+        for image_loader in make_image_loaders(
+            sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[input_dtype]
+        ):
             yield ArgsKwargs(image_loader, dtype=output_dtype)
@@ -1736,7 +1742,7 @@ def reference_inputs_convert_dtype_image_tensor():
 def sample_inputs_convert_dtype_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
         yield ArgsKwargs(video_loader)
@@ -1781,7 +1787,7 @@ def sample_inputs_convert_dtype_video():
 def sample_inputs_uniform_temporal_subsample_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=[4]):
+    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[4]):
         yield ArgsKwargs(video_loader, num_samples=2)
@@ -1797,7 +1803,9 @@ def reference_uniform_temporal_subsample_video(x, num_samples):
 def reference_inputs_uniform_temporal_subsample_video():
-    for video_loader in make_video_loaders(sizes=["random"], color_spaces=["RGB"], num_frames=[10]):
+    for video_loader in make_video_loaders(
+        sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[10]
+    ):
         for num_samples in range(1, video_loader.shape[-4] + 1):
             yield ArgsKwargs(video_loader, num_samples)
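Note: with the fixed `num_frames=[10]` loaders above, `reference_inputs_uniform_temporal_subsample_video` exercises every valid `num_samples` from 1 to 10. As a hedged sketch only (the real `reference_uniform_temporal_subsample_video` is not shown in this diff and may be implemented differently), a stand-alone reference assuming "uniform" means evenly spaced frame indices along the temporal (-4) dimension:

```python
import torch

def uniform_temporal_subsample_reference(video, num_samples):
    # video: (..., T, C, H, W); pick num_samples evenly spaced frames, always
    # keeping the first and the last frame.
    t = video.shape[-4]
    indices = torch.linspace(0, t - 1, num_samples).round().long()
    return torch.index_select(video, -4, indices)

video = torch.rand(10, 3, 8, 8)  # (T, C, H, W)
assert uniform_temporal_subsample_reference(video, 4).shape == (4, 3, 8, 8)
```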