From 348231de00005798f499a70facafaf996bb71e70 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Mon, 11 Sep 2023 11:00:33 +0200
Subject: [PATCH 01/27] port tests for F.equalize and transforms.RandomEqualize

---
 test/test_transforms_v2_consistency.py |  9 ---
 test/test_transforms_v2_functional.py  | 11 ---
 test/test_transforms_v2_refactored.py  | 95 ++++++++++++++++++++++++++
 test/transforms_v2_dispatcher_infos.py |  8 ---
 test/transforms_v2_kernel_infos.py     | 86 -----------------------
 5 files changed, 95 insertions(+), 114 deletions(-)

diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py
index 185dff84ca4..1cf8a7be443 100644
--- a/test/test_transforms_v2_consistency.py
+++ b/test/test_transforms_v2_consistency.py
@@ -158,15 +158,6 @@ def __init__(
         # images given that the transform does nothing but call it anyway.
         supports_pil=False,
     ),
-    ConsistencyConfig(
-        v2_transforms.RandomEqualize,
-        legacy_transforms.RandomEqualize,
-        [
-            ArgsKwargs(p=0),
-            ArgsKwargs(p=1),
-        ],
-        make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, dtypes=[torch.uint8]),
-    ),
     ConsistencyConfig(
         v2_transforms.RandomInvert,
         legacy_transforms.RandomInvert,
diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py
index be32c9c7d4c..2082a789a92 100644
--- a/test/test_transforms_v2_functional.py
+++ b/test/test_transforms_v2_functional.py
@@ -551,17 +551,6 @@ def test_to_pil_image(inpt, mode):
     assert np.asarray(inpt).sum() == np.asarray(output).sum()
 
 
-def test_equalize_image_tensor_edge_cases():
-    inpt = torch.zeros(3, 200, 200, dtype=torch.uint8)
-    output = F.equalize_image(inpt)
-    torch.testing.assert_close(inpt, output)
-
-    inpt = torch.zeros(5, 3, 200, 200, dtype=torch.uint8)
-    inpt[..., 100:, 100:] = 1
-    output = F.equalize_image(inpt)
-    assert output.unique().tolist() == [0, 255]
-
-
 @pytest.mark.parametrize("device", cpu_and_cuda())
 def test_correctness_uniform_temporal_subsample(device):
     video = torch.arange(10, device=device)[:, None, None, None].expand(-1, 3, 8, 8)
diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py
index 6ad36b10988..9c33741ad44 100644
--- a/test/test_transforms_v2_refactored.py
+++ b/test/test_transforms_v2_refactored.py
@@ -3881,3 +3881,98 @@ def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, fo
         )
 
         assert_close(actual, expected, rtol=0, atol=1)
+
+
+class TestEqualize:
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_image(self, dtype, device):
+        check_kernel(F.equalize_image, make_image(dtype=dtype, device=device))
+
+    def test_kernel_video(self):
+        check_kernel(F.equalize_video, make_video())
+
+    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
+    def test_functional(self, make_input):
+        check_functional(F.equalize, make_input())
+
+    @pytest.mark.parametrize(
+        ("kernel", "input_type"),
+        [
+            (F.equalize_image, torch.Tensor),
+            (F._equalize_image_pil, PIL.Image.Image),
+            (F.equalize_image, tv_tensors.Image),
+            (F.equalize_video, tv_tensors.Video),
+        ],
+    )
+    def test_functional_signature(self, kernel, input_type):
+        check_functional_kernel_signature_match(F.equalize, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_video],
+    )
+    def test_transform(self, make_input):
+        check_transform(transforms.RandomEqualize(p=1), make_input())
+
+    # 
We are not using the default `make_image` here since that uniformly samples the values over the whole value range. + # Since the whole point of F.equalize is to transform an arbitrary distribution of values into a uniform one, + # the information gain is low if we already provide something really close to the expected value. + def _make_correctness_image(self, *, type, **kwargs): + shape = (3, 117, 253) + dtype = torch.uint8 + device = "cpu" + + max_value = get_max_value(dtype) + + def make_constant_image(*, value_factor=0.0): + return torch.full(shape, value_factor * max_value, dtype=dtype, device=device) + + def make_uniform_band_distributed_image(*, low_factor=0.1, high_factor=0.9): + return torch.testing.make_tensor( + shape, dtype=dtype, device=device, low=low_factor * max_value, high=high_factor * max_value + ) + + def make_beta_distributed_image(*, alpha=2.0, beta=5.0): + image = torch.distributions.Beta(alpha, beta).sample(shape) + image.mul_(get_max_value(dtype)).round_() + return image.to(dtype=dtype, device=device) + + make_fn = { + "constant": make_constant_image, + "uniform_band_distributed": make_uniform_band_distributed_image, + "beta_distributed": make_beta_distributed_image, + }[type] + return tv_tensors.Image(make_fn(**kwargs)) + + @pytest.mark.parametrize( + "make_correctness_image_kwargs", + [ + *[dict(type="constant", value_factor=value_factor) for value_factor in [0.0, 0.5, 1.0]], + *[ + dict(type="uniform_band_distributed", low_factor=low_factor, high_factor=high_factor) + for low_factor, high_factor in [ + (0.0, 0.25), + (0.25, 0.75), + (0.75, 1.0), + ] + ], + *[ + dict(type="beta_distributed", alpha=alpha, beta=beta) + for alpha, beta in [ + (0.5, 0.5), + (2.0, 2.0), + (2.0, 5.0), + (5.0, 2.0), + ] + ], + ], + ) + @pytest.mark.parametrize("fn", [F.equalize, transform_cls_to_functional(transforms.RandomEqualize, p=1)]) + def test_image_correctness(self, make_correctness_image_kwargs, fn): + image = self._make_correctness_image(**make_correctness_image_kwargs) + + actual = fn(image) + expected = F.to_image(F.equalize(F.to_pil_image(image))) + + assert_equal(actual, expected) diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index a49b75afdb4..52552807cd0 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -122,14 +122,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): pil_kernel_info=PILKernelInfo(F._elastic_image_pil), test_marks=[xfail_jit_python_scalar_arg("fill")], ), - DispatcherInfo( - F.equalize, - kernels={ - tv_tensors.Image: F.equalize_image, - tv_tensors.Video: F.equalize_video, - }, - pil_kernel_info=PILKernelInfo(F._equalize_image_pil, kernel_name="equalize_image_pil"), - ), DispatcherInfo( F.invert, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index ed5e7e62220..c3da546a5b5 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -9,8 +9,6 @@ from transforms_v2_legacy_utils import ( ArgsKwargs, DEFAULT_PORTRAIT_SPATIAL_SIZE, - get_num_channels, - ImageLoader, InfoBase, make_bounding_box_loaders, make_image_loader, @@ -255,90 +253,6 @@ def sample_inputs_elastic_video(): ) -def sample_inputs_equalize_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader) - - -def reference_inputs_equalize_image_tensor(): - # We are not using `make_image_loaders` here since that uniformly 
samples the values over the whole value range. - # Since the whole point of this kernel is to transform an arbitrary distribution of values into a uniform one, - # the information gain is low if we already provide something really close to the expected value. - def make_uniform_band_image(shape, dtype, device, *, low_factor, high_factor, memory_format): - if dtype.is_floating_point: - low = low_factor - high = high_factor - else: - max_value = torch.iinfo(dtype).max - low = int(low_factor * max_value) - high = int(high_factor * max_value) - return torch.testing.make_tensor(shape, dtype=dtype, device=device, low=low, high=high).to( - memory_format=memory_format, copy=True - ) - - def make_beta_distributed_image(shape, dtype, device, *, alpha, beta, memory_format): - image = torch.distributions.Beta(alpha, beta).sample(shape) - if not dtype.is_floating_point: - image.mul_(torch.iinfo(dtype).max).round_() - return image.to(dtype=dtype, device=device, memory_format=memory_format, copy=True) - - canvas_size = (256, 256) - for dtype, color_space, fn in itertools.product( - [torch.uint8], - ["GRAY", "RGB"], - [ - lambda shape, dtype, device, memory_format: torch.zeros(shape, dtype=dtype, device=device).to( - memory_format=memory_format, copy=True - ), - lambda shape, dtype, device, memory_format: torch.full( - shape, 1.0 if dtype.is_floating_point else torch.iinfo(dtype).max, dtype=dtype, device=device - ).to(memory_format=memory_format, copy=True), - *[ - functools.partial(make_uniform_band_image, low_factor=low_factor, high_factor=high_factor) - for low_factor, high_factor in [ - (0.0, 0.25), - (0.25, 0.75), - (0.75, 1.0), - ] - ], - *[ - functools.partial(make_beta_distributed_image, alpha=alpha, beta=beta) - for alpha, beta in [ - (0.5, 0.5), - (2, 2), - (2, 5), - (5, 2), - ] - ], - ], - ): - image_loader = ImageLoader(fn, shape=(get_num_channels(color_space), *canvas_size), dtype=dtype) - yield ArgsKwargs(image_loader) - - -def sample_inputs_equalize_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.equalize_image, - kernel_name="equalize_image_tensor", - sample_inputs_fn=sample_inputs_equalize_image_tensor, - reference_fn=pil_reference_wrapper(F._equalize_image_pil), - float32_vs_uint8=True, - reference_inputs_fn=reference_inputs_equalize_image_tensor, - ), - KernelInfo( - F.equalize_video, - sample_inputs_fn=sample_inputs_equalize_video, - ), - ] -) - - def sample_inputs_invert_image_tensor(): for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) From 7882863d778523ad93887e9ec3d8ba65c9c9427a Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 11:14:37 +0200 Subject: [PATCH 02/27] port tests for F.uniform_temporal_subsample and transforms.UniformTemporalSubsample --- test/test_transforms_v2.py | 19 ---------- test/test_transforms_v2_functional.py | 10 ------ test/test_transforms_v2_refactored.py | 49 ++++++++++++++++++++++++++ test/transforms_v2_dispatcher_infos.py | 9 ----- test/transforms_v2_kernel_infos.py | 34 ------------------ 5 files changed, 49 insertions(+), 72 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 5ab35fc873b..7abdf44d087 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -738,25 +738,6 @@ def test__get_params(self): assert min_size <= size < max_size -class 
TestUniformTemporalSubsample: - @pytest.mark.parametrize( - "inpt", - [ - torch.zeros(10, 3, 8, 8), - torch.zeros(1, 10, 3, 8, 8), - tv_tensors.Video(torch.zeros(1, 10, 3, 8, 8)), - ], - ) - def test__transform(self, inpt): - num_samples = 5 - transform = transforms.UniformTemporalSubsample(num_samples) - - output = transform(inpt) - assert type(output) is type(inpt) - assert output.shape[-4] == num_samples - assert output.dtype == inpt.dtype - - # TODO: remove this test in 0.17 when the default of antialias changes to True def test_antialias_warning(): pil_img = PIL.Image.new("RGB", size=(10, 10), color=127) diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index 2082a789a92..878915a4d86 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ -549,13 +549,3 @@ def test_to_pil_image(inpt, mode): assert isinstance(output, PIL.Image.Image) assert np.asarray(inpt).sum() == np.asarray(output).sum() - - -@pytest.mark.parametrize("device", cpu_and_cuda()) -def test_correctness_uniform_temporal_subsample(device): - video = torch.arange(10, device=device)[:, None, None, None].expand(-1, 3, 8, 8) - out_video = F.uniform_temporal_subsample(video, 5) - assert out_video.unique().tolist() == [0, 2, 4, 6, 9] - - out_video = F.uniform_temporal_subsample(video, 8) - assert out_video.unique().tolist() == [0, 1, 2, 3, 5, 6, 7, 9] diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 9c33741ad44..57bce9ab813 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -3976,3 +3976,52 @@ def test_image_correctness(self, make_correctness_image_kwargs, fn): expected = F.to_image(F.equalize(F.to_pil_image(image))) assert_equal(actual, expected) + + +class TestUniformTemporalSubsample: + def test_kernel_video(self): + check_kernel(F.uniform_temporal_subsample_video, make_video(), num_samples=2) + + @pytest.mark.parametrize("make_input", [make_video_tensor, make_video]) + def test_functional(self, make_input): + check_functional(F.uniform_temporal_subsample, make_input(), num_samples=2) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.uniform_temporal_subsample_video, torch.Tensor), + (F.uniform_temporal_subsample_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.uniform_temporal_subsample, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_video_tensor, make_video]) + def test_transform(self, make_input): + check_transform(transforms.UniformTemporalSubsample(num_samples=2), make_input()) + + def _reference_uniform_temporal_subsample_video(self, video, *, num_samples): + # Adapted from + # https://github.com/facebookresearch/pytorchvideo/blob/c8d23d8b7e597586a9e2d18f6ed31ad8aa379a7a/pytorchvideo/transforms/functional.py#L19 + t = video.shape[-4] + assert num_samples > 0 and t > 0 + # Sample by nearest neighbor interpolation if num_samples > t. 
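+        # linspace yields num_samples evenly spaced fractional frame positions in [0, t - 1];
+        # clamp + long then truncates them to valid integer indices, i.e. each sampled
+        # position falls back to its nearest lower frame.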
+ indices = torch.linspace(0, t - 1, num_samples, device=video.device) + indices = torch.clamp(indices, 0, t - 1).long() + return tv_tensors.Video(torch.index_select(video, -4, indices)) + + CORRECTNESS_NUM_FRAMES = 5 + + @pytest.mark.parametrize("num_samples", list(range(1, CORRECTNESS_NUM_FRAMES + 1))) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize( + "fn", [F.uniform_temporal_subsample, transform_cls_to_functional(transforms.UniformTemporalSubsample)] + ) + def test_video_correctness(self, num_samples, dtype, device, fn): + video = make_video(num_frames=self.CORRECTNESS_NUM_FRAMES, dtype=dtype, device=device) + + actual = fn(video, num_samples=num_samples) + expected = self._reference_uniform_temporal_subsample_video(video, num_samples=num_samples) + + assert_equal(actual, expected) diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 52552807cd0..28b4bce612d 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -229,15 +229,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): xfail_jit_python_scalar_arg("std"), ], ), - DispatcherInfo( - F.uniform_temporal_subsample, - kernels={ - tv_tensors.Video: F.uniform_temporal_subsample_video, - }, - test_marks=[ - skip_dispatch_tv_tensor, - ], - ), DispatcherInfo( F.clamp_bounding_boxes, kernels={tv_tensors.BoundingBoxes: F.clamp_bounding_boxes}, diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index c3da546a5b5..6a00f62323b 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -813,37 +813,3 @@ def sample_inputs_normalize_video(): ), ] ) - - -def sample_inputs_uniform_temporal_subsample_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[4]): - yield ArgsKwargs(video_loader, num_samples=2) - - -def reference_uniform_temporal_subsample_video(x, num_samples): - # Copy-pasted from - # https://github.com/facebookresearch/pytorchvideo/blob/c8d23d8b7e597586a9e2d18f6ed31ad8aa379a7a/pytorchvideo/transforms/functional.py#L19 - t = x.shape[-4] - assert num_samples > 0 and t > 0 - # Sample by nearest neighbor interpolation if num_samples > t. 
- indices = torch.linspace(0, t - 1, num_samples) - indices = torch.clamp(indices, 0, t - 1).long() - return torch.index_select(x, -4, indices) - - -def reference_inputs_uniform_temporal_subsample_video(): - for video_loader in make_video_loaders( - sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[10] - ): - for num_samples in range(1, video_loader.shape[-4] + 1): - yield ArgsKwargs(video_loader, num_samples) - - -KERNEL_INFOS.append( - KernelInfo( - F.uniform_temporal_subsample_video, - sample_inputs_fn=sample_inputs_uniform_temporal_subsample_video, - reference_fn=reference_uniform_temporal_subsample_video, - reference_inputs_fn=reference_inputs_uniform_temporal_subsample_video, - ) -) From 9b5b7c4c5926812f8970488e8a5193b5e927f3f1 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 12:22:55 +0200 Subject: [PATCH 03/27] port tests for F.normalize and transforms.Normalize --- test/test_transforms_v2.py | 18 ------- test/test_transforms_v2_consistency.py | 9 ---- test/test_transforms_v2_functional.py | 22 +------- test/test_transforms_v2_refactored.py | 72 ++++++++++++++++++++++++++ test/transforms_v2_dispatcher_infos.py | 11 ---- test/transforms_v2_kernel_infos.py | 60 --------------------- 6 files changed, 73 insertions(+), 119 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 7abdf44d087..087039da69b 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -310,24 +310,6 @@ def test_common(self, transform, adapter, container_type, image_or_video, de_ser def test_auto_augment(self, transform, input): transform(input) - @parametrize( - [ - ( - transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]), - itertools.chain.from_iterable( - fn(color_spaces=["RGB"], dtypes=[torch.float32]) - for fn in [ - make_images, - make_vanilla_tensor_images, - make_videos, - ] - ), - ), - ] - ) - def test_normalize(self, transform, input): - transform(input) - @pytest.mark.parametrize( "flat_inputs", diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index 1cf8a7be443..cc25d72f81b 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -72,15 +72,6 @@ def __init__( LINEAR_TRANSFORMATION_MATRIX = torch.rand([LINEAR_TRANSFORMATION_MEAN.numel()] * 2) CONSISTENCY_CONFIGS = [ - ConsistencyConfig( - v2_transforms.Normalize, - legacy_transforms.Normalize, - [ - ArgsKwargs(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), - ], - supports_pil=False, - make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, dtypes=[torch.float]), - ), ConsistencyConfig( v2_transforms.FiveCrop, legacy_transforms.FiveCrop, diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index 878915a4d86..edde2c9b209 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ -13,11 +13,7 @@ from torchvision.transforms.v2._utils import is_pure_tensor from transforms_v2_dispatcher_infos import DISPATCHER_INFOS from transforms_v2_kernel_infos import KERNEL_INFOS -from transforms_v2_legacy_utils import ( - DEFAULT_SQUARE_SPATIAL_SIZE, - make_multiple_bounding_boxes, - parametrized_error_message, -) +from transforms_v2_legacy_utils import make_multiple_bounding_boxes, parametrized_error_message KERNEL_INFOS_MAP = {info.kernel: info for info in KERNEL_INFOS} @@ -471,22 +467,6 @@ def test_alias(alias, target): assert alias is target -@pytest.mark.parametrize("device", cpu_and_cuda()) 
-@pytest.mark.parametrize("num_channels", [1, 3]) -def test_normalize_image_tensor_stats(device, num_channels): - stats = pytest.importorskip("scipy.stats", reason="SciPy is not available") - - def assert_samples_from_standard_normal(t): - p_value = stats.kstest(t.flatten(), cdf="norm", args=(0, 1)).pvalue - return p_value > 1e-4 - - image = torch.rand(num_channels, DEFAULT_SQUARE_SPATIAL_SIZE, DEFAULT_SQUARE_SPATIAL_SIZE) - mean = image.mean(dim=(1, 2)).tolist() - std = image.std(dim=(1, 2)).tolist() - - assert_samples_from_standard_normal(F.normalize_image(image, mean, std)) - - class TestClampBoundingBoxes: @pytest.mark.parametrize( "metadata", diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 57bce9ab813..68871aaf6b4 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -48,6 +48,11 @@ from torchvision.transforms.v2.functional._geometry import _get_perspective_coeffs from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal +try: + import scipy.stats +except ModuleNotFoundError: + scipy = None + @pytest.fixture(autouse=True) def fix_rng_seed(): @@ -4025,3 +4030,70 @@ def test_video_correctness(self, num_samples, dtype, device, fn): expected = self._reference_uniform_temporal_subsample_video(video, num_samples=num_samples) assert_equal(actual, expected) + + +class TestNormalize: + MEANS_STDS = [ + ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]), + (0.5, 2.0), + ] + MEAN, STD = MEANS_STDS[0] + + @pytest.mark.parametrize(("mean", "std"), MEANS_STDS) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, mean, std, device): + check_kernel(F.normalize_image, make_image(dtype=torch.float32, device=device), mean=self.MEAN, std=self.STD) + + def test_kernel_video(self): + check_kernel(F.normalize_video, make_video(dtype=torch.float32), mean=self.MEAN, std=self.STD) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) + def test_functional(self, make_input): + check_functional(F.equalize, make_input(dtype=torch.float32)) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.normalize_image, torch.Tensor), + (F.normalize_image, tv_tensors.Image), + (F.normalize_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.normalize, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(TypeError, match="should be a float tensor"): + F.normalize_image(make_image(dtype=torch.uint8), mean=self.MEAN, std=self.STD) + + with pytest.raises(ValueError, match="tensor image of size"): + F.normalize_image(torch.rand(16, 16, dtype=torch.float32), mean=self.MEAN, std=self.STD) + + for std in [0, [0, 0, 0], [0, 1, 1]]: + with pytest.raises(ValueError, match="std evaluated to zero, leading to division by zero"): + F.normalize_image(make_image(dtype=torch.float32), mean=self.MEAN, std=std) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image, make_video], + ) + def test_transform(self, make_input): + check_transform(transforms.Normalize(mean=self.MEAN, std=self.STD), make_input(dtype=torch.float32)) + + def _assert_is_standard_normal_distributed(self, tensor): + result = scipy.stats.kstest(tensor.flatten().cpu(), cdf="norm", args=(0, 1)) + assert result.pvalue > 1e-4 + + @pytest.mark.skipif(scipy is None, reason="SciPy is not 
available") + @pytest.mark.parametrize("dtype", [torch.float16, torch.float32, torch.float64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("fn", [F.normalize, transform_cls_to_functional(transforms.Normalize)]) + def test_correctness_image(self, dtype, device, fn): + input = tv_tensors.Image(torch.rand(3, 10, 10, dtype=dtype, device=device)) + mean = input.mean(dim=(-2, -1)).tolist() + std = input.std(dim=(-2, -1)).tolist() + + output = fn(input, mean=mean, std=std) + + self._assert_is_standard_normal_distributed(output) diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 28b4bce612d..ba27981087c 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -218,17 +218,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): ], pil_kernel_info=PILKernelInfo(F._ten_crop_image_pil), ), - DispatcherInfo( - F.normalize, - kernels={ - tv_tensors.Image: F.normalize_image, - tv_tensors.Video: F.normalize_video, - }, - test_marks=[ - xfail_jit_python_scalar_arg("mean"), - xfail_jit_python_scalar_arg("std"), - ], - ), DispatcherInfo( F.clamp_bounding_boxes, kernels={tv_tensors.BoundingBoxes: F.clamp_bounding_boxes}, diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index 6a00f62323b..c9d76faf60d 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -11,7 +11,6 @@ DEFAULT_PORTRAIT_SPATIAL_SIZE, InfoBase, make_bounding_box_loaders, - make_image_loader, make_image_loaders, make_image_loaders_for_interpolation, make_mask_loaders, @@ -754,62 +753,3 @@ def wrapper(input_tensor, *other_args, **kwargs): ), ] ) - -_NORMALIZE_MEANS_STDS = [ - ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - ([0.0, 0.0, 0.0], [1.0, 1.0, 1.0]), - (0.5, 2.0), -] - - -def sample_inputs_normalize_image_tensor(): - for image_loader, (mean, std) in itertools.product( - make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]), - _NORMALIZE_MEANS_STDS, - ): - yield ArgsKwargs(image_loader, mean=mean, std=std) - - -def reference_normalize_image_tensor(image, mean, std, inplace=False): - mean = torch.tensor(mean).view(-1, 1, 1) - std = torch.tensor(std).view(-1, 1, 1) - - sub = torch.Tensor.sub_ if inplace else torch.Tensor.sub - return sub(image, mean).div_(std) - - -def reference_inputs_normalize_image_tensor(): - yield ArgsKwargs( - make_image_loader(size=(32, 32), color_space="RGB", extra_dims=[1]), - mean=[0.5, 0.5, 0.5], - std=[1.0, 1.0, 1.0], - ) - - -def sample_inputs_normalize_video(): - mean, std = _NORMALIZE_MEANS_STDS[0] - for video_loader in make_video_loaders( - sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[3], dtypes=[torch.float32] - ): - yield ArgsKwargs(video_loader, mean=mean, std=std) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.normalize_image, - kernel_name="normalize_image_tensor", - sample_inputs_fn=sample_inputs_normalize_image_tensor, - reference_fn=reference_normalize_image_tensor, - reference_inputs_fn=reference_inputs_normalize_image_tensor, - test_marks=[ - xfail_jit_python_scalar_arg("mean"), - xfail_jit_python_scalar_arg("std"), - ], - ), - KernelInfo( - F.normalize_video, - sample_inputs_fn=sample_inputs_normalize_video, - ), - ] -) From c69eb284c9a0255a251bee225724dad25580ca44 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 12:27:55 +0200 Subject: [PATCH 04/27] cleanup --- test/test_transforms_v2.py | 67 
-------------- test/test_transforms_v2_consistency.py | 33 ------- test/transforms_v2_dispatcher_infos.py | 11 --- test/transforms_v2_kernel_infos.py | 116 ------------------------- 4 files changed, 227 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 087039da69b..3e8456e1ef1 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -277,39 +277,6 @@ def test_common(self, transform, adapter, container_type, image_or_video, de_ser ) assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4) - @parametrize( - [ - ( - transform, - itertools.chain.from_iterable( - fn( - color_spaces=[ - "GRAY", - "RGB", - ], - dtypes=[torch.uint8], - extra_dims=[(), (4,)], - **(dict(num_frames=[3]) if fn is make_videos else dict()), - ) - for fn in [ - make_images, - make_vanilla_tensor_images, - make_pil_images, - make_videos, - ] - ), - ) - for transform in ( - transforms.RandAugment(), - transforms.TrivialAugmentWide(), - transforms.AutoAugment(), - transforms.AugMix(), - ) - ] - ) - def test_auto_augment(self, transform, input): - transform(input) - @pytest.mark.parametrize( "flat_inputs", @@ -400,40 +367,6 @@ def test__get_params(self, fill, side_range): assert 0 <= params["padding"][3] <= (side_range[1] - 1) * h -class TestElasticTransform: - def test_assertions(self): - - with pytest.raises(TypeError, match="alpha should be a number or a sequence of numbers"): - transforms.ElasticTransform({}) - - with pytest.raises(ValueError, match="alpha is a sequence its length should be 1 or 2"): - transforms.ElasticTransform([1.0, 2.0, 3.0]) - - with pytest.raises(TypeError, match="sigma should be a number or a sequence of numbers"): - transforms.ElasticTransform(1.0, {}) - - with pytest.raises(ValueError, match="sigma is a sequence its length should be 1 or 2"): - transforms.ElasticTransform(1.0, [1.0, 2.0, 3.0]) - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.ElasticTransform(1.0, 2.0, fill="abc") - - def test__get_params(self): - alpha = 2.0 - sigma = 3.0 - transform = transforms.ElasticTransform(alpha, sigma) - - h, w = size = (24, 32) - image = make_image(size) - - params = transform._get_params([image]) - - displacement = params["displacement"] - assert displacement.shape == (1, h, w, 2) - assert (-alpha / w <= displacement[0, ..., 0]).all() and (displacement[0, ..., 0] <= alpha / w).all() - assert (-alpha / h <= displacement[0, ..., 1]).all() and (displacement[0, ..., 1] <= alpha / h).all() - - class TestTransform: @pytest.mark.parametrize( "inpt_type", diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index cc25d72f81b..aa706a21a24 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -251,22 +251,6 @@ def __init__( v2_transforms.RandomOrder, legacy_transforms.RandomOrder, ), - ConsistencyConfig( - v2_transforms.AugMix, - legacy_transforms.AugMix, - ), - ConsistencyConfig( - v2_transforms.AutoAugment, - legacy_transforms.AutoAugment, - ), - ConsistencyConfig( - v2_transforms.RandAugment, - legacy_transforms.RandAugment, - ), - ConsistencyConfig( - v2_transforms.TrivialAugmentWide, - legacy_transforms.TrivialAugmentWide, - ), ] @@ -445,7 +429,6 @@ def test_call_consistency(config, args_kwargs): ) for transform_cls, get_params_args_kwargs in [ (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)), - (v2_transforms.AutoAugment, ArgsKwargs(5)), ] ], ) @@ -817,36 +800,20 
@@ def test_common(self, t_ref, t, data_kwargs): (legacy_F.pil_to_tensor, {}), (legacy_F.convert_image_dtype, {}), (legacy_F.to_pil_image, {}), - (legacy_F.normalize, {}), - (legacy_F.resize, {"interpolation"}), - (legacy_F.pad, {"padding", "fill"}), - (legacy_F.crop, {}), - (legacy_F.center_crop, {}), - (legacy_F.resized_crop, {"interpolation"}), - (legacy_F.hflip, {}), - (legacy_F.perspective, {"startpoints", "endpoints", "fill", "interpolation"}), - (legacy_F.vflip, {}), (legacy_F.five_crop, {}), (legacy_F.ten_crop, {}), - (legacy_F.adjust_brightness, {}), (legacy_F.adjust_contrast, {}), (legacy_F.adjust_saturation, {}), (legacy_F.adjust_hue, {}), (legacy_F.adjust_gamma, {}), - (legacy_F.rotate, {"center", "fill", "interpolation"}), - (legacy_F.affine, {"angle", "translate", "center", "fill", "interpolation"}), (legacy_F.to_grayscale, {}), (legacy_F.rgb_to_grayscale, {}), (legacy_F.to_tensor, {}), - (legacy_F.erase, {}), - (legacy_F.gaussian_blur, {}), (legacy_F.invert, {}), (legacy_F.posterize, {}), (legacy_F.solarize, {}), (legacy_F.adjust_sharpness, {}), (legacy_F.autocontrast, {}), - (legacy_F.equalize, {}), - (legacy_F.elastic_transform, {"fill", "interpolation"}), ], ) def test_dispatcher_signature_consistency(legacy_dispatcher, name_only_params): diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index ba27981087c..846c74ab33b 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -111,17 +111,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): DISPATCHER_INFOS = [ - DispatcherInfo( - F.elastic, - kernels={ - tv_tensors.Image: F.elastic_image, - tv_tensors.Video: F.elastic_video, - tv_tensors.BoundingBoxes: F.elastic_bounding_boxes, - tv_tensors.Mask: F.elastic_mask, - }, - pil_kernel_info=PILKernelInfo(F._elastic_image_pil), - test_marks=[xfail_jit_python_scalar_arg("fill")], - ), DispatcherInfo( F.invert, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index c9d76faf60d..2276223843e 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -12,8 +12,6 @@ InfoBase, make_bounding_box_loaders, make_image_loaders, - make_image_loaders_for_interpolation, - make_mask_loaders, make_video_loaders, mark_framework_limitation, TestMark, @@ -96,12 +94,6 @@ def float32_vs_uint8_pixel_difference(atol=1, mae=False): } -def scripted_vs_eager_float64_tolerances(device, atol=1e-6, rtol=1e-6): - return { - (("TestKernels", "test_scripted_vs_eager"), torch.float64, device): {"atol": atol, "rtol": rtol, "mae": False}, - } - - def pil_reference_wrapper(pil_kernel): @functools.wraps(pil_kernel) def wrapper(input_tensor, *other_args, **kwargs): @@ -144,114 +136,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): KERNEL_INFOS = [] -def get_fills(*, num_channels, dtype): - yield None - - int_value = get_max_value(dtype) - float_value = int_value / 2 - yield int_value - yield float_value - - for vector_type in [list, tuple]: - yield vector_type([int_value]) - yield vector_type([float_value]) - - if num_channels > 1: - yield vector_type(float_value * c / 10 for c in range(num_channels)) - yield vector_type(int_value if c % 2 == 0 else 0 for c in range(num_channels)) - - -def float32_vs_uint8_fill_adapter(other_args, kwargs): - fill = kwargs.get("fill") - if fill is None: - return other_args, kwargs - - if isinstance(fill, (int, float)): - fill /= 255 - else: - fill = type(fill)(fill_ / 255 for fill_ in fill) - - return 
other_args, dict(kwargs, fill=fill) - - -def _get_elastic_displacement(canvas_size): - return torch.rand(1, *canvas_size, 2) - - -def sample_inputs_elastic_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): - displacement = _get_elastic_displacement(image_loader.canvas_size) - for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): - yield ArgsKwargs(image_loader, displacement=displacement, fill=fill) - - -def reference_inputs_elastic_image_tensor(): - for image_loader, interpolation in itertools.product( - make_image_loaders_for_interpolation(), - [ - F.InterpolationMode.NEAREST, - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - ], - ): - displacement = _get_elastic_displacement(image_loader.canvas_size) - for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): - yield ArgsKwargs(image_loader, interpolation=interpolation, displacement=displacement, fill=fill) - - -def sample_inputs_elastic_bounding_boxes(): - for bounding_boxes_loader in make_bounding_box_loaders(): - displacement = _get_elastic_displacement(bounding_boxes_loader.canvas_size) - yield ArgsKwargs( - bounding_boxes_loader, - format=bounding_boxes_loader.format, - canvas_size=bounding_boxes_loader.canvas_size, - displacement=displacement, - ) - - -def sample_inputs_elastic_mask(): - for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): - displacement = _get_elastic_displacement(mask_loader.shape[-2:]) - yield ArgsKwargs(mask_loader, displacement=displacement) - - -def sample_inputs_elastic_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - displacement = _get_elastic_displacement(video_loader.shape[-2:]) - yield ArgsKwargs(video_loader, displacement=displacement) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.elastic_image, - sample_inputs_fn=sample_inputs_elastic_image_tensor, - reference_inputs_fn=reference_inputs_elastic_image_tensor, - float32_vs_uint8=float32_vs_uint8_fill_adapter, - closeness_kwargs={ - **float32_vs_uint8_pixel_difference(6, mae=True), - **cuda_vs_cpu_pixel_difference(), - }, - test_marks=[xfail_jit_python_scalar_arg("fill")], - ), - KernelInfo( - F.elastic_bounding_boxes, - sample_inputs_fn=sample_inputs_elastic_bounding_boxes, - ), - KernelInfo( - F.elastic_mask, - sample_inputs_fn=sample_inputs_elastic_mask, - ), - KernelInfo( - F.elastic_video, - sample_inputs_fn=sample_inputs_elastic_video, - closeness_kwargs=cuda_vs_cpu_pixel_difference(), - ), - ] -) - - def sample_inputs_invert_image_tensor(): for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) From 0a6413ac5f51cdb1fef6cc6548fe56e7a5bce503 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 13:07:46 +0200 Subject: [PATCH 05/27] port tests for F.clamp_bounding_boxes and transforms.ClampBoundingBoxes --- test/test_transforms_v2_functional.py | 36 +----------------- test/test_transforms_v2_refactored.py | 38 +++++++++++++++++++ test/transforms_v2_dispatcher_infos.py | 7 ---- test/transforms_v2_kernel_infos.py | 18 --------- torchvision/transforms/v2/functional/_meta.py | 2 +- 5 files changed, 40 insertions(+), 61 deletions(-) diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index edde2c9b209..badaca69ca8 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ 
-13,7 +13,7 @@ from torchvision.transforms.v2._utils import is_pure_tensor from transforms_v2_dispatcher_infos import DISPATCHER_INFOS from transforms_v2_kernel_infos import KERNEL_INFOS -from transforms_v2_legacy_utils import make_multiple_bounding_boxes, parametrized_error_message +from transforms_v2_legacy_utils import parametrized_error_message KERNEL_INFOS_MAP = {info.kernel: info for info in KERNEL_INFOS} @@ -467,40 +467,6 @@ def test_alias(alias, target): assert alias is target -class TestClampBoundingBoxes: - @pytest.mark.parametrize( - "metadata", - [ - dict(), - dict(format=tv_tensors.BoundingBoxFormat.XYXY), - dict(canvas_size=(1, 1)), - ], - ) - def test_pure_tensor_insufficient_metadata(self, metadata): - pure_tensor = next(make_multiple_bounding_boxes()).as_subclass(torch.Tensor) - - with pytest.raises(ValueError, match=re.escape("`format` and `canvas_size` has to be passed")): - F.clamp_bounding_boxes(pure_tensor, **metadata) - - @pytest.mark.parametrize( - "metadata", - [ - dict(format=tv_tensors.BoundingBoxFormat.XYXY), - dict(canvas_size=(1, 1)), - dict(format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=(1, 1)), - ], - ) - def test_tv_tensor_explicit_metadata(self, metadata): - tv_tensor = next(make_multiple_bounding_boxes()) - - with pytest.raises(ValueError, match=re.escape("`format` and `canvas_size` must not be passed")): - F.clamp_bounding_boxes(tv_tensor, **metadata) - - -# TODO: All correctness checks below this line should be ported to be references on a `KernelInfo` in -# `transforms_v2_kernel_infos.py` - - @pytest.mark.parametrize( "inpt", [ diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 68871aaf6b4..896ac6f1e07 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4097,3 +4097,41 @@ def test_correctness_image(self, dtype, device, fn): output = fn(input, mean=mean, std=std) self._assert_is_standard_normal_distributed(output) + + +class TestClampBoundingBoxes: + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.int64, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel(self, format, dtype, device): + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + check_kernel( + F.clamp_bounding_boxes, + bounding_boxes, + format=bounding_boxes.format, + canvas_size=bounding_boxes.canvas_size, + ) + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + def test_functional(self, format): + check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format)) + + def test_errors(self): + input_tv_tensor = make_bounding_boxes() + input_pure_tensor = input_tv_tensor.as_subclass(torch.Tensor) + format, canvas_size = input_tv_tensor.format, input_tv_tensor.canvas_size + + for format_, canvas_size_ in [(None, None), (format, None), (None, canvas_size)]: + with pytest.raises( + ValueError, match="For pure tensor inputs, `format` and `canvas_size` have to be passed." + ): + F.clamp_bounding_boxes(input_pure_tensor, format=format_, canvas_size=canvas_size_) + + for format_, canvas_size_ in [(format, canvas_size), (format, None), (None, canvas_size)]: + with pytest.raises( + ValueError, match="For bounding box tv_tensor inputs, `format` and `canvas_size` must not be passed." 
+ ): + F.clamp_bounding_boxes(input_tv_tensor, format=format_, canvas_size=canvas_size_) + + def test_transform(self): + check_transform(transforms.ClampBoundingBoxes(), make_bounding_boxes()) diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 846c74ab33b..ce040ca9f77 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -207,11 +207,4 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): ], pil_kernel_info=PILKernelInfo(F._ten_crop_image_pil), ), - DispatcherInfo( - F.clamp_bounding_boxes, - kernels={tv_tensors.BoundingBoxes: F.clamp_bounding_boxes}, - test_marks=[ - skip_dispatch_tv_tensor, - ], - ), ] diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index 2276223843e..dc2e714adeb 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -10,7 +10,6 @@ ArgsKwargs, DEFAULT_PORTRAIT_SPATIAL_SIZE, InfoBase, - make_bounding_box_loaders, make_image_loaders, make_video_loaders, mark_framework_limitation, @@ -516,23 +515,6 @@ def sample_inputs_adjust_saturation_video(): ) -def sample_inputs_clamp_bounding_boxes(): - for bounding_boxes_loader in make_bounding_box_loaders(): - yield ArgsKwargs( - bounding_boxes_loader, - format=bounding_boxes_loader.format, - canvas_size=bounding_boxes_loader.canvas_size, - ) - - -KERNEL_INFOS.append( - KernelInfo( - F.clamp_bounding_boxes, - sample_inputs_fn=sample_inputs_clamp_bounding_boxes, - logs_usage=True, - ) -) - _FIVE_TEN_CROP_SIZES = [7, (6,), [5], (6, 5), [7, 6]] diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py index 61e21ef8175..5e045391630 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -261,7 +261,7 @@ def clamp_bounding_boxes( if torch.jit.is_scripting() or is_pure_tensor(inpt): if format is None or canvas_size is None: - raise ValueError("For pure tensor inputs, `format` and `canvas_size` has to be passed.") + raise ValueError("For pure tensor inputs, `format` and `canvas_size` have to be passed.") return _clamp_bounding_boxes(inpt, format=format, canvas_size=canvas_size) elif isinstance(inpt, tv_tensors.BoundingBoxes): if format is not None or canvas_size is not None: From 227ca27a1c6e6ca0c75c0ad0f4a3cb15fd61701a Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 13:22:37 +0200 Subject: [PATCH 06/27] fix normalize tests --- test/test_transforms_v2_refactored.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 896ac6f1e07..9a95c0364e3 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4045,12 +4045,28 @@ class TestNormalize: def test_kernel_image(self, mean, std, device): check_kernel(F.normalize_image, make_image(dtype=torch.float32, device=device), mean=self.MEAN, std=self.STD) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_inplace(self, device): + input = make_image_tensor(dtype=torch.float32, device=device) + input_version = input._version + + output_out_of_place = F.normalize_image(input, mean=self.MEAN, std=self.STD) + assert output_out_of_place.data_ptr() != input.data_ptr() + assert output_out_of_place is not input + + output_inplace = F.normalize_image(input, mean=self.MEAN, std=self.STD, inplace=True) + assert 
output_inplace.data_ptr() == input.data_ptr() + assert output_inplace._version > input_version + assert output_inplace is input + + assert_equal(output_inplace, output_out_of_place) + def test_kernel_video(self): check_kernel(F.normalize_video, make_video(dtype=torch.float32), mean=self.MEAN, std=self.STD) @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) def test_functional(self, make_input): - check_functional(F.equalize, make_input(dtype=torch.float32)) + check_functional(F.normalize, make_input(dtype=torch.float32)) @pytest.mark.parametrize( ("kernel", "input_type"), @@ -4074,10 +4090,7 @@ def test_functional_error(self): with pytest.raises(ValueError, match="std evaluated to zero, leading to division by zero"): F.normalize_image(make_image(dtype=torch.float32), mean=self.MEAN, std=std) - @pytest.mark.parametrize( - "make_input", - [make_image_tensor, make_image, make_video], - ) + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) def test_transform(self, make_input): check_transform(transforms.Normalize(mean=self.MEAN, std=self.STD), make_input(dtype=torch.float32)) From 734d0b7c7460c44aedb11582717b071e55279741 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 13:24:09 +0200 Subject: [PATCH 07/27] port tests for F.invert and transforms.RandomInvert --- test/test_transforms_v2_consistency.py | 9 ------ test/test_transforms_v2_refactored.py | 39 ++++++++++++++++++++++++++ test/transforms_v2_dispatcher_infos.py | 8 ------ test/transforms_v2_kernel_infos.py | 33 ---------------------- 4 files changed, 39 insertions(+), 50 deletions(-) diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index aa706a21a24..010198dd41a 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -149,14 +149,6 @@ def __init__( # images given that the transform does nothing but call it anyway. 
supports_pil=False, ), - ConsistencyConfig( - v2_transforms.RandomInvert, - legacy_transforms.RandomInvert, - [ - ArgsKwargs(p=0), - ArgsKwargs(p=1), - ], - ), ConsistencyConfig( v2_transforms.RandomPosterize, legacy_transforms.RandomPosterize, @@ -809,7 +801,6 @@ def test_common(self, t_ref, t, data_kwargs): (legacy_F.to_grayscale, {}), (legacy_F.rgb_to_grayscale, {}), (legacy_F.to_tensor, {}), - (legacy_F.invert, {}), (legacy_F.posterize, {}), (legacy_F.solarize, {}), (legacy_F.adjust_sharpness, {}), diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 9a95c0364e3..c1afba718a5 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4148,3 +4148,42 @@ def test_errors(self): def test_transform(self): check_transform(transforms.ClampBoundingBoxes(), make_bounding_boxes()) + + +class TestInvert: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.int16, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.invert_image, make_image(dtype=dtype, device=device)) + + def test_kernel_video(self): + check_kernel(F.invert_video, make_video()) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.invert, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.invert_image, torch.Tensor), + (F._invert_image_pil, PIL.Image.Image), + (F.invert_image, tv_tensors.Image), + (F.invert_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.invert, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + check_transform(transforms.RandomInvert(p=1), make_input()) + + @pytest.mark.parametrize("fn", [F.invert, transform_cls_to_functional(transforms.RandomInvert, p=1)]) + def test_correctness_image(self, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image(F.invert(F.to_pil_image(image))) + + assert_equal(actual, expected) diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index ce040ca9f77..a84923b620e 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -111,14 +111,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): DISPATCHER_INFOS = [ - DispatcherInfo( - F.invert, - kernels={ - tv_tensors.Image: F.invert_image, - tv_tensors.Video: F.invert_video, - }, - pil_kernel_info=PILKernelInfo(F._invert_image_pil, kernel_name="invert_image_pil"), - ), DispatcherInfo( F.posterize, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index dc2e714adeb..8371e60ffd9 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -135,39 +135,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): KERNEL_INFOS = [] -def sample_inputs_invert_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader) - - -def reference_inputs_invert_image_tensor(): - for image_loader in make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def 
sample_inputs_invert_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.invert_image, - kernel_name="invert_image_tensor", - sample_inputs_fn=sample_inputs_invert_image_tensor, - reference_fn=pil_reference_wrapper(F._invert_image_pil), - reference_inputs_fn=reference_inputs_invert_image_tensor, - float32_vs_uint8=True, - ), - KernelInfo( - F.invert_video, - sample_inputs_fn=sample_inputs_invert_video, - ), - ] -) - - _POSTERIZE_BITS = [1, 4, 8] From 845ac36afd94b7a7b2a29d17cff49ccd99874c3c Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 22:31:15 +0200 Subject: [PATCH 08/27] add new tests for F.posterize and transforms.RandomPosterize --- test/test_transforms_v2_refactored.py | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index c1afba718a5..d1a0d84404e 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4187,3 +4187,43 @@ def test_correctness_image(self, fn): expected = F.to_image(F.invert(F.to_pil_image(image))) assert_equal(actual, expected) + + +class TestPosterize: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.posterize_image, make_image(dtype=dtype, device=device), bits=1) + + def test_kernel_video(self): + check_kernel(F.posterize_video, make_video(), bits=1) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.posterize, make_input(), bits=1) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.posterize_image, torch.Tensor), + (F._posterize_image_pil, PIL.Image.Image), + (F.posterize_image, tv_tensors.Image), + (F.posterize_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.posterize, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + check_transform(transforms.RandomPosterize(bits=1, p=1), make_input()) + + @pytest.mark.parametrize("bits", [1, 4, 8]) + @pytest.mark.parametrize("fn", [F.posterize, transform_cls_to_functional(transforms.RandomPosterize, p=1)]) + def test_correctness_image(self, bits, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image, bits=bits) + expected = F.to_image(F.posterize(F.to_pil_image(image), bits=bits)) + + assert_equal(actual, expected) From 472ec15bb3b38d2c223872f432c6454235de2428 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 22:40:47 +0200 Subject: [PATCH 09/27] add new tests for F.solarize and transforms.RandomSolarize --- test/test_transforms_v2_refactored.py | 53 +++++++++++++++++++++++++++ torchvision/transforms/v2/_color.py | 5 +++ 2 files changed, 58 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index d1a0d84404e..fe5d7a07f07 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4227,3 +4227,56 @@ def test_correctness_image(self, bits, fn): expected = F.to_image(F.posterize(F.to_pil_image(image), bits=bits)) 
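+        # F.posterize on a PIL image dispatches to the PIL-backed kernel, so this compares
+        # the tensor kernel against PIL.ImageOps.posterize and expects an exact match.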
assert_equal(actual, expected) + + +class TestSolarize: + def _make_threshold(self, input, *, factor=0.5): + dtype = input.dtype if isinstance(input, torch.Tensor) else torch.uint8 + return (float if dtype.is_floating_point else int)(get_max_value(dtype) * factor) + + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + image = make_image(dtype=dtype, device=device) + check_kernel(F.solarize_image, image, threshold=self._make_threshold(image)) + + def test_kernel_video(self): + video = make_video() + check_kernel(F.solarize_video, video, threshold=self._make_threshold(video)) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + input = make_input() + check_functional(F.solarize, input, threshold=self._make_threshold(input)) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.solarize_image, torch.Tensor), + (F._solarize_image_pil, PIL.Image.Image), + (F.solarize_image, tv_tensors.Image), + (F.solarize_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.solarize, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize(("dtype", "threshold"), [(torch.uint8, 256), (torch.float, 1.5)]) + def test_functional_error(self, dtype, threshold): + with pytest.raises(TypeError, match="Threshold should be less or equal the maximum value of the dtype"): + F.solarize(make_image(dtype=dtype), threshold=threshold) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + input = make_input() + check_transform(transforms.RandomSolarize(threshold=self._make_threshold(input), p=1), input) + + @pytest.mark.parametrize("threshold", [0.0, 0.1, 0.5, 0.9, 1.0]) + @pytest.mark.parametrize("fn", [F.solarize, transform_cls_to_functional(transforms.RandomSolarize, p=1)]) + def test_correctness_image(self, threshold, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image, threshold=threshold) + expected = F.to_image(F.solarize(F.to_pil_image(image), threshold=threshold)) + + assert_equal(actual, expected) diff --git a/torchvision/transforms/v2/_color.py b/torchvision/transforms/v2/_color.py index efe731b5ec9..2715eefa21c 100644 --- a/torchvision/transforms/v2/_color.py +++ b/torchvision/transforms/v2/_color.py @@ -328,6 +328,11 @@ class RandomSolarize(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomSolarize + def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + params = super()._extract_params_for_v1_transform() + params["threshold"] = float(params["threshold"]) + return params + def __init__(self, threshold: float, p: float = 0.5) -> None: super().__init__(p=p) self.threshold = threshold From 41cfdadd43cff42d986de46743109b2adff1f387 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 22:45:01 +0200 Subject: [PATCH 10/27] add new tests for F.autocontrast and transforms.RandomAutocontrast --- test/test_transforms_v2_refactored.py | 39 +++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index fe5d7a07f07..49ec6ae4650 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4280,3 +4280,42 @@ def 
test_correctness_image(self, threshold, fn): expected = F.to_image(F.solarize(F.to_pil_image(image), threshold=threshold)) assert_equal(actual, expected) + + +class TestAutocontrast: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.int16, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.autocontrast_image, make_image(dtype=dtype, device=device)) + + def test_kernel_video(self): + check_kernel(F.autocontrast_video, make_video()) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.autocontrast, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.autocontrast_image, torch.Tensor), + (F._autocontrast_image_pil, PIL.Image.Image), + (F.autocontrast_image, tv_tensors.Image), + (F.autocontrast_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.autocontrast, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + check_transform(transforms.RandomAutocontrast(p=1), make_input(), check_v1_compatibility=dict(rtol=0, atol=1)) + + @pytest.mark.parametrize("fn", [F.autocontrast, transform_cls_to_functional(transforms.RandomAutocontrast, p=1)]) + def test_correctness_image(self, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image(F.autocontrast(F.to_pil_image(image))) + + assert_close(actual, expected, rtol=0, atol=1) From 40f21b2ee67f0a4bb51f0bc45779a374b64bd18b Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 22:50:43 +0200 Subject: [PATCH 11/27] add new tests for F.adjust_sharpness and transforms.RandomAdjustSharpness --- test/test_transforms_v2_refactored.py | 49 +++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 49ec6ae4650..044a179347c 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4319,3 +4319,52 @@ def test_correctness_image(self, fn): expected = F.to_image(F.autocontrast(F.to_pil_image(image))) assert_close(actual, expected, rtol=0, atol=1) + + +class TestAdjustSharpness: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_sharpness_image, make_image(dtype=dtype, device=device), sharpness_factor=0.5) + + def test_kernel_video(self): + check_kernel(F.adjust_sharpness_video, make_video(), sharpness_factor=0.5) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_sharpness, make_input(), sharpness_factor=0.5) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_sharpness_image, torch.Tensor), + (F._adjust_sharpness_image_pil, PIL.Image.Image), + (F.adjust_sharpness_image, tv_tensors.Image), + (F.adjust_sharpness_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_sharpness, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("make_input", 
[make_image_tensor, make_image_pil, make_image, make_video]) + def test_transform(self, make_input): + check_transform(transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1), make_input()) + + def test_functional_error(self): + with pytest.raises(TypeError, match="can have 1 or 3 channels"): + F.adjust_sharpness(make_image(color_space="RGBA"), sharpness_factor=0.5) + + with pytest.raises(ValueError, match="is not non-negative"): + F.adjust_sharpness(make_image(), sharpness_factor=-1) + + @pytest.mark.parametrize("sharpness_factor", [0.1, 0.5, 1.0]) + @pytest.mark.parametrize( + "fn", [F.adjust_sharpness, transform_cls_to_functional(transforms.RandomAdjustSharpness, p=1)] + ) + def test_correctness_image(self, sharpness_factor, fn): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image, sharpness_factor=sharpness_factor) + expected = F.to_image(F.adjust_sharpness(F.to_pil_image(image), sharpness_factor=sharpness_factor)) + + assert_equal(actual, expected) From 635c2c4381b6d7d7a122a138a67de8505ac99908 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 22:54:34 +0200 Subject: [PATCH 12/27] add new tests for F.adjust_contrast --- test/test_transforms_v2_refactored.py | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 044a179347c..b58b5bff739 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4368,3 +4368,45 @@ def test_correctness_image(self, sharpness_factor, fn): expected = F.to_image(F.adjust_sharpness(F.to_pil_image(image), sharpness_factor=sharpness_factor)) assert_equal(actual, expected) + + +class TestAdjustContrast: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_contrast_image, make_image(dtype=dtype, device=device), contrast_factor=0.5) + + def test_kernel_video(self): + check_kernel(F.adjust_contrast_video, make_video(), contrast_factor=0.5) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_contrast, make_input(), contrast_factor=0.5) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_contrast_image, torch.Tensor), + (F._adjust_contrast_image_pil, PIL.Image.Image), + (F.adjust_contrast_image, tv_tensors.Image), + (F.adjust_contrast_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_contrast, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(TypeError, match="permitted channel values are 1 or 3"): + F.adjust_contrast(make_image(color_space="RGBA"), contrast_factor=0.5) + + with pytest.raises(ValueError, match="is not non-negative"): + F.adjust_contrast(make_image(), contrast_factor=-1) + + @pytest.mark.parametrize("contrast_factor", [0.1, 0.5, 1.0]) + def test_correctness_image(self, contrast_factor): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_contrast(image, contrast_factor=contrast_factor) + expected = F.to_image(F.adjust_contrast(F.to_pil_image(image), contrast_factor=contrast_factor)) + + assert_close(actual, expected, rtol=0, atol=1) From 257d0f403f36137530514800a86f008b3ee0a5ca Mon Sep 17 00:00:00 2001 From: Philip Meier Date: 
Mon, 11 Sep 2023 22:58:38 +0200 Subject: [PATCH 13/27] add new tests for F.adjust_gamma --- test/test_transforms_v2_refactored.py | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index b58b5bff739..16bad683980 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4410,3 +4410,43 @@ def test_correctness_image(self, contrast_factor): expected = F.to_image(F.adjust_contrast(F.to_pil_image(image), contrast_factor=contrast_factor)) assert_close(actual, expected, rtol=0, atol=1) + + +class TestAdjustGamma: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_gamma_image, make_image(dtype=dtype, device=device), gamma=0.5) + + def test_kernel_video(self): + check_kernel(F.adjust_gamma_video, make_video(), gamma=0.5) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_gamma, make_input(), gamma=0.5) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_gamma_image, torch.Tensor), + (F._adjust_gamma_image_pil, PIL.Image.Image), + (F.adjust_gamma_image, tv_tensors.Image), + (F.adjust_gamma_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_gamma, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(ValueError, match="Gamma should be a non-negative real number"): + F.adjust_gamma(make_image(), gamma=-1) + + @pytest.mark.parametrize("gamma", [0.1, 0.5, 1.0]) + @pytest.mark.parametrize("gain", [0.1, 1.0, 2.0]) + def test_correctness_image(self, gamma, gain): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_gamma(image, gamma=gamma, gain=gain) + expected = F.to_image(F.adjust_gamma(F.to_pil_image(image), gamma=gamma, gain=gain)) + + assert_equal(actual, expected) From c9595904e2bef0addffbdc4ac188e2577578d235 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 23:03:16 +0200 Subject: [PATCH 14/27] add new tests for F.adjust_hue --- test/test_transforms_v2_refactored.py | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 16bad683980..cb856d4d798 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4450,3 +4450,47 @@ def test_correctness_image(self, gamma, gain): expected = F.to_image(F.adjust_gamma(F.to_pil_image(image), gamma=gamma, gain=gain)) assert_equal(actual, expected) + + +class TestAdjustHue: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_hue_image, make_image(dtype=dtype, device=device), hue_factor=0.25) + + def test_kernel_video(self): + check_kernel(F.adjust_hue_video, make_video(), hue_factor=0.25) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_hue, make_input(), hue_factor=0.25) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_hue_image, torch.Tensor), + 
(F._adjust_hue_image_pil, PIL.Image.Image), + (F.adjust_hue_image, tv_tensors.Image), + (F.adjust_hue_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_hue, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(TypeError, match="permitted channel values are 1 or 3"): + F.adjust_hue(make_image(color_space="RGBA"), hue_factor=0.25) + + for hue_factor in [-1, 1]: + with pytest.raises(ValueError, match=re.escape("is not in [-0.5, 0.5]")): + F.adjust_hue(make_image(), hue_factor=hue_factor) + + @pytest.mark.parametrize("hue_factor", [-0.5, -0.3, 0.0, 0.2, 0.5]) + def test_correctness_image(self, hue_factor): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_hue(image, hue_factor=hue_factor) + expected = F.to_image(F.adjust_hue(F.to_pil_image(image), hue_factor=hue_factor)) + + mae = (actual.float() - expected.float()).abs().mean() + assert mae < 2 From 8cd9fdfcb8845581b994e892dd1a9f181dd37dac Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 11 Sep 2023 23:04:34 +0200 Subject: [PATCH 15/27] add new tests for F.adjust_saturation --- test/test_transforms_v2_refactored.py | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index cb856d4d798..ddadad8ca54 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4494,3 +4494,45 @@ def test_correctness_image(self, hue_factor): mae = (actual.float() - expected.float()).abs().mean() assert mae < 2 + + +class TestAdjustSaturation: + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.adjust_saturation_image, make_image(dtype=dtype, device=device), saturation_factor=0.5) + + def test_kernel_video(self): + check_kernel(F.adjust_saturation_video, make_video(), saturation_factor=0.5) + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video]) + def test_functional(self, make_input): + check_functional(F.adjust_saturation, make_input(), saturation_factor=0.5) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.adjust_saturation_image, torch.Tensor), + (F._adjust_saturation_image_pil, PIL.Image.Image), + (F.adjust_saturation_image, tv_tensors.Image), + (F.adjust_saturation_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.adjust_saturation, kernel=kernel, input_type=input_type) + + def test_functional_error(self): + with pytest.raises(TypeError, match="permitted channel values are 1 or 3"): + F.adjust_saturation(make_image(color_space="RGBA"), saturation_factor=0.5) + + with pytest.raises(ValueError, match="is not non-negative"): + F.adjust_saturation(make_image(), saturation_factor=-1) + + @pytest.mark.parametrize("saturation_factor", [0.1, 0.5, 1.0]) + def test_correctness_image(self, saturation_factor): + image = make_image(dtype=torch.uint8, device="cpu") + + actual = F.adjust_saturation(image, saturation_factor=saturation_factor) + expected = F.to_image(F.adjust_saturation(F.to_pil_image(image), saturation_factor=saturation_factor)) + + assert_close(actual, expected, rtol=0, atol=1) From 555d44a3c802dad010151b94b3385193427997a3 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: 
Mon, 11 Sep 2023 23:07:27 +0200 Subject: [PATCH 16/27] cleanup --- test/test_transforms_v2_consistency.py | 50 ---- test/transforms_v2_dispatcher_infos.py | 64 ----- test/transforms_v2_kernel_infos.py | 378 ------------------------- 3 files changed, 492 deletions(-) diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index 010198dd41a..c2fe5723f69 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -149,48 +149,6 @@ def __init__( # images given that the transform does nothing but call it anyway. supports_pil=False, ), - ConsistencyConfig( - v2_transforms.RandomPosterize, - legacy_transforms.RandomPosterize, - [ - ArgsKwargs(p=0, bits=5), - ArgsKwargs(p=1, bits=1), - ArgsKwargs(p=1, bits=3), - ], - make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, dtypes=[torch.uint8]), - ), - ConsistencyConfig( - v2_transforms.RandomSolarize, - legacy_transforms.RandomSolarize, - [ - ArgsKwargs(p=0, threshold=0.5), - ArgsKwargs(p=1, threshold=0.3), - ArgsKwargs(p=1, threshold=0.99), - ], - ), - *[ - ConsistencyConfig( - v2_transforms.RandomAutocontrast, - legacy_transforms.RandomAutocontrast, - [ - ArgsKwargs(p=0), - ArgsKwargs(p=1), - ], - make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, dtypes=[dt]), - closeness_kwargs=ckw, - ) - for dt, ckw in [(torch.uint8, dict(atol=1, rtol=0)), (torch.float32, dict(rtol=None, atol=None))] - ], - ConsistencyConfig( - v2_transforms.RandomAdjustSharpness, - legacy_transforms.RandomAdjustSharpness, - [ - ArgsKwargs(p=0, sharpness_factor=0.5), - ArgsKwargs(p=1, sharpness_factor=0.2), - ArgsKwargs(p=1, sharpness_factor=0.99), - ], - closeness_kwargs={"atol": 1e-6, "rtol": 1e-6}, - ), ConsistencyConfig( v2_transforms.RandomGrayscale, legacy_transforms.RandomGrayscale, @@ -794,17 +752,9 @@ def test_common(self, t_ref, t, data_kwargs): (legacy_F.to_pil_image, {}), (legacy_F.five_crop, {}), (legacy_F.ten_crop, {}), - (legacy_F.adjust_contrast, {}), - (legacy_F.adjust_saturation, {}), - (legacy_F.adjust_hue, {}), - (legacy_F.adjust_gamma, {}), (legacy_F.to_grayscale, {}), (legacy_F.rgb_to_grayscale, {}), (legacy_F.to_tensor, {}), - (legacy_F.posterize, {}), - (legacy_F.solarize, {}), - (legacy_F.adjust_sharpness, {}), - (legacy_F.autocontrast, {}), ], ) def test_dispatcher_signature_consistency(legacy_dispatcher, name_only_params): diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index a84923b620e..af5f48b148b 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -111,70 +111,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): DISPATCHER_INFOS = [ - DispatcherInfo( - F.posterize, - kernels={ - tv_tensors.Image: F.posterize_image, - tv_tensors.Video: F.posterize_video, - }, - pil_kernel_info=PILKernelInfo(F._posterize_image_pil, kernel_name="posterize_image_pil"), - ), - DispatcherInfo( - F.solarize, - kernels={ - tv_tensors.Image: F.solarize_image, - tv_tensors.Video: F.solarize_video, - }, - pil_kernel_info=PILKernelInfo(F._solarize_image_pil, kernel_name="solarize_image_pil"), - ), - DispatcherInfo( - F.autocontrast, - kernels={ - tv_tensors.Image: F.autocontrast_image, - tv_tensors.Video: F.autocontrast_video, - }, - pil_kernel_info=PILKernelInfo(F._autocontrast_image_pil, kernel_name="autocontrast_image_pil"), - ), - DispatcherInfo( - F.adjust_sharpness, - kernels={ - tv_tensors.Image: F.adjust_sharpness_image, - tv_tensors.Video: F.adjust_sharpness_video, - }, - 
pil_kernel_info=PILKernelInfo(F._adjust_sharpness_image_pil, kernel_name="adjust_sharpness_image_pil"), - ), - DispatcherInfo( - F.adjust_contrast, - kernels={ - tv_tensors.Image: F.adjust_contrast_image, - tv_tensors.Video: F.adjust_contrast_video, - }, - pil_kernel_info=PILKernelInfo(F._adjust_contrast_image_pil, kernel_name="adjust_contrast_image_pil"), - ), - DispatcherInfo( - F.adjust_gamma, - kernels={ - tv_tensors.Image: F.adjust_gamma_image, - tv_tensors.Video: F.adjust_gamma_video, - }, - pil_kernel_info=PILKernelInfo(F._adjust_gamma_image_pil, kernel_name="adjust_gamma_image_pil"), - ), - DispatcherInfo( - F.adjust_hue, - kernels={ - tv_tensors.Image: F.adjust_hue_image, - tv_tensors.Video: F.adjust_hue_video, - }, - pil_kernel_info=PILKernelInfo(F._adjust_hue_image_pil, kernel_name="adjust_hue_image_pil"), - ), - DispatcherInfo( - F.adjust_saturation, - kernels={ - tv_tensors.Image: F.adjust_saturation_image, - tv_tensors.Video: F.adjust_saturation_video, - }, - pil_kernel_info=PILKernelInfo(F._adjust_saturation_image_pil, kernel_name="adjust_saturation_image_pil"), - ), DispatcherInfo( F.five_crop, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index 8371e60ffd9..c97ef48e707 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -5,10 +5,8 @@ import pytest import torch.testing import torchvision.transforms.v2.functional as F -from torchvision.transforms._functional_tensor import _max_value as get_max_value from transforms_v2_legacy_utils import ( ArgsKwargs, - DEFAULT_PORTRAIT_SPATIAL_SIZE, InfoBase, make_image_loaders, make_video_loaders, @@ -64,35 +62,6 @@ def __init__( self.logs_usage = logs_usage -def pixel_difference_closeness_kwargs(uint8_atol, *, dtype=torch.uint8, mae=False): - return dict(atol=uint8_atol / 255 * get_max_value(dtype), rtol=0, mae=mae) - - -def cuda_vs_cpu_pixel_difference(atol=1): - return { - (("TestKernels", "test_cuda_vs_cpu"), dtype, "cuda"): pixel_difference_closeness_kwargs(atol, dtype=dtype) - for dtype in [torch.uint8, torch.float32] - } - - -def pil_reference_pixel_difference(atol=1, mae=False): - return { - (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): pixel_difference_closeness_kwargs( - atol, mae=mae - ) - } - - -def float32_vs_uint8_pixel_difference(atol=1, mae=False): - return { - ( - ("TestKernels", "test_float32_vs_uint8"), - torch.float32, - "cpu", - ): pixel_difference_closeness_kwargs(atol, dtype=torch.float32, mae=mae) - } - - def pil_reference_wrapper(pil_kernel): @functools.wraps(pil_kernel) def wrapper(input_tensor, *other_args, **kwargs): @@ -135,353 +104,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): KERNEL_INFOS = [] -_POSTERIZE_BITS = [1, 4, 8] - - -def sample_inputs_posterize_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader, bits=_POSTERIZE_BITS[0]) - - -def reference_inputs_posterize_image_tensor(): - for image_loader, bits in itertools.product( - make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]), - _POSTERIZE_BITS, - ): - yield ArgsKwargs(image_loader, bits=bits) - - -def sample_inputs_posterize_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader, bits=_POSTERIZE_BITS[0]) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.posterize_image, - kernel_name="posterize_image_tensor", - 
sample_inputs_fn=sample_inputs_posterize_image_tensor, - reference_fn=pil_reference_wrapper(F._posterize_image_pil), - reference_inputs_fn=reference_inputs_posterize_image_tensor, - float32_vs_uint8=True, - closeness_kwargs=float32_vs_uint8_pixel_difference(), - ), - KernelInfo( - F.posterize_video, - sample_inputs_fn=sample_inputs_posterize_video, - ), - ] -) - - -def _get_solarize_thresholds(dtype): - for factor in [0.1, 0.5]: - max_value = get_max_value(dtype) - yield (float if dtype.is_floating_point else int)(max_value * factor) - - -def sample_inputs_solarize_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype))) - - -def reference_inputs_solarize_image_tensor(): - for image_loader in make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]): - for threshold in _get_solarize_thresholds(image_loader.dtype): - yield ArgsKwargs(image_loader, threshold=threshold) - - -def uint8_to_float32_threshold_adapter(other_args, kwargs): - return other_args, dict(threshold=kwargs["threshold"] / 255) - - -def sample_inputs_solarize_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader, threshold=next(_get_solarize_thresholds(video_loader.dtype))) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.solarize_image, - kernel_name="solarize_image_tensor", - sample_inputs_fn=sample_inputs_solarize_image_tensor, - reference_fn=pil_reference_wrapper(F._solarize_image_pil), - reference_inputs_fn=reference_inputs_solarize_image_tensor, - float32_vs_uint8=uint8_to_float32_threshold_adapter, - closeness_kwargs=float32_vs_uint8_pixel_difference(), - ), - KernelInfo( - F.solarize_video, - sample_inputs_fn=sample_inputs_solarize_video, - ), - ] -) - - -def sample_inputs_autocontrast_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader) - - -def reference_inputs_autocontrast_image_tensor(): - for image_loader in make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_autocontrast_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.autocontrast_image, - kernel_name="autocontrast_image_tensor", - sample_inputs_fn=sample_inputs_autocontrast_image_tensor, - reference_fn=pil_reference_wrapper(F._autocontrast_image_pil), - reference_inputs_fn=reference_inputs_autocontrast_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(), - **float32_vs_uint8_pixel_difference(), - }, - ), - KernelInfo( - F.autocontrast_video, - sample_inputs_fn=sample_inputs_autocontrast_video, - ), - ] -) - -_ADJUST_SHARPNESS_FACTORS = [0.1, 0.5] - - -def sample_inputs_adjust_sharpness_image_tensor(): - for image_loader in make_image_loaders( - sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE, (2, 2)], - color_spaces=("GRAY", "RGB"), - ): - yield ArgsKwargs(image_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) - - -def reference_inputs_adjust_sharpness_image_tensor(): - for image_loader, sharpness_factor in itertools.product( - make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]), 
- _ADJUST_SHARPNESS_FACTORS, - ): - yield ArgsKwargs(image_loader, sharpness_factor=sharpness_factor) - - -def sample_inputs_adjust_sharpness_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.adjust_sharpness_image, - kernel_name="adjust_sharpness_image_tensor", - sample_inputs_fn=sample_inputs_adjust_sharpness_image_tensor, - reference_fn=pil_reference_wrapper(F._adjust_sharpness_image_pil), - reference_inputs_fn=reference_inputs_adjust_sharpness_image_tensor, - float32_vs_uint8=True, - closeness_kwargs=float32_vs_uint8_pixel_difference(2), - ), - KernelInfo( - F.adjust_sharpness_video, - sample_inputs_fn=sample_inputs_adjust_sharpness_video, - ), - ] -) - - -_ADJUST_CONTRAST_FACTORS = [0.1, 0.5] - - -def sample_inputs_adjust_contrast_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0]) - - -def reference_inputs_adjust_contrast_image_tensor(): - for image_loader, contrast_factor in itertools.product( - make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]), - _ADJUST_CONTRAST_FACTORS, - ): - yield ArgsKwargs(image_loader, contrast_factor=contrast_factor) - - -def sample_inputs_adjust_contrast_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0]) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.adjust_contrast_image, - kernel_name="adjust_contrast_image_tensor", - sample_inputs_fn=sample_inputs_adjust_contrast_image_tensor, - reference_fn=pil_reference_wrapper(F._adjust_contrast_image_pil), - reference_inputs_fn=reference_inputs_adjust_contrast_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(), - **float32_vs_uint8_pixel_difference(2), - **cuda_vs_cpu_pixel_difference(), - (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): pixel_difference_closeness_kwargs(1), - }, - ), - KernelInfo( - F.adjust_contrast_video, - sample_inputs_fn=sample_inputs_adjust_contrast_video, - closeness_kwargs={ - **cuda_vs_cpu_pixel_difference(), - (("TestKernels", "test_against_reference"), torch.uint8, "cpu"): pixel_difference_closeness_kwargs(1), - }, - ), - ] -) - -_ADJUST_GAMMA_GAMMAS_GAINS = [ - (0.5, 2.0), - (0.0, 1.0), -] - - -def sample_inputs_adjust_gamma_image_tensor(): - gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0] - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader, gamma=gamma, gain=gain) - - -def reference_inputs_adjust_gamma_image_tensor(): - for image_loader, (gamma, gain) in itertools.product( - make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]), - _ADJUST_GAMMA_GAMMAS_GAINS, - ): - yield ArgsKwargs(image_loader, gamma=gamma, gain=gain) - - -def sample_inputs_adjust_gamma_video(): - gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0] - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader, gamma=gamma, gain=gain) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.adjust_gamma_image, - kernel_name="adjust_gamma_image_tensor", - 
sample_inputs_fn=sample_inputs_adjust_gamma_image_tensor, - reference_fn=pil_reference_wrapper(F._adjust_gamma_image_pil), - reference_inputs_fn=reference_inputs_adjust_gamma_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(), - **float32_vs_uint8_pixel_difference(), - }, - ), - KernelInfo( - F.adjust_gamma_video, - sample_inputs_fn=sample_inputs_adjust_gamma_video, - ), - ] -) - - -_ADJUST_HUE_FACTORS = [-0.1, 0.5] - - -def sample_inputs_adjust_hue_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader, hue_factor=_ADJUST_HUE_FACTORS[0]) - - -def reference_inputs_adjust_hue_image_tensor(): - for image_loader, hue_factor in itertools.product( - make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]), - _ADJUST_HUE_FACTORS, - ): - yield ArgsKwargs(image_loader, hue_factor=hue_factor) - - -def sample_inputs_adjust_hue_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader, hue_factor=_ADJUST_HUE_FACTORS[0]) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.adjust_hue_image, - kernel_name="adjust_hue_image_tensor", - sample_inputs_fn=sample_inputs_adjust_hue_image_tensor, - reference_fn=pil_reference_wrapper(F._adjust_hue_image_pil), - reference_inputs_fn=reference_inputs_adjust_hue_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(2, mae=True), - **float32_vs_uint8_pixel_difference(), - }, - ), - KernelInfo( - F.adjust_hue_video, - sample_inputs_fn=sample_inputs_adjust_hue_video, - ), - ] -) - -_ADJUST_SATURATION_FACTORS = [0.1, 0.5] - - -def sample_inputs_adjust_saturation_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): - yield ArgsKwargs(image_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0]) - - -def reference_inputs_adjust_saturation_image_tensor(): - for image_loader, saturation_factor in itertools.product( - make_image_loaders(color_spaces=("GRAY", "RGB"), extra_dims=[()], dtypes=[torch.uint8]), - _ADJUST_SATURATION_FACTORS, - ): - yield ArgsKwargs(image_loader, saturation_factor=saturation_factor) - - -def sample_inputs_adjust_saturation_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - yield ArgsKwargs(video_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0]) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.adjust_saturation_image, - kernel_name="adjust_saturation_image_tensor", - sample_inputs_fn=sample_inputs_adjust_saturation_image_tensor, - reference_fn=pil_reference_wrapper(F._adjust_saturation_image_pil), - reference_inputs_fn=reference_inputs_adjust_saturation_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(), - **float32_vs_uint8_pixel_difference(2), - **cuda_vs_cpu_pixel_difference(), - }, - ), - KernelInfo( - F.adjust_saturation_video, - sample_inputs_fn=sample_inputs_adjust_saturation_video, - closeness_kwargs=cuda_vs_cpu_pixel_difference(), - ), - ] -) - - _FIVE_TEN_CROP_SIZES = [7, (6,), [5], (6, 5), [7, 6]] From 0683b1c166a79cb4987690a2141efe4d9e0e4d95 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 12 Sep 2023 14:39:01 +0200 Subject: [PATCH 17/27] fix normalize --- test/test_transforms_v2_refactored.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py
index ddadad8ca54..fbe44929e85 100644
--- a/test/test_transforms_v2_refactored.py
+++ b/test/test_transforms_v2_refactored.py
@@ -4066,7 +4066,7 @@ def test_kernel_video(self):
 
     @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
     def test_functional(self, make_input):
-        check_functional(F.normalize, make_input(dtype=torch.float32))
+        check_functional(F.normalize, make_input(dtype=torch.float32), mean=self.MEAN, std=self.STD)
 
     @pytest.mark.parametrize(
         ("kernel", "input_type"),

From 01992d315789fc2486f06b11083c57c9fab00764 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Tue, 12 Sep 2023 15:31:21 +0200
Subject: [PATCH 18/27] add tests for five / ten crop

---
 test/test_transforms_v2_refactored.py | 107 +++++++++++++++++++++++++-
 1 file changed, 104 insertions(+), 3 deletions(-)

diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py
index fbe44929e85..cfad5fe064e 100644
--- a/test/test_transforms_v2_refactored.py
+++ b/test/test_transforms_v2_refactored.py
@@ -194,7 +194,10 @@ def check_functional(functional, input, *args, check_scripted_smoke=True, **kwar
 
     spy.assert_any_call(f"{functional.__module__}.{functional.__name__}")
 
-    assert isinstance(output, type(input))
+    if functional in {F.five_crop, F.ten_crop}:
+        assert all(isinstance(o, type(input)) for o in output)
+    else:
+        assert isinstance(output, type(input))
 
     if isinstance(input, tv_tensors.BoundingBoxes) and functional is not F.convert_bounding_box_format:
         assert output.format == input.format
@@ -259,7 +262,13 @@ def _check_transform_v1_compatibility(transform, input, *, rtol, atol):
 
     with freeze_rng_state():
         output_v1 = v1_transform(input)
 
-    assert_close(F.to_image(output_v2), F.to_image(output_v1), rtol=rtol, atol=atol)
+    output_v2, output_v1 = [
+        type(output)(F.to_image(o) for o in output)
+        if isinstance(transform, (transforms.FiveCrop, transforms.TenCrop))
+        else F.to_image(output)
+        for output in [output_v2, output_v1]
+    ]
+    assert_close(output_v2, output_v1, rtol=rtol, atol=atol)
 
     if isinstance(input, PIL.Image.Image):
         return
@@ -271,7 +280,11 @@ def check_transform(transform, input, check_v1_compatibility=True):
     pickle.loads(pickle.dumps(transform))
 
     output = transform(input)
-    assert isinstance(output, type(input))
+
+    if isinstance(transform, (transforms.FiveCrop, transforms.TenCrop)):
+        assert all(isinstance(o, type(input)) for o in output)
+    else:
+        assert isinstance(output, type(input))
 
     if isinstance(input, tv_tensors.BoundingBoxes) and not isinstance(transform, transforms.ConvertBoundingBoxFormat):
         assert output.format == input.format
@@ -4536,3 +4549,91 @@ def test_correctness_image(self, saturation_factor):
         expected = F.to_image(F.adjust_saturation(F.to_pil_image(image), saturation_factor=saturation_factor))
 
         assert_close(actual, expected, rtol=0, atol=1)
+
+
+class TestFiveTenCrop:
+    INPUT_SIZE = (17, 11)
+    OUTPUT_SIZE = (3, 5)
+
+    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize("kernel", [F.five_crop_image, F.ten_crop_image])
+    def test_kernel_image(self, dtype, device, kernel):
+        check_kernel(
+            kernel,
+            make_image(self.INPUT_SIZE, dtype=dtype, device=device),
+            size=self.OUTPUT_SIZE,
+            check_batched_vs_unbatched=False,
+        )
+
+    @pytest.mark.parametrize("kernel", [F.five_crop_video, F.ten_crop_video])
+    def test_kernel_video(self, kernel):
+        check_kernel(kernel,
make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZE, check_batched_vs_unbatched=False) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + @pytest.mark.parametrize("functional", [F.five_crop, F.ten_crop]) + def test_functional(self, make_input, functional): + check_functional(functional, make_input(self.INPUT_SIZE), size=self.OUTPUT_SIZE) + + @pytest.mark.parametrize( + ("functional", "kernel", "input_type"), + [ + (F.five_crop, F.five_crop_image, torch.Tensor), + (F.five_crop, F._five_crop_image_pil, PIL.Image.Image), + (F.five_crop, F.five_crop_image, tv_tensors.Image), + (F.five_crop, F.five_crop_video, tv_tensors.Video), + (F.ten_crop, F.ten_crop_image, torch.Tensor), + (F.ten_crop, F._ten_crop_image_pil, PIL.Image.Image), + (F.ten_crop, F.ten_crop_image, tv_tensors.Image), + (F.ten_crop, F.ten_crop_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, functional, kernel, input_type): + check_functional_kernel_signature_match(functional, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop]) + def test_transform(self, make_input, transform_cls): + check_transform(transform_cls(size=self.OUTPUT_SIZE), make_input(self.INPUT_SIZE)) + + @pytest.mark.parametrize("make_input", [make_bounding_boxes, make_detection_mask]) + @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop]) + def test_transform_error(self, make_input, transform_cls): + transform = transform_cls(size=self.OUTPUT_SIZE) + + with pytest.raises(TypeError, match="not supported"): + transform(make_input(self.INPUT_SIZE)) + + @pytest.mark.parametrize("fn", [F.five_crop, transform_cls_to_functional(transforms.FiveCrop)]) + def test_correctness_image_five_crop(self, fn): + image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu") + + actual = fn(image, size=self.OUTPUT_SIZE) + expected = F.five_crop(F.to_pil_image(image), size=self.OUTPUT_SIZE) + + assert isinstance(actual, tuple) + assert_equal(actual, [F.to_image(e) for e in expected]) + + @pytest.mark.parametrize("fn_or_class", [F.ten_crop, transforms.TenCrop]) + @pytest.mark.parametrize("vertical_flip", [False, True]) + def test_correctness_image_ten_crop(self, fn_or_class, vertical_flip): + if fn_or_class is transforms.TenCrop: + fn = transform_cls_to_functional(fn_or_class, size=self.OUTPUT_SIZE, vertical_flip=vertical_flip) + kwargs = dict() + else: + fn = fn_or_class + kwargs = dict(size=self.OUTPUT_SIZE, vertical_flip=vertical_flip) + + image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu") + + actual = fn(image, **kwargs) + expected = F.ten_crop(F.to_pil_image(image), size=self.OUTPUT_SIZE, vertical_flip=vertical_flip) + + assert isinstance(actual, tuple) + assert_equal(actual, [F.to_image(e) for e in expected]) From 775e5ecea3a84a55e545fe52b11540eba4f23fde Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 13 Sep 2023 09:43:38 +0200 Subject: [PATCH 19/27] cleanup --- test/test_transforms_v2_consistency.py | 21 -- test/test_transforms_v2_functional.py | 443 ------------------------- test/transforms_v2_dispatcher_infos.py | 138 -------- test/transforms_v2_kernel_infos.py | 210 ------------ test/transforms_v2_legacy_utils.py | 4 +- 5 files changed, 1 insertion(+), 815 deletions(-) delete mode 100644 test/transforms_v2_dispatcher_infos.py delete mode 100644 
test/transforms_v2_kernel_infos.py diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index c2fe5723f69..f3d46e90f4e 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -72,25 +72,6 @@ def __init__( LINEAR_TRANSFORMATION_MATRIX = torch.rand([LINEAR_TRANSFORMATION_MEAN.numel()] * 2) CONSISTENCY_CONFIGS = [ - ConsistencyConfig( - v2_transforms.FiveCrop, - legacy_transforms.FiveCrop, - [ - ArgsKwargs(18), - ArgsKwargs((18, 13)), - ], - make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, sizes=[(20, 19)]), - ), - ConsistencyConfig( - v2_transforms.TenCrop, - legacy_transforms.TenCrop, - [ - ArgsKwargs(18), - ArgsKwargs((18, 13)), - ArgsKwargs(18, vertical_flip=True), - ], - make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, sizes=[(20, 19)]), - ), *[ ConsistencyConfig( v2_transforms.LinearTransformation, @@ -750,8 +731,6 @@ def test_common(self, t_ref, t, data_kwargs): (legacy_F.pil_to_tensor, {}), (legacy_F.convert_image_dtype, {}), (legacy_F.to_pil_image, {}), - (legacy_F.five_crop, {}), - (legacy_F.ten_crop, {}), (legacy_F.to_grayscale, {}), (legacy_F.rgb_to_grayscale, {}), (legacy_F.to_tensor, {}), diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index badaca69ca8..71d0cce6dd8 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ -1,452 +1,9 @@ -import inspect -import re - import numpy as np import PIL.Image import pytest import torch -from common_utils import assert_close, cache, cpu_and_cuda, needs_cuda, set_rng_seed -from torch.utils._pytree import tree_map -from torchvision import tv_tensors from torchvision.transforms.v2 import functional as F -from torchvision.transforms.v2._utils import is_pure_tensor -from transforms_v2_dispatcher_infos import DISPATCHER_INFOS -from transforms_v2_kernel_infos import KERNEL_INFOS -from transforms_v2_legacy_utils import parametrized_error_message - - -KERNEL_INFOS_MAP = {info.kernel: info for info in KERNEL_INFOS} -DISPATCHER_INFOS_MAP = {info.dispatcher: info for info in DISPATCHER_INFOS} - - -@cache -def script(fn): - try: - return torch.jit.script(fn) - except Exception as error: - raise AssertionError(f"Trying to `torch.jit.script` '{fn.__name__}' raised the error above.") from error - - -# Scripting a function often triggers a warning like -# `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information` -# with varying `INT1` and `INT2`. Since these are uninteresting for us and only clutter the test summary, we ignore -# them. 
-ignore_jit_warning_no_profile = pytest.mark.filterwarnings( - f"ignore:{re.escape('operator() profile_node %')}:UserWarning" -) - - -def make_info_args_kwargs_params(info, *, args_kwargs_fn, test_id=None): - args_kwargs = list(args_kwargs_fn(info)) - if not args_kwargs: - raise pytest.UsageError( - f"Couldn't collect a single `ArgsKwargs` for `{info.id}`{f' in {test_id}' if test_id else ''}" - ) - idx_field_len = len(str(len(args_kwargs))) - return [ - pytest.param( - info, - args_kwargs_, - marks=info.get_marks(test_id, args_kwargs_) if test_id else [], - id=f"{info.id}-{idx:0{idx_field_len}}", - ) - for idx, args_kwargs_ in enumerate(args_kwargs) - ] - - -def make_info_args_kwargs_parametrization(infos, *, args_kwargs_fn): - def decorator(test_fn): - parts = test_fn.__qualname__.split(".") - if len(parts) == 1: - test_class_name = None - test_function_name = parts[0] - elif len(parts) == 2: - test_class_name, test_function_name = parts - else: - raise pytest.UsageError("Unable to parse the test class name and test function name from test function") - test_id = (test_class_name, test_function_name) - - argnames = ("info", "args_kwargs") - argvalues = [] - for info in infos: - argvalues.extend(make_info_args_kwargs_params(info, args_kwargs_fn=args_kwargs_fn, test_id=test_id)) - - return pytest.mark.parametrize(argnames, argvalues)(test_fn) - - return decorator - - -@pytest.fixture(autouse=True) -def fix_rng_seed(): - set_rng_seed(0) - yield - - -@pytest.fixture() -def test_id(request): - test_class_name = request.cls.__name__ if request.cls is not None else None - test_function_name = request.node.originalname - return test_class_name, test_function_name - - -class TestKernels: - sample_inputs = make_info_args_kwargs_parametrization( - KERNEL_INFOS, - args_kwargs_fn=lambda kernel_info: kernel_info.sample_inputs_fn(), - ) - reference_inputs = make_info_args_kwargs_parametrization( - [info for info in KERNEL_INFOS if info.reference_fn is not None], - args_kwargs_fn=lambda info: info.reference_inputs_fn(), - ) - - @make_info_args_kwargs_parametrization( - [info for info in KERNEL_INFOS if info.logs_usage], - args_kwargs_fn=lambda info: info.sample_inputs_fn(), - ) - @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_logging(self, spy_on, info, args_kwargs, device): - spy = spy_on(torch._C._log_api_usage_once) - - (input, *other_args), kwargs = args_kwargs.load(device) - info.kernel(input.as_subclass(torch.Tensor), *other_args, **kwargs) - - spy.assert_any_call(f"{info.kernel.__module__}.{info.id}") - - @ignore_jit_warning_no_profile - @sample_inputs - @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_scripted_vs_eager(self, test_id, info, args_kwargs, device): - kernel_eager = info.kernel - kernel_scripted = script(kernel_eager) - - (input, *other_args), kwargs = args_kwargs.load(device) - input = input.as_subclass(torch.Tensor) - - actual = kernel_scripted(input, *other_args, **kwargs) - expected = kernel_eager(input, *other_args, **kwargs) - - assert_close( - actual, - expected, - **info.get_closeness_kwargs(test_id, dtype=input.dtype, device=input.device), - msg=parametrized_error_message(input, other_args, **kwargs), - ) - - def _unbatch(self, batch, *, data_dims): - if isinstance(batch, torch.Tensor): - batched_tensor = batch - metadata = () - else: - batched_tensor, *metadata = batch - - if batched_tensor.ndim == data_dims: - return batch - - return [ - self._unbatch(unbatched, data_dims=data_dims) - for unbatched in ( - batched_tensor.unbind(0) if not metadata 
else [(t, *metadata) for t in batched_tensor.unbind(0)] - ) - ] - - @sample_inputs - @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_batched_vs_single(self, test_id, info, args_kwargs, device): - (batched_input, *other_args), kwargs = args_kwargs.load(device) - - tv_tensor_type = tv_tensors.Image if is_pure_tensor(batched_input) else type(batched_input) - # This dictionary contains the number of rightmost dimensions that contain the actual data. - # Everything to the left is considered a batch dimension. - data_dims = { - tv_tensors.Image: 3, - tv_tensors.BoundingBoxes: 1, - # `Mask`'s are special in the sense that the data dimensions depend on the type of mask. For detection masks - # it is 3 `(*, N, H, W)`, but for segmentation masks it is 2 `(*, H, W)`. Since both a grouped under one - # type all kernels should also work without differentiating between the two. Thus, we go with 2 here as - # common ground. - tv_tensors.Mask: 2, - tv_tensors.Video: 4, - }.get(tv_tensor_type) - if data_dims is None: - raise pytest.UsageError( - f"The number of data dimensions cannot be determined for input of type {tv_tensor_type.__name__}." - ) from None - elif batched_input.ndim <= data_dims: - pytest.skip("Input is not batched.") - elif not all(batched_input.shape[:-data_dims]): - pytest.skip("Input has a degenerate batch shape.") - - batched_input = batched_input.as_subclass(torch.Tensor) - batched_output = info.kernel(batched_input, *other_args, **kwargs) - actual = self._unbatch(batched_output, data_dims=data_dims) - - single_inputs = self._unbatch(batched_input, data_dims=data_dims) - expected = tree_map(lambda single_input: info.kernel(single_input, *other_args, **kwargs), single_inputs) - - assert_close( - actual, - expected, - **info.get_closeness_kwargs(test_id, dtype=batched_input.dtype, device=batched_input.device), - msg=parametrized_error_message(batched_input, *other_args, **kwargs), - ) - - @sample_inputs - @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_no_inplace(self, info, args_kwargs, device): - (input, *other_args), kwargs = args_kwargs.load(device) - input = input.as_subclass(torch.Tensor) - - if input.numel() == 0: - pytest.skip("The input has a degenerate shape.") - - input_version = input._version - info.kernel(input, *other_args, **kwargs) - - assert input._version == input_version - - @sample_inputs - @needs_cuda - def test_cuda_vs_cpu(self, test_id, info, args_kwargs): - (input_cpu, *other_args), kwargs = args_kwargs.load("cpu") - input_cpu = input_cpu.as_subclass(torch.Tensor) - input_cuda = input_cpu.to("cuda") - - output_cpu = info.kernel(input_cpu, *other_args, **kwargs) - output_cuda = info.kernel(input_cuda, *other_args, **kwargs) - - assert_close( - output_cuda, - output_cpu, - check_device=False, - **info.get_closeness_kwargs(test_id, dtype=input_cuda.dtype, device=input_cuda.device), - msg=parametrized_error_message(input_cpu, *other_args, **kwargs), - ) - - @sample_inputs - @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_dtype_and_device_consistency(self, info, args_kwargs, device): - (input, *other_args), kwargs = args_kwargs.load(device) - input = input.as_subclass(torch.Tensor) - - output = info.kernel(input, *other_args, **kwargs) - # Most kernels just return a tensor, but some also return some additional metadata - if not isinstance(output, torch.Tensor): - output, *_ = output - - assert output.dtype == input.dtype - assert output.device == input.device - - @reference_inputs - def test_against_reference(self, test_id, 
info, args_kwargs): - (input, *other_args), kwargs = args_kwargs.load("cpu") - - actual = info.kernel(input.as_subclass(torch.Tensor), *other_args, **kwargs) - # We intnetionally don't unwrap the input of the reference function in order for it to have access to all - # metadata regardless of whether the kernel takes it explicitly or not - expected = info.reference_fn(input, *other_args, **kwargs) - - assert_close( - actual, - expected, - **info.get_closeness_kwargs(test_id, dtype=input.dtype, device=input.device), - msg=parametrized_error_message(input, *other_args, **kwargs), - ) - - @make_info_args_kwargs_parametrization( - [info for info in KERNEL_INFOS if info.float32_vs_uint8], - args_kwargs_fn=lambda info: info.reference_inputs_fn(), - ) - def test_float32_vs_uint8(self, test_id, info, args_kwargs): - (input, *other_args), kwargs = args_kwargs.load("cpu") - input = input.as_subclass(torch.Tensor) - - if input.dtype != torch.uint8: - pytest.skip(f"Input dtype is {input.dtype}.") - - adapted_other_args, adapted_kwargs = info.float32_vs_uint8(other_args, kwargs) - - actual = info.kernel( - F.to_dtype_image(input, dtype=torch.float32, scale=True), - *adapted_other_args, - **adapted_kwargs, - ) - - expected = F.to_dtype_image(info.kernel(input, *other_args, **kwargs), dtype=torch.float32, scale=True) - - assert_close( - actual, - expected, - **info.get_closeness_kwargs(test_id, dtype=torch.float32, device=input.device), - msg=parametrized_error_message(input, *other_args, **kwargs), - ) - - -@pytest.fixture -def spy_on(mocker): - def make_spy(fn, *, module=None, name=None): - # TODO: we can probably get rid of the non-default modules and names if we eliminate aliasing - module = module or fn.__module__ - name = name or fn.__name__ - spy = mocker.patch(f"{module}.{name}", wraps=fn) - return spy - - return make_spy - - -class TestDispatchers: - image_sample_inputs = make_info_args_kwargs_parametrization( - [info for info in DISPATCHER_INFOS if tv_tensors.Image in info.kernels], - args_kwargs_fn=lambda info: info.sample_inputs(tv_tensors.Image), - ) - - @make_info_args_kwargs_parametrization( - DISPATCHER_INFOS, - args_kwargs_fn=lambda info: info.sample_inputs(), - ) - @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_logging(self, spy_on, info, args_kwargs, device): - spy = spy_on(torch._C._log_api_usage_once) - - args, kwargs = args_kwargs.load(device) - info.dispatcher(*args, **kwargs) - - spy.assert_any_call(f"{info.dispatcher.__module__}.{info.id}") - - @ignore_jit_warning_no_profile - @image_sample_inputs - @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_scripted_smoke(self, info, args_kwargs, device): - dispatcher = script(info.dispatcher) - - (image_tv_tensor, *other_args), kwargs = args_kwargs.load(device) - image_pure_tensor = torch.Tensor(image_tv_tensor) - - dispatcher(image_pure_tensor, *other_args, **kwargs) - - # TODO: We need this until the dispatchers below also have `DispatcherInfo`'s. If they do, `test_scripted_smoke` - # replaces this test for them. 
- @ignore_jit_warning_no_profile - @pytest.mark.parametrize( - "dispatcher", - [ - F.get_dimensions, - F.get_image_num_channels, - F.get_image_size, - F.get_num_channels, - F.get_num_frames, - F.get_size, - F.rgb_to_grayscale, - F.uniform_temporal_subsample, - ], - ids=lambda dispatcher: dispatcher.__name__, - ) - def test_scriptable(self, dispatcher): - script(dispatcher) - - @image_sample_inputs - def test_pure_tensor_output_type(self, info, args_kwargs): - (image_tv_tensor, *other_args), kwargs = args_kwargs.load() - image_pure_tensor = image_tv_tensor.as_subclass(torch.Tensor) - - output = info.dispatcher(image_pure_tensor, *other_args, **kwargs) - - # We cannot use `isinstance` here since all tv_tensors are instances of `torch.Tensor` as well - assert type(output) is torch.Tensor - - @make_info_args_kwargs_parametrization( - [info for info in DISPATCHER_INFOS if info.pil_kernel_info is not None], - args_kwargs_fn=lambda info: info.sample_inputs(tv_tensors.Image), - ) - def test_pil_output_type(self, info, args_kwargs): - (image_tv_tensor, *other_args), kwargs = args_kwargs.load() - - if image_tv_tensor.ndim > 3: - pytest.skip("Input is batched") - - image_pil = F.to_pil_image(image_tv_tensor) - - output = info.dispatcher(image_pil, *other_args, **kwargs) - - assert isinstance(output, PIL.Image.Image) - - @make_info_args_kwargs_parametrization( - DISPATCHER_INFOS, - args_kwargs_fn=lambda info: info.sample_inputs(), - ) - def test_tv_tensor_output_type(self, info, args_kwargs): - (tv_tensor, *other_args), kwargs = args_kwargs.load() - - output = info.dispatcher(tv_tensor, *other_args, **kwargs) - - assert isinstance(output, type(tv_tensor)) - - if isinstance(tv_tensor, tv_tensors.BoundingBoxes) and info.dispatcher is not F.convert_bounding_box_format: - assert output.format == tv_tensor.format - - @pytest.mark.parametrize( - ("dispatcher_info", "tv_tensor_type", "kernel_info"), - [ - pytest.param( - dispatcher_info, tv_tensor_type, kernel_info, id=f"{dispatcher_info.id}-{tv_tensor_type.__name__}" - ) - for dispatcher_info in DISPATCHER_INFOS - for tv_tensor_type, kernel_info in dispatcher_info.kernel_infos.items() - ], - ) - def test_dispatcher_kernel_signatures_consistency(self, dispatcher_info, tv_tensor_type, kernel_info): - dispatcher_signature = inspect.signature(dispatcher_info.dispatcher) - dispatcher_params = list(dispatcher_signature.parameters.values())[1:] - - kernel_signature = inspect.signature(kernel_info.kernel) - kernel_params = list(kernel_signature.parameters.values())[1:] - - # We filter out metadata that is implicitly passed to the dispatcher through the input tv_tensor, but has to be - # explicitly passed to the kernel. - input_type = {v: k for k, v in dispatcher_info.kernels.items()}.get(kernel_info.kernel) - explicit_metadata = { - tv_tensors.BoundingBoxes: {"format", "canvas_size"}, - } - kernel_params = [param for param in kernel_params if param.name not in explicit_metadata.get(input_type, set())] - - dispatcher_params = iter(dispatcher_params) - for dispatcher_param, kernel_param in zip(dispatcher_params, kernel_params): - try: - # In general, the dispatcher parameters are a superset of the kernel parameters. Thus, we filter out - # dispatcher parameters that have no kernel equivalent while keeping the order intact. 
- while dispatcher_param.name != kernel_param.name: - dispatcher_param = next(dispatcher_params) - except StopIteration: - raise AssertionError( - f"Parameter `{kernel_param.name}` of kernel `{kernel_info.id}` " - f"has no corresponding parameter on the dispatcher `{dispatcher_info.id}`." - ) from None - - assert dispatcher_param == kernel_param - - @pytest.mark.parametrize("info", DISPATCHER_INFOS, ids=lambda info: info.id) - def test_unkown_type(self, info): - unkown_input = object() - (_, *other_args), kwargs = next(iter(info.sample_inputs())).load("cpu") - - with pytest.raises(TypeError, match=re.escape(str(type(unkown_input)))): - info.dispatcher(unkown_input, *other_args, **kwargs) - - @make_info_args_kwargs_parametrization( - [ - info - for info in DISPATCHER_INFOS - if tv_tensors.BoundingBoxes in info.kernels and info.dispatcher is not F.convert_bounding_box_format - ], - args_kwargs_fn=lambda info: info.sample_inputs(tv_tensors.BoundingBoxes), - ) - def test_bounding_boxes_format_consistency(self, info, args_kwargs): - (bounding_boxes, *other_args), kwargs = args_kwargs.load() - format = bounding_boxes.format - - output = info.dispatcher(bounding_boxes, *other_args, **kwargs) - - assert output.format == format @pytest.mark.parametrize( diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py deleted file mode 100644 index af5f48b148b..00000000000 --- a/test/transforms_v2_dispatcher_infos.py +++ /dev/null @@ -1,138 +0,0 @@ -import pytest -import torchvision.transforms.v2.functional as F -from torchvision import tv_tensors -from transforms_v2_kernel_infos import KERNEL_INFOS -from transforms_v2_legacy_utils import InfoBase, TestMark - -__all__ = ["DispatcherInfo", "DISPATCHER_INFOS"] - - -class PILKernelInfo(InfoBase): - def __init__( - self, - kernel, - *, - # Defaults to `kernel.__name__`. Should be set if the function is exposed under a different name - # TODO: This can probably be removed after roll-out since we shouldn't have any aliasing then - kernel_name=None, - ): - super().__init__(id=kernel_name or kernel.__name__) - self.kernel = kernel - - -class DispatcherInfo(InfoBase): - _KERNEL_INFO_MAP = {info.kernel: info for info in KERNEL_INFOS} - - def __init__( - self, - dispatcher, - *, - # Dictionary of types that map to the kernel the dispatcher dispatches to. - kernels, - # If omitted, no PIL dispatch test will be performed. - pil_kernel_info=None, - # See InfoBase - test_marks=None, - # See InfoBase - closeness_kwargs=None, - ): - super().__init__(id=dispatcher.__name__, test_marks=test_marks, closeness_kwargs=closeness_kwargs) - self.dispatcher = dispatcher - self.kernels = kernels - self.pil_kernel_info = pil_kernel_info - - kernel_infos = {} - for tv_tensor_type, kernel in self.kernels.items(): - kernel_info = self._KERNEL_INFO_MAP.get(kernel) - if not kernel_info: - raise pytest.UsageError( - f"Can't register {kernel.__name__} for type {tv_tensor_type} since there is no `KernelInfo` for it. " - f"Please add a `KernelInfo` for it in `transforms_v2_kernel_infos.py`." 
- ) - kernel_infos[tv_tensor_type] = kernel_info - self.kernel_infos = kernel_infos - - def sample_inputs(self, *tv_tensor_types, filter_metadata=True): - for tv_tensor_type in tv_tensor_types or self.kernel_infos.keys(): - kernel_info = self.kernel_infos.get(tv_tensor_type) - if not kernel_info: - raise pytest.UsageError(f"There is no kernel registered for type {type.__name__}") - - sample_inputs = kernel_info.sample_inputs_fn() - - if not filter_metadata: - yield from sample_inputs - return - - import itertools - - for args_kwargs in sample_inputs: - if hasattr(tv_tensor_type, "__annotations__"): - for name in itertools.chain( - tv_tensor_type.__annotations__.keys(), - # FIXME: this seems ok for conversion dispatchers, but we should probably handle this on a - # per-dispatcher level. However, so far there is no option for that. - (f"old_{name}" for name in tv_tensor_type.__annotations__.keys()), - ): - if name in args_kwargs.kwargs: - del args_kwargs.kwargs[name] - - yield args_kwargs - - -def xfail_jit(reason, *, condition=None): - return TestMark( - ("TestDispatchers", "test_scripted_smoke"), - pytest.mark.xfail(reason=reason), - condition=condition, - ) - - -def xfail_jit_python_scalar_arg(name, *, reason=None): - return xfail_jit( - reason or f"Python scalar int or float for `{name}` is not supported when scripting", - condition=lambda args_kwargs: isinstance(args_kwargs.kwargs.get(name), (int, float)), - ) - - -skip_dispatch_tv_tensor = TestMark( - ("TestDispatchers", "test_dispatch_tv_tensor"), - pytest.mark.skip(reason="Dispatcher doesn't support arbitrary tv_tensor dispatch."), -) - -multi_crop_skips = [ - TestMark( - ("TestDispatchers", test_name), - pytest.mark.skip(reason="Multi-crop dispatchers return a sequence of items rather than a single one."), - ) - for test_name in ["test_pure_tensor_output_type", "test_pil_output_type", "test_tv_tensor_output_type"] -] -multi_crop_skips.append(skip_dispatch_tv_tensor) - - -DISPATCHER_INFOS = [ - DispatcherInfo( - F.five_crop, - kernels={ - tv_tensors.Image: F.five_crop_image, - tv_tensors.Video: F.five_crop_video, - }, - pil_kernel_info=PILKernelInfo(F._five_crop_image_pil), - test_marks=[ - xfail_jit_python_scalar_arg("size"), - *multi_crop_skips, - ], - ), - DispatcherInfo( - F.ten_crop, - kernels={ - tv_tensors.Image: F.ten_crop_image, - tv_tensors.Video: F.ten_crop_video, - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - *multi_crop_skips, - ], - pil_kernel_info=PILKernelInfo(F._ten_crop_image_pil), - ), -] diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py deleted file mode 100644 index c97ef48e707..00000000000 --- a/test/transforms_v2_kernel_infos.py +++ /dev/null @@ -1,210 +0,0 @@ -import functools -import itertools - -import PIL.Image -import pytest -import torch.testing -import torchvision.transforms.v2.functional as F -from transforms_v2_legacy_utils import ( - ArgsKwargs, - InfoBase, - make_image_loaders, - make_video_loaders, - mark_framework_limitation, - TestMark, -) - -__all__ = ["KernelInfo", "KERNEL_INFOS"] - - -class KernelInfo(InfoBase): - def __init__( - self, - kernel, - *, - # Defaults to `kernel.__name__`. Should be set if the function is exposed under a different name - # TODO: This can probably be removed after roll-out since we shouldn't have any aliasing then - kernel_name=None, - # Most common tests use these inputs to check the kernel. 
As such it should cover all valid code paths, but - # should not include extensive parameter combinations to keep to overall test count moderate. - sample_inputs_fn, - # This function should mirror the kernel. It should have the same signature as the `kernel` and as such also - # take tensors as inputs. Any conversion into another object type, e.g. PIL images or numpy arrays, should - # happen inside the function. It should return a tensor or to be more precise an object that can be compared to - # a tensor by `assert_close`. If omitted, no reference test will be performed. - reference_fn=None, - # These inputs are only used for the reference tests and thus can be comprehensive with regard to the parameter - # values to be tested. If not specified, `sample_inputs_fn` will be used. - reference_inputs_fn=None, - # If true-ish, triggers a test that checks the kernel for consistency between uint8 and float32 inputs with the - # reference inputs. This is usually used whenever we use a PIL kernel as reference. - # Can be a callable in which case it will be called with `other_args, kwargs`. It should return the same - # structure, but with adapted parameters. This is useful in case a parameter value is closely tied to the input - # dtype. - float32_vs_uint8=False, - # Some kernels don't have dispatchers that would handle logging the usage. Thus, the kernel has to do it - # manually. If set, triggers a test that makes sure this happens. - logs_usage=False, - # See InfoBase - test_marks=None, - # See InfoBase - closeness_kwargs=None, - ): - super().__init__(id=kernel_name or kernel.__name__, test_marks=test_marks, closeness_kwargs=closeness_kwargs) - self.kernel = kernel - self.sample_inputs_fn = sample_inputs_fn - self.reference_fn = reference_fn - self.reference_inputs_fn = reference_inputs_fn - - if float32_vs_uint8 and not callable(float32_vs_uint8): - float32_vs_uint8 = lambda other_args, kwargs: (other_args, kwargs) # noqa: E731 - self.float32_vs_uint8 = float32_vs_uint8 - self.logs_usage = logs_usage - - -def pil_reference_wrapper(pil_kernel): - @functools.wraps(pil_kernel) - def wrapper(input_tensor, *other_args, **kwargs): - if input_tensor.dtype != torch.uint8: - raise pytest.UsageError(f"Can only test uint8 tensor images against PIL, but input is {input_tensor.dtype}") - if input_tensor.ndim > 3: - raise pytest.UsageError( - f"Can only test single tensor images against PIL, but input has shape {input_tensor.shape}" - ) - - input_pil = F.to_pil_image(input_tensor) - output_pil = pil_kernel(input_pil, *other_args, **kwargs) - if not isinstance(output_pil, PIL.Image.Image): - return output_pil - - output_tensor = F.to_image(output_pil) - - # 2D mask shenanigans - if output_tensor.ndim == 2 and input_tensor.ndim == 3: - output_tensor = output_tensor.unsqueeze(0) - elif output_tensor.ndim == 3 and input_tensor.ndim == 2: - output_tensor = output_tensor.squeeze(0) - - return output_tensor - - return wrapper - - -def xfail_jit(reason, *, condition=None): - return TestMark(("TestKernels", "test_scripted_vs_eager"), pytest.mark.xfail(reason=reason), condition=condition) - - -def xfail_jit_python_scalar_arg(name, *, reason=None): - return xfail_jit( - reason or f"Python scalar int or float for `{name}` is not supported when scripting", - condition=lambda args_kwargs: isinstance(args_kwargs.kwargs.get(name), (int, float)), - ) - - -KERNEL_INFOS = [] - - -_FIVE_TEN_CROP_SIZES = [7, (6,), [5], (6, 5), [7, 6]] - - -def _get_five_ten_crop_canvas_size(size): - if isinstance(size, int): - crop_height = 
crop_width = size - elif len(size) == 1: - crop_height = crop_width = size[0] - else: - crop_height, crop_width = size - return 2 * crop_height, 2 * crop_width - - -def sample_inputs_five_crop_image_tensor(): - for size in _FIVE_TEN_CROP_SIZES: - for image_loader in make_image_loaders( - sizes=[_get_five_ten_crop_canvas_size(size)], - color_spaces=["RGB"], - dtypes=[torch.float32], - ): - yield ArgsKwargs(image_loader, size=size) - - -def reference_inputs_five_crop_image_tensor(): - for size in _FIVE_TEN_CROP_SIZES: - for image_loader in make_image_loaders( - sizes=[_get_five_ten_crop_canvas_size(size)], extra_dims=[()], dtypes=[torch.uint8] - ): - yield ArgsKwargs(image_loader, size=size) - - -def sample_inputs_five_crop_video(): - size = _FIVE_TEN_CROP_SIZES[0] - for video_loader in make_video_loaders(sizes=[_get_five_ten_crop_canvas_size(size)]): - yield ArgsKwargs(video_loader, size=size) - - -def sample_inputs_ten_crop_image_tensor(): - for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]): - for image_loader in make_image_loaders( - sizes=[_get_five_ten_crop_canvas_size(size)], - color_spaces=["RGB"], - dtypes=[torch.float32], - ): - yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip) - - -def reference_inputs_ten_crop_image_tensor(): - for size, vertical_flip in itertools.product(_FIVE_TEN_CROP_SIZES, [False, True]): - for image_loader in make_image_loaders( - sizes=[_get_five_ten_crop_canvas_size(size)], extra_dims=[()], dtypes=[torch.uint8] - ): - yield ArgsKwargs(image_loader, size=size, vertical_flip=vertical_flip) - - -def sample_inputs_ten_crop_video(): - size = _FIVE_TEN_CROP_SIZES[0] - for video_loader in make_video_loaders(sizes=[_get_five_ten_crop_canvas_size(size)]): - yield ArgsKwargs(video_loader, size=size) - - -def multi_crop_pil_reference_wrapper(pil_kernel): - def wrapper(input_tensor, *other_args, **kwargs): - output = pil_reference_wrapper(pil_kernel)(input_tensor, *other_args, **kwargs) - return type(output)( - F.to_dtype_image(F.to_image(output_pil), dtype=input_tensor.dtype, scale=True) for output_pil in output - ) - - return wrapper - - -_common_five_ten_crop_marks = [ - xfail_jit_python_scalar_arg("size"), - mark_framework_limitation(("TestKernels", "test_batched_vs_single"), "Custom batching needed."), -] - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.five_crop_image, - sample_inputs_fn=sample_inputs_five_crop_image_tensor, - reference_fn=multi_crop_pil_reference_wrapper(F._five_crop_image_pil), - reference_inputs_fn=reference_inputs_five_crop_image_tensor, - test_marks=_common_five_ten_crop_marks, - ), - KernelInfo( - F.five_crop_video, - sample_inputs_fn=sample_inputs_five_crop_video, - test_marks=_common_five_ten_crop_marks, - ), - KernelInfo( - F.ten_crop_image, - sample_inputs_fn=sample_inputs_ten_crop_image_tensor, - reference_fn=multi_crop_pil_reference_wrapper(F._ten_crop_image_pil), - reference_inputs_fn=reference_inputs_ten_crop_image_tensor, - test_marks=_common_five_ten_crop_marks, - ), - KernelInfo( - F.ten_crop_video, - sample_inputs_fn=sample_inputs_ten_crop_video, - test_marks=_common_five_ten_crop_marks, - ), - ] -) diff --git a/test/transforms_v2_legacy_utils.py b/test/transforms_v2_legacy_utils.py index 9dead793422..bf24412082c 100644 --- a/test/transforms_v2_legacy_utils.py +++ b/test/transforms_v2_legacy_utils.py @@ -5,11 +5,9 @@ The following legacy modules depend on this module -- transforms_v2_kernel_infos.py -- transforms_v2_dispatcher_infos.py - test_transforms_v2_functional.py - 
test_transforms_v2_consistency.py -- test_transforms.py +- test_transforms_v2.py When all the logic is ported from the files above to test_transforms_v2_refactored.py, delete all the legacy modules including this one and drop the _refactored prefix from the name. From 7b1ef58d4c6282eed1f3d05f4d86c6c628e24341 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 14 Sep 2023 10:59:43 +0200 Subject: [PATCH 20/27] port sample input smoke test --- test/common_utils.py | 4 +- test/test_transforms_v2.py | 60 +------- test/test_transforms_v2_refactored.py | 196 +++++++++++++++++++++++--- 3 files changed, 176 insertions(+), 84 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index a1d188efdae..73eb6b7d0b1 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -406,6 +406,7 @@ def make_bounding_boxes( canvas_size=DEFAULT_SIZE, *, format=tv_tensors.BoundingBoxFormat.XYXY, + num_objects=1, dtype=None, device="cpu", ): @@ -419,8 +420,7 @@ def sample_position(values, max_value): dtype = dtype or torch.float32 - num_objects = 1 - h, w = [torch.randint(1, c, (num_objects,)) for c in canvas_size] + h, w = [torch.randint(1, s, (num_objects,)) for s in canvas_size] y = sample_position(h, canvas_size[0]) x = sample_position(w, canvas_size[1]) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 3e8456e1ef1..2014b9f6515 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -17,7 +17,7 @@ from torchvision.ops.boxes import box_iou from torchvision.transforms.functional import to_pil_image from torchvision.transforms.v2 import functional as F -from torchvision.transforms.v2._utils import check_type, is_pure_tensor, query_chw +from torchvision.transforms.v2._utils import is_pure_tensor, query_chw from transforms_v2_legacy_utils import ( make_bounding_boxes, make_detection_mask, @@ -62,22 +62,6 @@ def parametrize(transforms_with_inputs): ) -def auto_augment_adapter(transform, input, device): - adapted_input = {} - image_or_video_found = False - for key, value in input.items(): - if isinstance(value, (tv_tensors.BoundingBoxes, tv_tensors.Mask)): - # AA transforms don't support bounding boxes or masks - continue - elif check_type(value, (tv_tensors.Image, tv_tensors.Video, is_pure_tensor, PIL.Image.Image)): - if image_or_video_found: - # AA transforms only support a single image or video - continue - image_or_video_found = True - adapted_input[key] = value - return adapted_input - - def linear_transformation_adapter(transform, input, device): flat_inputs = list(input.values()) c, h, w = query_chw( @@ -93,58 +77,19 @@ def linear_transformation_adapter(transform, input, device): return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)} -def normalize_adapter(transform, input, device): - adapted_input = {} - for key, value in input.items(): - if isinstance(value, PIL.Image.Image): - # normalize doesn't support PIL images - continue - elif check_type(value, (tv_tensors.Image, tv_tensors.Video, is_pure_tensor)): - # normalize doesn't support integer images - value = F.to_dtype(value, torch.float32, scale=True) - adapted_input[key] = value - return adapted_input - - class TestSmoke: @pytest.mark.parametrize( ("transform", "adapter"), [ - (transforms.RandomErasing(p=1.0), None), - (transforms.AugMix(), auto_augment_adapter), - (transforms.AutoAugment(), auto_augment_adapter), - (transforms.RandAugment(), auto_augment_adapter), - (transforms.TrivialAugmentWide(), auto_augment_adapter), 
(transforms.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.3, hue=0.15), None), (transforms.Grayscale(), None), - (transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1.0), None), - (transforms.RandomAutocontrast(p=1.0), None), - (transforms.RandomEqualize(p=1.0), None), (transforms.RandomGrayscale(p=1.0), None), - (transforms.RandomInvert(p=1.0), None), (transforms.RandomChannelPermutation(), None), (transforms.RandomPhotometricDistort(p=1.0), None), - (transforms.RandomPosterize(bits=4, p=1.0), None), - (transforms.RandomSolarize(threshold=0.5, p=1.0), None), - (transforms.CenterCrop([16, 16]), None), - (transforms.ElasticTransform(sigma=1.0), None), - (transforms.Pad(4), None), - (transforms.RandomAffine(degrees=30.0), None), - (transforms.RandomCrop([16, 16], pad_if_needed=True), None), - (transforms.RandomHorizontalFlip(p=1.0), None), - (transforms.RandomPerspective(p=1.0), None), - (transforms.RandomResize(min_size=10, max_size=20, antialias=True), None), - (transforms.RandomResizedCrop([16, 16], antialias=True), None), - (transforms.RandomRotation(degrees=30), None), (transforms.RandomShortestSize(min_size=10, antialias=True), None), - (transforms.RandomVerticalFlip(p=1.0), None), (transforms.RandomZoomOut(p=1.0), None), - (transforms.Resize([16, 16], antialias=True), None), (transforms.ScaleJitter((16, 16), scale_range=(0.8, 1.2), antialias=True), None), - (transforms.ClampBoundingBoxes(), None), - (transforms.ConvertBoundingBoxFormat(tv_tensors.BoundingBoxFormat.CXCYWH), None), (transforms.ConvertImageDtype(), None), - (transforms.GaussianBlur(kernel_size=3), None), ( transforms.LinearTransformation( # These are just dummy values that will be filled by the adapter. We can't define them upfront, @@ -154,9 +99,6 @@ class TestSmoke: ), linear_transformation_adapter, ), - (transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), normalize_adapter), - (transforms.ToDtype(torch.float64), None), - (transforms.UniformTemporalSubsample(num_samples=2), None), ], ids=lambda transform: type(transform).__name__, ) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index cfad5fe064e..015ad721b1c 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -6,6 +6,7 @@ import math import pickle import re +from copy import deepcopy from pathlib import Path from unittest import mock @@ -38,13 +39,14 @@ from torch import nn from torch.testing import assert_close -from torch.utils._pytree import tree_map +from torch.utils._pytree import tree_flatten, tree_map from torch.utils.data import DataLoader, default_collate from torchvision import tv_tensors from torchvision.transforms._functional_tensor import _max_value as get_max_value from torchvision.transforms.functional import pil_modes_mapping from torchvision.transforms.v2 import functional as F +from torchvision.transforms.v2._utils import check_type, is_pure_tensor from torchvision.transforms.v2.functional._geometry import _get_perspective_coeffs from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal @@ -276,7 +278,120 @@ def _check_transform_v1_compatibility(transform, input, *, rtol, atol): _script(v1_transform)(input) -def check_transform(transform, input, check_v1_compatibility=True): +def _make_transform_sample(transform, *, image_or_video, adapter): + device = image_or_video.device if isinstance(image_or_video, torch.Tensor) else "cpu" + size = F.get_size(image_or_video) + input = dict( + 
image_or_video=image_or_video, + image_tv_tensor=make_image(size, device=device), + video_tv_tensor=make_video(size, device=device), + image_pil=make_image_pil(size), + bounding_boxes_xyxy=make_bounding_boxes( + size, format=tv_tensors.BoundingBoxFormat.XYXY, num_objects=3, device=device + ), + bounding_boxes_xywh=make_bounding_boxes( + size, + format=tv_tensors.BoundingBoxFormat.XYWH, + num_objects=4, + device=device, + ), + bounding_boxes_cxcywh=make_bounding_boxes( + size, + format=tv_tensors.BoundingBoxFormat.CXCYWH, + num_objects=5, + device=device, + ), + bounding_boxes_degenerate_xyxy=tv_tensors.BoundingBoxes( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no height + [0, 0, 1, 0], # no width + [2, 0, 1, 1], # x1 > x2, y1 < y2 + [0, 2, 1, 1], # x1 < x2, y1 > y2 + [2, 2, 1, 1], # x1 > x2, y1 > y2 + ], + format=tv_tensors.BoundingBoxFormat.XYXY, + canvas_size=size, + device=device, + ), + bounding_boxes_degenerate_xywh=tv_tensors.BoundingBoxes( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no height + [0, 0, 1, 0], # no width + [0, 0, 1, -1], # negative height + [0, 0, -1, 1], # negative width + [0, 0, -1, -1], # negative height and width + ], + format=tv_tensors.BoundingBoxFormat.XYWH, + canvas_size=size, + device=device, + ), + bounding_boxes_degenerate_cxcywh=tv_tensors.BoundingBoxes( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no height + [0, 0, 1, 0], # no width + [0, 0, 1, -1], # negative height + [0, 0, -1, 1], # negative width + [0, 0, -1, -1], # negative height and width + ], + format=tv_tensors.BoundingBoxFormat.CXCYWH, + canvas_size=size, + device=device, + ), + detection_mask=make_detection_mask(size, device=device), + segmentation_mask=make_segmentation_mask(size, device=device), + int=0, + float=0.0, + bool=True, + none=None, + str="str", + path=Path.cwd(), + object=object(), + tensor=torch.empty(5), + array=np.empty(5), + ) + if adapter is not None: + input = adapter(transform, input, device) + return input + + +def _check_transform_sample_input_smoke(transform, input, *, adapter): + if not check_type(input, (is_pure_tensor, PIL.Image.Image, tv_tensors.Image, tv_tensors.Video)): + return + image_or_video = input + + for container_type in [dict, list, tuple]: + input = _make_transform_sample( + # adapter might change transform inplace + transform=transform if adapter is None else deepcopy(transform), + image_or_video=image_or_video, + adapter=adapter, + ) + + if container_type in {tuple, list}: + input = container_type(input.values()) + + input_flat, input_spec = tree_flatten(input) + + with freeze_rng_state(): + torch.manual_seed(0) + output = transform(input) + output_flat, output_spec = tree_flatten(output) + + assert output_spec == input_spec + + for output_item, input_item, should_be_transformed in zip( + output_flat, input_flat, transforms.Transform()._needs_transform_list(input_flat) + ): + if should_be_transformed: + assert type(output_item) is type(input_item) + else: + assert output_item is input_item + + +def check_transform(transform, input, check_v1_compatibility=True, check_sample_input=True): pickle.loads(pickle.dumps(transform)) output = transform(input) @@ -289,6 +404,11 @@ def check_transform(transform, input, check_v1_compatibility=True): if isinstance(input, tv_tensors.BoundingBoxes) and not isinstance(transform, transforms.ConvertBoundingBoxFormat): assert output.format == input.format + if check_sample_input: + _check_transform_sample_input_smoke( + transform, input, adapter=check_sample_input if 
callable(check_sample_input) else None + ) + if check_v1_compatibility: _check_transform_v1_compatibility(transform, input, **_to_tolerances(check_v1_compatibility)) @@ -1800,7 +1920,7 @@ def test_transform(self, make_input, input_dtype, output_dtype, device, scale, a input = make_input(dtype=input_dtype, device=device) if as_dict: output_dtype = {type(input): output_dtype} - check_transform(transforms.ToDtype(dtype=output_dtype, scale=scale), input) + check_transform(transforms.ToDtype(dtype=output_dtype, scale=scale), input, check_sample_input=not as_dict) def reference_convert_dtype_image_tensor(self, image, dtype=torch.float, scale=False): input_dtype = image.dtype @@ -2601,9 +2721,13 @@ def test_functional_image_correctness(self, kwargs): def test_transform(self, param, value, make_input): input = make_input(self.INPUT_SIZE) + check_sample_input = True if param == "fill": - if isinstance(input, tv_tensors.Mask) and isinstance(value, (tuple, list)): - pytest.skip("F.pad_mask doesn't support non-scalar fill.") + if isinstance(value, (tuple, list)): + if isinstance(input, tv_tensors.Mask): + pytest.skip("F.pad_mask doesn't support non-scalar fill.") + else: + check_sample_input = False kwargs = dict( # 1. size is required @@ -2618,6 +2742,7 @@ def test_transform(self, param, value, make_input): transforms.RandomCrop(**kwargs, pad_if_needed=True), input, check_v1_compatibility=param != "fill" or isinstance(value, (int, float)), + check_sample_input=check_sample_input, ) @pytest.mark.parametrize("padding", [1, (1, 1), (1, 1, 1, 1)]) @@ -2803,9 +2928,13 @@ def test_functional_signature(self, kernel, input_type): @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform(self, make_input, device): input = make_input(device=device) - check_transform( - transforms.RandomErasing(p=1), input, check_v1_compatibility=not isinstance(input, PIL.Image.Image) - ) + + with pytest.warns(UserWarning, match="currently passing through inputs of type"): + check_transform( + transforms.RandomErasing(p=1), + input, + check_v1_compatibility=not isinstance(input, PIL.Image.Image), + ) def _reference_erase_image(self, image, *, i, j, h, w, v): mask = torch.zeros_like(image, dtype=torch.bool) @@ -2877,18 +3006,6 @@ def test_transform_errors(self): with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"): transform._get_params([make_image()]) - @pytest.mark.parametrize("make_input", [make_bounding_boxes, make_detection_mask]) - def test_transform_passthrough(self, make_input): - transform = transforms.RandomErasing(p=1) - - input = make_input(self.INPUT_SIZE) - - with pytest.warns(UserWarning, match="currently passing through inputs of type"): - # RandomErasing requires an image or video to be present - _, output = transform(make_image(self.INPUT_SIZE), input) - - assert output is input - class TestGaussianBlur: @pytest.mark.parametrize("kernel_size", [1, 3, (3, 1), [3, 5]]) @@ -3105,6 +3222,21 @@ def test_correctness_shear_translate(self, transform_id, magnitude, interpolatio else: assert_close(actual, expected, rtol=0, atol=1) + def _sample_input_adapter(self, transform, input, device): + adapted_input = {} + image_or_video_found = False + for key, value in input.items(): + if isinstance(value, (tv_tensors.BoundingBoxes, tv_tensors.Mask)): + # AA transforms don't support bounding boxes or masks + continue + elif check_type(value, (tv_tensors.Image, tv_tensors.Video, is_pure_tensor, PIL.Image.Image)): + if image_or_video_found: + # AA transforms only support 
a single image or video
+                    continue
+                image_or_video_found = True
+            adapted_input[key] = value
+        return adapted_input
+
     @pytest.mark.parametrize(
         "transform",
         [transforms.AutoAugment(), transforms.RandAugment(), transforms.TrivialAugmentWide(), transforms.AugMix()],
@@ -3129,7 +3261,9 @@ def test_transform_smoke(self, transform, make_input, dtype, device):
         # For v2, we changed the random sampling of the AA transforms. This makes it impossible to compare the v1
         # and v2 outputs without complicated mocking and monkeypatching. Thus, we skip the v1 compatibility checks
         # here and only check if we can script the v2 transform and subsequently call the result.
-        check_transform(transform, input, check_v1_compatibility=False)
+        check_transform(
+            transform, input, check_v1_compatibility=False, check_sample_input=self._sample_input_adapter
+        )
 
         if type(input) is torch.Tensor and dtype is torch.uint8:
             _script(transform)(input)
@@ -4103,9 +4237,25 @@ def test_functional_error(self):
         with pytest.raises(ValueError, match="std evaluated to zero, leading to division by zero"):
             F.normalize_image(make_image(dtype=torch.float32), mean=self.MEAN, std=std)
 
+    def _sample_input_adapter(self, transform, input, device):
+        adapted_input = {}
+        for key, value in input.items():
+            if isinstance(value, PIL.Image.Image):
+                # normalize doesn't support PIL images
+                continue
+            elif check_type(value, (is_pure_tensor, tv_tensors.Image, tv_tensors.Video)):
+                # normalize doesn't support integer images
+                value = F.to_dtype(value, torch.float32, scale=True)
+            adapted_input[key] = value
+        return adapted_input
+
     @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
     def test_transform(self, make_input):
-        check_transform(transforms.Normalize(mean=self.MEAN, std=self.STD), make_input(dtype=torch.float32))
+        check_transform(
+            transforms.Normalize(mean=self.MEAN, std=self.STD),
+            make_input(dtype=torch.float32),
+            check_sample_input=self._sample_input_adapter,
+        )
 
     def _assert_is_standard_normal_distributed(self, tensor):
         result = scipy.stats.kstest(tensor.flatten().cpu(), cdf="norm", args=(0, 1))
@@ -4600,7 +4750,7 @@ def test_functional_signature(self, functional, kernel, input_type):
     )
     @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop])
     def test_transform(self, make_input, transform_cls):
-        check_transform(transform_cls(size=self.OUTPUT_SIZE), make_input(self.INPUT_SIZE))
+        check_transform(transform_cls(size=self.OUTPUT_SIZE), make_input(self.INPUT_SIZE), check_sample_input=False)
 
     @pytest.mark.parametrize("make_input", [make_bounding_boxes, make_detection_mask])
     @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop])

From af89f737e969b203000bd42baf33d75fd91b3d5a Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Tue, 19 Sep 2023 10:44:37 +0200
Subject: [PATCH 21/27] port tests for LinearTransform

---
 test/test_transforms_v2.py             | 61 +-------------------
 test/test_transforms_v2_consistency.py | 22 ---------
 test/test_transforms_v2_refactored.py  | 63 ++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 82 deletions(-)

diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
index 2014b9f6515..e3ee3c76317 100644
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -17,7 +17,7 @@
 from torchvision.ops.boxes import box_iou
 from torchvision.transforms.functional import to_pil_image
 from torchvision.transforms.v2 import functional as F
-from torchvision.transforms.v2._utils import is_pure_tensor, query_chw
+from torchvision.transforms.v2._utils import is_pure_tensor from transforms_v2_legacy_utils import ( make_bounding_boxes, make_detection_mask, @@ -62,21 +62,6 @@ def parametrize(transforms_with_inputs): ) -def linear_transformation_adapter(transform, input, device): - flat_inputs = list(input.values()) - c, h, w = query_chw( - [ - item - for item, needs_transform in zip(flat_inputs, transforms.Transform()._needs_transform_list(flat_inputs)) - if needs_transform - ] - ) - num_elements = c * h * w - transform.transformation_matrix = torch.randn((num_elements, num_elements), device=device) - transform.mean_vector = torch.randn((num_elements,), device=device) - return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)} - - class TestSmoke: @pytest.mark.parametrize( ("transform", "adapter"), @@ -84,21 +69,10 @@ class TestSmoke: (transforms.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.3, hue=0.15), None), (transforms.Grayscale(), None), (transforms.RandomGrayscale(p=1.0), None), - (transforms.RandomChannelPermutation(), None), (transforms.RandomPhotometricDistort(p=1.0), None), (transforms.RandomShortestSize(min_size=10, antialias=True), None), (transforms.RandomZoomOut(p=1.0), None), (transforms.ScaleJitter((16, 16), scale_range=(0.8, 1.2), antialias=True), None), - (transforms.ConvertImageDtype(), None), - ( - transforms.LinearTransformation( - # These are just dummy values that will be filled by the adapter. We can't define them upfront, - # because for we neither know the spatial size nor the device at this point - transformation_matrix=torch.empty((1, 1)), - mean_vector=torch.empty((1,)), - ), - linear_transformation_adapter, - ), ], ids=lambda transform: type(transform).__name__, ) @@ -546,39 +520,6 @@ def test__get_params(self, min_size, max_size): assert shorter in min_size -class TestLinearTransformation: - def test_assertions(self): - with pytest.raises(ValueError, match="transformation_matrix should be square"): - transforms.LinearTransformation(torch.rand(2, 3), torch.rand(5)) - - with pytest.raises(ValueError, match="mean_vector should have the same length"): - transforms.LinearTransformation(torch.rand(3, 3), torch.rand(5)) - - @pytest.mark.parametrize( - "inpt", - [ - 122 * torch.ones(1, 3, 8, 8), - 122.0 * torch.ones(1, 3, 8, 8), - tv_tensors.Image(122 * torch.ones(1, 3, 8, 8)), - PIL.Image.new("RGB", (8, 8), (122, 122, 122)), - ], - ) - def test__transform(self, inpt): - - v = 121 * torch.ones(3 * 8 * 8) - m = torch.ones(3 * 8 * 8, 3 * 8 * 8) - transform = transforms.LinearTransformation(m, v) - - if isinstance(inpt, PIL.Image.Image): - with pytest.raises(TypeError, match="does not support PIL images"): - transform(inpt) - else: - output = transform(inpt) - assert isinstance(output, torch.Tensor) - assert output.unique() == 3 * 8 * 8 - assert output.dtype == inpt.dtype - - class TestRandomResize: def test__get_params(self): min_size = 3 diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index f3d46e90f4e..038f4a771bd 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -72,28 +72,6 @@ def __init__( LINEAR_TRANSFORMATION_MATRIX = torch.rand([LINEAR_TRANSFORMATION_MEAN.numel()] * 2) CONSISTENCY_CONFIGS = [ - *[ - ConsistencyConfig( - v2_transforms.LinearTransformation, - legacy_transforms.LinearTransformation, - [ - ArgsKwargs(LINEAR_TRANSFORMATION_MATRIX.to(matrix_dtype), LINEAR_TRANSFORMATION_MEAN.to(matrix_dtype)), - ], - # Make sure that the 
product of the height, width and number of channels matches the number of elements in - # `LINEAR_TRANSFORMATION_MEAN`. For example 2 * 6 * 3 == 4 * 3 * 3 == 36. - make_images_kwargs=dict( - DEFAULT_MAKE_IMAGES_KWARGS, sizes=[(2, 6), (4, 3)], color_spaces=["RGB"], dtypes=[image_dtype] - ), - supports_pil=False, - ) - for matrix_dtype, image_dtype in [ - (torch.float32, torch.float32), - (torch.float64, torch.float64), - (torch.float32, torch.uint8), - (torch.float64, torch.float32), - (torch.float32, torch.float64), - ] - ], ConsistencyConfig( v2_transforms.Grayscale, legacy_transforms.Grayscale, diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 015ad721b1c..1db65c0e8d1 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4787,3 +4787,66 @@ def test_correctness_image_ten_crop(self, fn_or_class, vertical_flip): assert isinstance(actual, tuple) assert_equal(actual, [F.to_image(e) for e in expected]) + + +class TestLinearTransform: + def _make_matrix_and_vector(self, input, *, device=None): + device = device or input.device + numel = math.prod(F.get_dimensions(input)) + transformation_matrix = torch.randn((numel, numel), device=device) + mean_vector = torch.randn((numel,), device=device) + return transformation_matrix, mean_vector + + def _sample_input_adapter(self, transform, input, device): + return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)} + + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, dtype, device): + input = make_input(dtype=dtype, device=device) + check_transform( + transforms.LinearTransformation(*self._make_matrix_and_vector(input)), + input, + check_sample_input=self._sample_input_adapter, + ) + + def test_transform_error(self): + with pytest.raises(ValueError, match="transformation_matrix should be square"): + transforms.LinearTransformation(transformation_matrix=torch.rand(2, 3), mean_vector=torch.rand(2)) + + with pytest.raises(ValueError, match="mean_vector should have the same length"): + transforms.LinearTransformation(transformation_matrix=torch.rand(2, 2), mean_vector=torch.rand(1)) + + for matrix_dtype, vector_dtype in [(torch.float32, torch.float64), (torch.float64, torch.float32)]: + with pytest.raises(ValueError, match="Input tensors should have the same dtype"): + transforms.LinearTransformation( + transformation_matrix=torch.rand(2, 2, dtype=matrix_dtype), + mean_vector=torch.rand(2, dtype=vector_dtype), + ) + + image = make_image() + transform = transforms.LinearTransformation(transformation_matrix=torch.rand(2, 2), mean_vector=torch.rand(2)) + with pytest.raises(ValueError, match="Input tensor and transformation matrix have incompatible shape"): + transform(image) + + transform = transforms.LinearTransformation(*self._make_matrix_and_vector(image)) + with pytest.raises(TypeError, match="does not support PIL images"): + transform(F.to_pil_image(image)) + + @needs_cuda + def test_transform_error_cuda(self): + for matrix_device, vector_device in [("cuda", "cpu"), ("cpu", "cuda")]: + with pytest.raises(ValueError, match="Input tensors should be on the same device"): + transforms.LinearTransformation( + transformation_matrix=torch.rand(2, 2, device=matrix_device), + mean_vector=torch.rand(2, device=vector_device), + ) + + for input_device, 
param_device in [("cuda", "cpu"), ("cpu", "cuda")]: + input = make_image(device=input_device) + transform = transforms.LinearTransformation(*self._make_matrix_and_vector(input, device=param_device)) + with pytest.raises( + ValueError, match="Input tensor should be on the same device as transformation matrix and mean vector" + ): + transform(input) From 7a48905384457ce3d29f3f1c513f41fa750ff6e0 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 28 Sep 2023 10:43:55 +0200 Subject: [PATCH 22/27] fix normalize and five / ten crop --- test/test_transforms_v2_refactored.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 57e699195e1..e9497b9802f 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -4148,9 +4148,25 @@ def test_functional_error(self): with pytest.raises(ValueError, match="std evaluated to zero, leading to division by zero"): F.normalize_image(make_image(dtype=torch.float32), mean=self.MEAN, std=std) + def _sample_input_adapter(self, transform, input, device): + adapted_input = {} + for key, value in input.items(): + if isinstance(value, PIL.Image.Image): + # normalize doesn't support PIL images + continue + elif check_type(value, (is_pure_tensor, tv_tensors.Image, tv_tensors.Video)): + # normalize doesn't support integer images + value = F.to_dtype(value, torch.float32, scale=True) + adapted_input[key] = value + return adapted_input + @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video]) def test_transform(self, make_input): - check_transform(transforms.Normalize(mean=self.MEAN, std=self.STD), make_input(dtype=torch.float32)) + check_transform( + transforms.Normalize(mean=self.MEAN, std=self.STD), + make_input(dtype=torch.float32), + check_sample_input=self._sample_input_adapter, + ) def _reference_normalize_image(self, image, *, mean, std): image = image.numpy() @@ -4677,7 +4693,11 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: ) @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop]) def test_transform(self, make_input, transform_cls): - check_transform(self._TransformWrapper(transform_cls(size=self.OUTPUT_SIZE)), make_input(self.INPUT_SIZE)) + check_transform( + self._TransformWrapper(transform_cls(size=self.OUTPUT_SIZE)), + make_input(self.INPUT_SIZE), + check_sample_input=False, + ) @pytest.mark.parametrize("make_input", [make_bounding_boxes, make_detection_mask]) @pytest.mark.parametrize("transform_cls", [transforms.FiveCrop, transforms.TenCrop]) From 5424bf37ee6c7cc75846d08dd8cd93d465109c81 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 28 Sep 2023 10:57:06 +0200 Subject: [PATCH 23/27] put back degenerate box test --- test/test_transforms_v2_refactored.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index e9497b9802f..9bc7dfea76a 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -375,6 +375,16 @@ def _check_transform_sample_input_smoke(transform, input, *, adapter): else: assert output_item is input_item + # Enforce that the transform does not turn a degenerate box marked by RandomIoUCrop (or any other future + # transform that does this), back into a valid one. 
+ # TODO: we should test that against all degenerate boxes above + for format in list(tv_tensors.BoundingBoxFormat): + sample = dict( + boxes=tv_tensors.BoundingBoxes([[0, 0, 0, 0]], format=format, canvas_size=(224, 244)), + labels=torch.tensor([3]), + ) + assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4) + def check_transform(transform, input, check_v1_compatibility=True, check_sample_input=True): pickle.loads(pickle.dumps(transform)) From 3f7f529395f8a7bc7628b5f1fd3336d655374ee6 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 28 Sep 2023 11:12:24 +0200 Subject: [PATCH 24/27] refactor and improve degenerate box check --- test/test_transforms_v2_refactored.py | 35 +++++++++++++++------------ 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 9bc7dfea76a..9b1cedeb749 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -345,18 +345,18 @@ def _make_transform_sample(transform, *, image_or_video, adapter): def _check_transform_sample_input_smoke(transform, input, *, adapter): if not check_type(input, (is_pure_tensor, PIL.Image.Image, tv_tensors.Image, tv_tensors.Video)): return - image_or_video = input + sample = _make_transform_sample( + # adapter might change transform inplace + transform=transform if adapter is None else deepcopy(transform), + image_or_video=input, + adapter=adapter, + ) for container_type in [dict, list, tuple]: - input = _make_transform_sample( - # adapter might change transform inplace - transform=transform if adapter is None else deepcopy(transform), - image_or_video=image_or_video, - adapter=adapter, - ) - - if container_type in {tuple, list}: - input = container_type(input.values()) + if container_type is dict: + input = sample + else: + input = container_type(sample.values()) input_flat, input_spec = tree_flatten(input) @@ -375,13 +375,16 @@ def _check_transform_sample_input_smoke(transform, input, *, adapter): else: assert output_item is input_item - # Enforce that the transform does not turn a degenerate box marked by RandomIoUCrop (or any other future - # transform that does this), back into a valid one. - # TODO: we should test that against all degenerate boxes above - for format in list(tv_tensors.BoundingBoxFormat): + # Enforce that the transform does not turn a degenerate bounding box, e.g. marked by RandomIoUCrop (or any other + # future transform that does this), back into a valid one. 
+ for degenerate_bounding_boxes in ( + bounding_box + for name, bounding_box in sample.items() + if "degenerate" in name and isinstance(bounding_box, tv_tensors.BoundingBoxes) + ): sample = dict( - boxes=tv_tensors.BoundingBoxes([[0, 0, 0, 0]], format=format, canvas_size=(224, 244)), - labels=torch.tensor([3]), + boxes=degenerate_bounding_boxes, + labels=torch.randint(10, (degenerate_bounding_boxes.shape[0],), device=degenerate_bounding_boxes.device), ) assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4) From 1173704086cd22b22aaffdd047a47bdbcda8626a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 28 Sep 2023 10:40:37 +0100 Subject: [PATCH 25/27] Fix error message --- test/test_transforms_v2_refactored.py | 2 +- torchvision/transforms/v2/_geometry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 9b1cedeb749..b502baa4367 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -832,7 +832,7 @@ def test_interpolation_int(self, interpolation, make_input): assert_equal(actual, expected) def test_transform_unknown_size_error(self): - with pytest.raises(ValueError, match="size can either be an integer or a list or tuple of one or two integers"): + with pytest.raises(ValueError, match="size can either be an integer or a sequence of one or two integers"): transforms.Resize(size=object()) @pytest.mark.parametrize( diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index df5d82e75ad..badae44319c 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -139,7 +139,7 @@ def __init__( size = list(size) else: raise ValueError( - f"size can either be an integer or a list or tuple of one or two integers, " f"but got {size} instead." + f"size can either be an integer or a sequence of one or two integers, but got {size} instead." ) self.size = size From 151f1959cb26bd1163ee7cfe1998f64797c0087c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 28 Sep 2023 10:42:24 +0100 Subject: [PATCH 26/27] Revert "Fix error message" This reverts commit 1173704086cd22b22aaffdd047a47bdbcda8626a. --- test/test_transforms_v2_refactored.py | 2 +- torchvision/transforms/v2/_geometry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index b502baa4367..9b1cedeb749 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -832,7 +832,7 @@ def test_interpolation_int(self, interpolation, make_input): assert_equal(actual, expected) def test_transform_unknown_size_error(self): - with pytest.raises(ValueError, match="size can either be an integer or a sequence of one or two integers"): + with pytest.raises(ValueError, match="size can either be an integer or a list or tuple of one or two integers"): transforms.Resize(size=object()) @pytest.mark.parametrize( diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index badae44319c..df5d82e75ad 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -139,7 +139,7 @@ def __init__( size = list(size) else: raise ValueError( - f"size can either be an integer or a sequence of one or two integers, but got {size} instead." 
+ f"size can either be an integer or a list or tuple of one or two integers, " f"but got {size} instead." ) self.size = size From d2f21fd71b83bb060e997adf269c25485db02019 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 28 Sep 2023 13:06:26 +0200 Subject: [PATCH 27/27] address comments --- test/common_utils.py | 2 +- test/test_transforms_v2_refactored.py | 20 +++++--------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 73eb6b7d0b1..4d40b0b18a4 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -406,7 +406,6 @@ def make_bounding_boxes( canvas_size=DEFAULT_SIZE, *, format=tv_tensors.BoundingBoxFormat.XYXY, - num_objects=1, dtype=None, device="cpu", ): @@ -420,6 +419,7 @@ def sample_position(values, max_value): dtype = dtype or torch.float32 + num_objects = 1 h, w = [torch.randint(1, s, (num_objects,)) for s in canvas_size] y = sample_position(h, canvas_size[0]) x = sample_position(w, canvas_size[1]) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 123847750bc..8530f1d762a 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -271,21 +271,9 @@ def _make_transform_sample(transform, *, image_or_video, adapter): image_tv_tensor=make_image(size, device=device), video_tv_tensor=make_video(size, device=device), image_pil=make_image_pil(size), - bounding_boxes_xyxy=make_bounding_boxes( - size, format=tv_tensors.BoundingBoxFormat.XYXY, num_objects=3, device=device - ), - bounding_boxes_xywh=make_bounding_boxes( - size, - format=tv_tensors.BoundingBoxFormat.XYWH, - num_objects=4, - device=device, - ), - bounding_boxes_cxcywh=make_bounding_boxes( - size, - format=tv_tensors.BoundingBoxFormat.CXCYWH, - num_objects=5, - device=device, - ), + bounding_boxes_xyxy=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.XYXY, device=device), + bounding_boxes_xywh=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.XYWH, device=device), + bounding_boxes_cxcywh=make_bounding_boxes(size, format=tv_tensors.BoundingBoxFormat.CXCYWH, device=device), bounding_boxes_degenerate_xyxy=tv_tensors.BoundingBoxes( [ [0, 0, 0, 0], # no height or width @@ -343,6 +331,8 @@ def _make_transform_sample(transform, *, image_or_video, adapter): def _check_transform_sample_input_smoke(transform, input, *, adapter): + # This is a bunch of input / output convention checks, using a big sample with different parts as input. + if not check_type(input, (is_pure_tensor, PIL.Image.Image, tv_tensors.Image, tv_tensors.Video)): return
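The sample input smoke check introduced in this series boils down to one convention that every v2 transform has to honor: given an arbitrary pytree of inputs, the transform must preserve the container structure, keep the exact type of every leaf it transforms, and pass every leaf it does not understand through by identity. A minimal, self-contained sketch of that round trip follows; the sample contents and the choice of RandomHorizontalFlip are illustrative stand-ins, not code from the patches above.

import torch
from torch.utils._pytree import tree_flatten
from torchvision import tv_tensors
from torchvision.transforms import v2 as transforms

# Illustrative sample: transformable leaves (image, boxes, mask) mixed with
# leaves a transform should leave alone (a plain int and a str).
sample = dict(
    image=tv_tensors.Image(torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)),
    boxes=tv_tensors.BoundingBoxes(
        [[2, 2, 10, 10]], format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=(32, 32)
    ),
    mask=tv_tensors.Mask(torch.zeros(32, 32, dtype=torch.uint8)),
    label=3,
    path="img.jpg",
)

output = transforms.RandomHorizontalFlip(p=1.0)(sample)

input_flat, input_spec = tree_flatten(sample)
output_flat, output_spec = tree_flatten(output)

# The container structure must survive the transform unchanged.
assert output_spec == input_spec

for input_item, output_item in zip(input_flat, output_flat):
    if isinstance(input_item, (tv_tensors.Image, tv_tensors.BoundingBoxes, tv_tensors.Mask)):
        # Transformed leaves keep their exact tv_tensor type ...
        assert type(output_item) is type(input_item)
    else:
        # ... while everything else is passed through as the very same object.
        assert output_item is input_item

Checking identity (`is`) rather than equality is what separates true passthrough from an accidental copy, which is why _check_transform_sample_input_smoke compares `output_item is input_item` for the leaves that _needs_transform_list marks as not transformable.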