diff --git a/.clang-format b/.clang-format
index 6d0ab740db4..95d60445f4a 100644
--- a/.clang-format
+++ b/.clang-format
@@ -60,9 +60,6 @@ MacroBlockBegin: ''
 MacroBlockEnd:   ''
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
 PenaltyBreakBeforeFirstCallParameter: 1
 PenaltyBreakComment: 300
 PenaltyBreakFirstLessLess: 120
@@ -85,4 +82,11 @@ SpacesInSquareBrackets: false
 Standard:        Cpp11
 TabWidth:        8
 UseTab:          Never
+---
+Language: ObjC
+ColumnLimit: 120
+AlignAfterOpenBracket: Align
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
 ...
diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh
index d102735909e..e4af4e7c61a 100755
--- a/.github/scripts/setup-env.sh
+++ b/.github/scripts/setup-env.sh
@@ -54,7 +54,11 @@ echo '::endgroup::'
 
 if [[ "${OS_TYPE}" == windows && "${GPU_ARCH_TYPE}" == cuda ]]; then
   echo '::group::Install VisualStudio CUDA extensions on Windows'
-  TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations"
+  if [[ "${VC_YEAR:-}" == "2022" ]]; then
+    TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2022/BuildTools/MSBuild/Microsoft/VC/v170/BuildCustomizations"
+  else
+    TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations"
+  fi
   mkdir -p "${TARGET_DIR}"
   cp -r "${CUDA_HOME}/MSBuildExtensions/"* "${TARGET_DIR}"
   echo '::endgroup::'
diff --git a/.github/workflows/build-cmake.yml b/.github/workflows/build-cmake.yml
index 06bd4de753e..3871dca340f 100644
--- a/.github/workflows/build-cmake.yml
+++ b/.github/workflows/build-cmake.yml
@@ -74,9 +74,9 @@ jobs:
       script: |
         set -euo pipefail
 
-        source packaging/windows/internal/vc_install_helper.sh
-
         export PYTHON_VERSION=3.8
+        export VC_YEAR=2022
+        export VSDEVCMD_ARGS=""
         export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }}
         export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }}
 
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index cd6011b4ad4..22e1a4ac18d 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -98,6 +98,8 @@ jobs:
         set -euxo pipefail
 
         export PYTHON_VERSION=${{ matrix.python-version }}
+        export VC_YEAR=2019
+        export VSDEVCMD_ARGS=""
         export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }}
         export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }}
         
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 343df7f1021..762ebf6fce0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,6 +6,8 @@ repos:
       - id: check-toml
       - id: check-yaml
         exclude: packaging/.*
+        args:
+          - --allow-multiple-documents
       - id: mixed-line-ending
         args: [--fix=lf]
       - id: end-of-file-fixer
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4bb75fe6eeb..7b3e9e8a7f3 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -140,7 +140,7 @@
     "logo_only": True,
     "pytorch_project": "docs",
     "navigation_with_keys": True,
-    "analytics_id": "UA-117752657-2",
+    "analytics_id": "GTM-T8XT4PS",
 }
 
 html_logo = "_static/img/pytorch-logo-dark.svg"
diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py
index e6a4ef9d458..6a8d35f1a6e 100644
--- a/packaging/wheel/relocate.py
+++ b/packaging/wheel/relocate.py
@@ -2,7 +2,6 @@
 
 import glob
 import hashlib
-import io
 
 # Standard library imports
 import os
@@ -65,21 +64,12 @@
 PYTHON_VERSION = sys.version_info
 
 
-def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
-    """Yield pieces of data from a file-like object until EOF."""
-    while True:
-        chunk = file.read(size)
-        if not chunk:
-            break
-        yield chunk
-
-
 def rehash(path, blocksize=1 << 20):
     """Return (hash, length) for path using hashlib.sha256()"""
     h = hashlib.sha256()
     length = 0
     with open(path, "rb") as f:
-        for block in read_chunks(f, size=blocksize):
+        while block := f.read(blocksize):
             length += len(block)
             h.update(block)
     digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=")
diff --git a/packaging/windows/internal/vc_env_helper.bat b/packaging/windows/internal/vc_env_helper.bat
index e85a372f93d..d3484a66e9f 100644
--- a/packaging/windows/internal/vc_env_helper.bat
+++ b/packaging/windows/internal/vc_env_helper.bat
@@ -1,7 +1,11 @@
 @echo on
 
-set VC_VERSION_LOWER=16
-set VC_VERSION_UPPER=17
+set VC_VERSION_LOWER=17
+set VC_VERSION_UPPER=18
+if "%VC_YEAR%" == "2019" (
+    set VC_VERSION_LOWER=16
+    set VC_VERSION_UPPER=17
+)
 if "%VC_YEAR%" == "2017" (
     set VC_VERSION_LOWER=15
     set VC_VERSION_UPPER=16
diff --git a/packaging/windows/internal/vc_install_helper.sh b/packaging/windows/internal/vc_install_helper.sh
deleted file mode 100644
index 251509ae194..00000000000
--- a/packaging/windows/internal/vc_install_helper.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-export VC_YEAR=2019
-export VSDEVCMD_ARGS=""
diff --git a/test/assets/toosmall_png/heapbof.png b/test/assets/toosmall_png/heapbof.png
new file mode 100644
index 00000000000..e720d183342
Binary files /dev/null and b/test/assets/toosmall_png/heapbof.png differ
diff --git a/test/common_utils.py b/test/common_utils.py
index 1d0b82a827c..abefd07c43d 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -7,9 +7,11 @@
 import os
 import pathlib
 import random
+import re
 import shutil
 import sys
 import tempfile
+import warnings
 from collections import defaultdict
 from subprocess import CalledProcessError, check_output, STDOUT
 from typing import Callable, Sequence, Tuple, Union
@@ -122,7 +124,7 @@ def disable_console_output():
         yield
 
 
-def cpu_and_gpu():
+def cpu_and_cuda():
     import pytest  # noqa
 
     return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
@@ -880,3 +882,23 @@ def assert_run_python_script(source_code):
             raise RuntimeError(f"script errored with output:\n{e.output.decode()}")
         if out != b"":
             raise AssertionError(out.decode())
+
+
+@contextlib.contextmanager
+def assert_no_warnings():
+    # Any warning issued inside this context is raised as an exception (`simplefilter("error")`). `catch_warnings` does
+    # **not** catch warnings; it only scopes the warning filters, so the changes made here are reset upon exit.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        yield
+
+
+@contextlib.contextmanager
+def ignore_jit_no_profile_information_warning():
+    # Calling a scripted object often triggers a warning like
+    # `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information`
+    # with varying `INT1` and `INT2`. These only clutter the test summary, so they are ignored inside this context.
+    # `message` is a regex matched against the start of the warning text, hence `re.escape` on the literal prefix.
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message=re.escape("operator() profile_node %"), category=UserWarning)
+        yield
diff --git a/test/conftest.py b/test/conftest.py
index a9e8f1cda52..468587f1c9e 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -33,7 +33,7 @@ def pytest_collection_modifyitems(items):
         # The needs_cuda mark will exist if the test was explicitly decorated with
         # the @needs_cuda decorator. It will also exist if it was parametrized with a
         # parameter that has the mark: for example if a test is parametrized with
-        # @pytest.mark.parametrize('device', cpu_and_gpu())
+        # @pytest.mark.parametrize('device', cpu_and_cuda())
         # the "instances" of the tests where device == 'cuda' will have the 'needs_cuda' mark,
         # and the ones with device == 'cpu' won't have the mark.
         needs_cuda = item.get_closest_marker("needs_cuda") is not None
diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py
index 0e1cc648a19..43f54e6f107 100644
--- a/test/test_functional_tensor.py
+++ b/test/test_functional_tensor.py
@@ -21,7 +21,7 @@
     _create_data_batch,
     _test_fn_on_batch,
     assert_equal,
-    cpu_and_gpu,
+    cpu_and_cuda,
     needs_cuda,
 )
 from torchvision.transforms import InterpolationMode
@@ -34,7 +34,7 @@
 )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("fn", [F.get_image_size, F.get_image_num_channels, F.get_dimensions])
 def test_image_sizes(device, fn):
     script_F = torch.jit.script(fn)
@@ -72,7 +72,7 @@ class TestRotate:
     scripted_rotate = torch.jit.script(F.rotate)
     IMG_W = 26
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("height, width", [(7, 33), (26, IMG_W), (32, IMG_W)])
     @pytest.mark.parametrize(
         "center",
@@ -131,7 +131,7 @@ def test_rotate(self, device, height, width, center, dt, angle, expand, fill, fn
             f"{out_pil_tensor[0, :7, :7]}"
         )
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dt", ALL_DTYPES)
     def test_rotate_batch(self, device, dt):
         if dt == torch.float16 and device == "cpu":
@@ -157,7 +157,7 @@ class TestAffine:
     ALL_DTYPES = [None, torch.float32, torch.float64, torch.float16]
     scripted_affine = torch.jit.script(F.affine)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)])
     @pytest.mark.parametrize("dt", ALL_DTYPES)
     def test_identity_map(self, device, height, width, dt):
@@ -180,7 +180,7 @@ def test_identity_map(self, device, height, width, dt):
         )
         assert_equal(tensor, out_tensor, msg=f"{out_tensor[0, :5, :5]} vs {tensor[0, :5, :5]}")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("height, width", [(26, 26)])
     @pytest.mark.parametrize("dt", ALL_DTYPES)
     @pytest.mark.parametrize(
@@ -224,7 +224,7 @@ def test_square_rotations(self, device, height, width, dt, angle, config, fn):
         # Tolerance : less than 6% of different pixels
         assert ratio_diff_pixels < 0.06
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("height, width", [(32, 26)])
     @pytest.mark.parametrize("dt", ALL_DTYPES)
     @pytest.mark.parametrize("angle", [90, 45, 15, -30, -60, -120])
@@ -258,7 +258,7 @@ def test_rect_rotations(self, device, height, width, dt, angle, fn, center):
         # Tolerance : less than 3% of different pixels
         assert ratio_diff_pixels < 0.03
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)])
     @pytest.mark.parametrize("dt", ALL_DTYPES)
     @pytest.mark.parametrize("t", [[10, 12], (-12, -13)])
@@ -283,7 +283,7 @@ def test_translations(self, device, height, width, dt, t, fn):
 
         _assert_equal_tensor_to_pil(out_tensor, out_pil_img)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)])
     @pytest.mark.parametrize("dt", ALL_DTYPES)
     @pytest.mark.parametrize(
@@ -344,7 +344,7 @@ def test_all_ops(self, device, height, width, dt, a, t, s, sh, f, fn):
         tol = 0.06 if device == "cuda" else 0.05
         assert ratio_diff_pixels < tol
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dt", ALL_DTYPES)
     def test_batches(self, device, dt):
         if dt == torch.float16 and device == "cpu":
@@ -357,7 +357,7 @@ def test_batches(self, device, dt):
 
         _test_fn_on_batch(batch_tensors, F.affine, angle=-43, translate=[-3, 4], scale=1.2, shear=[4.0, 5.0])
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_interpolation_type(self, device):
         tensor, pil_img = _create_data(26, 26, device=device)
 
@@ -389,7 +389,7 @@ def _get_data_dims_and_points_for_perspective():
     return dims_and_points
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dims_and_points", _get_data_dims_and_points_for_perspective())
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize(
@@ -435,7 +435,7 @@ def test_perspective_pil_vs_tensor(device, dims_and_points, dt, fill, fn):
     assert ratio_diff_pixels < 0.05
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dims_and_points", _get_data_dims_and_points_for_perspective())
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 def test_perspective_batch(device, dims_and_points, dt):
@@ -473,7 +473,7 @@ def test_perspective_interpolation_type():
     assert_equal(res1, res2)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize(
     "size",
@@ -539,7 +539,7 @@ def test_resize(device, dt, size, max_size, interpolation):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_resize_asserts(device):
 
     tensor, pil_img = _create_data(26, 36, device=device)
@@ -556,7 +556,7 @@ def test_resize_asserts(device):
             F.resize(img, size=32, max_size=32)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize("size", [[96, 72], [96, 420], [420, 72]])
 @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC])
@@ -609,21 +609,6 @@ def test_resize_antialias(device, dt, size, interpolation):
     assert_equal(resized_tensor, resize_result)
 
 
-@needs_cuda
-@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC])
-def test_assert_resize_antialias(interpolation):
-
-    # Checks implementation on very large scales
-    # and catch TORCH_CHECK inside PyTorch implementation
-    torch.manual_seed(12)
-    tensor, _ = _create_data(1000, 1000, device="cuda")
-
-    # Error message is not yet updated in pytorch nightly
-    # with pytest.raises(RuntimeError, match=r"Provided interpolation parameters can not be handled"):
-    with pytest.raises(RuntimeError, match=r"Too much shared memory required"):
-        F.resize(tensor, size=(5, 5), interpolation=interpolation, antialias=True)
-
-
 def test_resize_antialias_default_warning():
 
     img = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8)
@@ -641,25 +626,6 @@ def test_resize_antialias_default_warning():
         F.resized_crop(img, 0, 0, 10, 10, size=(20, 20), interpolation=NEAREST)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
-@pytest.mark.parametrize("dt", [torch.float32, torch.float64, torch.float16])
-@pytest.mark.parametrize("size", [[10, 7], [10, 42], [42, 7]])
-@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC])
-def test_interpolate_antialias_backward(device, dt, size, interpolation):
-
-    if dt == torch.float16 and device == "cpu":
-        # skip float16 on CPU case
-        return
-
-    torch.manual_seed(12)
-    x = (torch.rand(1, 32, 29, 3, dtype=torch.double, device=device).permute(0, 3, 1, 2).requires_grad_(True),)
-    resize = partial(F.resize, size=size, interpolation=interpolation, antialias=True)
-    assert torch.autograd.gradcheck(resize, x, eps=1e-8, atol=1e-6, rtol=1e-6, fast_mode=False)
-
-    x = (torch.rand(1, 3, 32, 29, dtype=torch.double, device=device, requires_grad=True),)
-    assert torch.autograd.gradcheck(resize, x, eps=1e-8, atol=1e-6, rtol=1e-6, fast_mode=False)
-
-
 def check_functional_vs_PIL_vs_scripted(
     fn, fn_pil, fn_t, config, device, dtype, channels=3, tol=2.0 + 1e-10, agg_method="max"
 ):
@@ -697,7 +663,7 @@ def check_functional_vs_PIL_vs_scripted(
     _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=atol, **config)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("config", [{"brightness_factor": f} for f in (0.1, 0.5, 1.0, 1.34, 2.5)])
 @pytest.mark.parametrize("channels", [1, 3])
@@ -713,7 +679,7 @@ def test_adjust_brightness(device, dtype, config, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("channels", [1, 3])
 def test_invert(device, dtype, channels):
@@ -722,7 +688,7 @@ def test_invert(device, dtype, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("config", [{"bits": bits} for bits in range(0, 8)])
 @pytest.mark.parametrize("channels", [1, 3])
 def test_posterize(device, config, channels):
@@ -739,7 +705,7 @@ def test_posterize(device, config, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("config", [{"threshold": threshold} for threshold in [0, 64, 128, 192, 255]])
 @pytest.mark.parametrize("channels", [1, 3])
 def test_solarize1(device, config, channels):
@@ -756,7 +722,7 @@ def test_solarize1(device, config, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (torch.float32, torch.float64))
 @pytest.mark.parametrize("config", [{"threshold": threshold} for threshold in [0.0, 0.25, 0.5, 0.75, 1.0]])
 @pytest.mark.parametrize("channels", [1, 3])
@@ -788,7 +754,7 @@ def test_solarize2(device, dtype, config, channels):
         *[(torch.int64, threshold) for threshold in [0, 2**32, 2**63 - 1]],
     ],
 )
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_solarize_threshold_within_bound(threshold, dtype, device):
     make_img = torch.rand if dtype.is_floating_point else partial(torch.randint, 0, torch.iinfo(dtype).max)
     img = make_img((3, 12, 23), dtype=dtype, device=device)
@@ -804,7 +770,7 @@ def test_solarize_threshold_within_bound(threshold, dtype, device):
         (torch.int64, 2**64),
     ],
 )
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_solarize_threshold_above_bound(threshold, dtype, device):
     make_img = torch.rand if dtype.is_floating_point else partial(torch.randint, 0, torch.iinfo(dtype).max)
     img = make_img((3, 12, 23), dtype=dtype, device=device)
@@ -812,7 +778,7 @@ def test_solarize_threshold_above_bound(threshold, dtype, device):
         F_t.solarize(img, threshold)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("config", [{"sharpness_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]])
 @pytest.mark.parametrize("channels", [1, 3])
@@ -828,7 +794,7 @@ def test_adjust_sharpness(device, dtype, config, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("channels", [1, 3])
 def test_autocontrast(device, dtype, channels):
@@ -837,7 +803,7 @@ def test_autocontrast(device, dtype, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("channels", [1, 3])
 def test_autocontrast_equal_minmax(device, dtype, channels):
@@ -849,7 +815,7 @@ def test_autocontrast_equal_minmax(device, dtype, channels):
     assert (F.autocontrast(a)[0] == F.autocontrast(a[0])).all()
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("channels", [1, 3])
 def test_equalize(device, channels):
     torch.use_deterministic_algorithms(False)
@@ -866,7 +832,7 @@ def test_equalize(device, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("config", [{"contrast_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]])
 @pytest.mark.parametrize("channels", [1, 3])
@@ -876,7 +842,7 @@ def test_adjust_contrast(device, dtype, config, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("config", [{"saturation_factor": f} for f in [0.5, 0.75, 1.0, 1.5, 2.0]])
 @pytest.mark.parametrize("channels", [1, 3])
@@ -886,7 +852,7 @@ def test_adjust_saturation(device, dtype, config, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("config", [{"hue_factor": f} for f in [-0.45, -0.25, 0.0, 0.25, 0.45]])
 @pytest.mark.parametrize("channels", [1, 3])
@@ -896,7 +862,7 @@ def test_adjust_hue(device, dtype, config, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64))
 @pytest.mark.parametrize("config", [{"gamma": g1, "gain": g2} for g1, g2 in zip([0.8, 1.0, 1.2], [0.7, 1.0, 1.3])])
 @pytest.mark.parametrize("channels", [1, 3])
@@ -912,7 +878,7 @@ def test_adjust_gamma(device, dtype, config, channels):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize("pad", [2, [3], [0, 3], (3, 3), [4, 2, 4, 3]])
 @pytest.mark.parametrize(
@@ -962,7 +928,7 @@ def test_pad(device, dt, pad, config):
     _test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **config)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("mode", [NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC])
 def test_resized_crop(device, mode):
     # test values of F.resized_crop in several cases:
@@ -997,7 +963,7 @@ def test_resized_crop(device, mode):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "func, args",
     [
@@ -1030,7 +996,7 @@ def test_assert_image_tensor(device, func, args):
         func(tensor, *args)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_vflip(device):
     script_vflip = torch.jit.script(F.vflip)
 
@@ -1047,7 +1013,7 @@ def test_vflip(device):
     _test_fn_on_batch(batch_tensors, F.vflip)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_hflip(device):
     script_hflip = torch.jit.script(F.hflip)
 
@@ -1064,7 +1030,7 @@ def test_hflip(device):
     _test_fn_on_batch(batch_tensors, F.hflip)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "top, left, height, width",
     [
@@ -1093,7 +1059,7 @@ def test_crop(device, top, left, height, width):
     _test_fn_on_batch(batch_tensors, F.crop, top=top, left=left, height=height, width=width)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("image_size", ("small", "large"))
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)])
@@ -1147,7 +1113,7 @@ def test_gaussian_blur(device, image_size, dt, ksize, sigma, fn):
     torch.testing.assert_close(out, true_out, rtol=0.0, atol=1.0, msg=f"{ksize}, {sigma}")
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_hsv2rgb(device):
     scripted_fn = torch.jit.script(F_t._hsv2rgb)
     shape = (3, 100, 150)
@@ -1178,7 +1144,7 @@ def test_hsv2rgb(device):
     _test_fn_on_batch(batch_tensors, F_t._hsv2rgb)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_rgb2hsv(device):
     scripted_fn = torch.jit.script(F_t._rgb2hsv)
     shape = (3, 150, 100)
@@ -1217,7 +1183,7 @@ def test_rgb2hsv(device):
     _test_fn_on_batch(batch_tensors, F_t._rgb2hsv)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("num_output_channels", (3, 1))
 def test_rgb_to_grayscale(device, num_output_channels):
     script_rgb_to_grayscale = torch.jit.script(F.rgb_to_grayscale)
@@ -1236,7 +1202,7 @@ def test_rgb_to_grayscale(device, num_output_channels):
     _test_fn_on_batch(batch_tensors, F.rgb_to_grayscale, num_output_channels=num_output_channels)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_center_crop(device):
     script_center_crop = torch.jit.script(F.center_crop)
 
@@ -1254,7 +1220,7 @@ def test_center_crop(device):
     _test_fn_on_batch(batch_tensors, F.center_crop, output_size=[10, 11])
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_five_crop(device):
     script_five_crop = torch.jit.script(F.five_crop)
 
@@ -1288,7 +1254,7 @@ def test_five_crop(device):
         assert_equal(transformed_batch, s_transformed_batch)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_ten_crop(device):
     script_ten_crop = torch.jit.script(F.ten_crop)
 
@@ -1334,7 +1300,7 @@ def test_elastic_transform_asserts():
         _ = F.elastic_transform(img_tensor, displacement=torch.rand(1, 2))
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC])
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize(
diff --git a/test/test_image.py b/test/test_image.py
index 4c210ea7eef..b08dc2026d4 100644
--- a/test/test_image.py
+++ b/test/test_image.py
@@ -32,6 +32,7 @@
 DAMAGED_PNG = os.path.join(IMAGE_ROOT, "damaged_png")
 ENCODE_JPEG = os.path.join(IMAGE_ROOT, "encode_jpeg")
 INTERLACED_PNG = os.path.join(IMAGE_ROOT, "interlaced_png")
+TOOSMALL_PNG = os.path.join(IMAGE_ROOT, "toosmall_png")
 IS_WINDOWS = sys.platform in ("win32", "cygwin")
 PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split("."))
 
@@ -193,6 +194,8 @@ def test_decode_png_errors():
         decode_png(torch.randint(3, 5, (300,), dtype=torch.uint8))
     with pytest.raises(RuntimeError, match="Out of bound read in decode_png"):
         decode_png(read_file(os.path.join(DAMAGED_PNG, "sigsegv.png")))
+    with pytest.raises(RuntimeError, match="Content is too small for png"):
+        decode_png(read_file(os.path.join(TOOSMALL_PNG, "heapbof.png")))
 
 
 @pytest.mark.parametrize(
diff --git a/test/test_models.py b/test/test_models.py
index f6eeb7c28c8..67eb2115c85 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -15,7 +15,7 @@
 import torch.fx
 import torch.nn as nn
 from _utils_internal import get_relative_path
-from common_utils import cpu_and_gpu, freeze_rng_state, map_nested_tensor_object, needs_cuda, set_rng_seed
+from common_utils import cpu_and_cuda, freeze_rng_state, map_nested_tensor_object, needs_cuda, set_rng_seed
 from PIL import Image
 from torchvision import models, transforms
 from torchvision.models import get_model_builder, list_models
@@ -676,14 +676,14 @@ def vitc_b_16(**kwargs: Any):
 
 
 @pytest.mark.parametrize("model_fn", [vitc_b_16])
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_vitc_models(model_fn, dev):
     test_classification_model(model_fn, dev)
 
 
 @disable_tf32()  # see: https://github.com/pytorch/vision/issues/7618
 @pytest.mark.parametrize("model_fn", list_model_fns(models))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_classification_model(model_fn, dev):
     set_rng_seed(0)
     defaults = {
@@ -726,7 +726,7 @@ def test_classification_model(model_fn, dev):
 
 
 @pytest.mark.parametrize("model_fn", list_model_fns(models.segmentation))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_segmentation_model(model_fn, dev):
     set_rng_seed(0)
     defaults = {
@@ -791,7 +791,7 @@ def check_out(out):
 
 
 @pytest.mark.parametrize("model_fn", list_model_fns(models.detection))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_detection_model(model_fn, dev):
     set_rng_seed(0)
     defaults = {
@@ -923,7 +923,7 @@ def test_detection_model_validation(model_fn):
 
 
 @pytest.mark.parametrize("model_fn", list_model_fns(models.video))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_video_model(model_fn, dev):
     set_rng_seed(0)
     # the default input shape is
diff --git a/test/test_ops.py b/test/test_ops.py
index 463ebb333ff..b993bce65a2 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -10,7 +10,7 @@
 import torch
 import torch.fx
 import torch.nn.functional as F
-from common_utils import assert_equal, cpu_and_gpu, needs_cuda
+from common_utils import assert_equal, cpu_and_cuda, needs_cuda
 from PIL import Image
 from torch import nn, Tensor
 from torch.autograd import gradcheck
@@ -97,7 +97,7 @@ def forward(self, imgs: Tensor, boxes: List[Tensor]) -> Tensor:
 class RoIOpTester(ABC):
     dtype = torch.float64
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, deterministic=False, **kwargs):
         x_dtype = self.dtype if x_dtype is None else x_dtype
@@ -126,7 +126,7 @@ def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, determ
         tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
         torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -135,7 +135,7 @@ def test_is_leaf_node(self, device):
         assert len(graph_node_names[0]) == len(graph_node_names[1])
         assert len(graph_node_names[0]) == 1 + op_obj.n_inputs
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_torch_fx_trace(self, device, x_dtype=torch.float, rois_dtype=torch.float):
         op_obj = self.make_obj().to(device=device)
         graph_module = torch.fx.symbolic_trace(op_obj)
@@ -155,7 +155,7 @@ def test_torch_fx_trace(self, device, x_dtype=torch.float, rois_dtype=torch.floa
         torch.testing.assert_close(output_gt, output_fx, rtol=tol, atol=tol)
 
     @pytest.mark.parametrize("seed", range(10))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     def test_backward(self, seed, device, contiguous, deterministic=False):
         torch.random.manual_seed(seed)
@@ -418,7 +418,7 @@ def test_boxes_shape(self):
         self._helper_boxes_shape(ops.roi_align)
 
     @pytest.mark.parametrize("aligned", (True, False))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("deterministic", (True, False))
     def test_forward(self, device, contiguous, deterministic, aligned, x_dtype=None, rois_dtype=None):
@@ -450,7 +450,7 @@ def test_autocast(self, aligned, deterministic, x_dtype, rois_dtype):
             )
 
     @pytest.mark.parametrize("seed", range(10))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("deterministic", (True, False))
     def test_backward(self, seed, device, contiguous, deterministic):
@@ -612,7 +612,7 @@ def test_msroialign_repr(self):
         )
         assert repr(t) == expected_string
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -885,7 +885,7 @@ def make_obj(self, in_channels=6, out_channels=2, kernel_size=(3, 2), groups=2,
         )
         return DeformConvModuleWrapper(obj) if wrap else obj
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -894,7 +894,7 @@ def test_is_leaf_node(self, device):
         assert len(graph_node_names[0]) == len(graph_node_names[1])
         assert len(graph_node_names[0]) == 1 + op_obj.n_inputs
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("batch_sz", (0, 33))
     def test_forward(self, device, contiguous, batch_sz, dtype=None):
@@ -946,7 +946,7 @@ def test_wrong_sizes(self):
             wrong_mask = torch.rand_like(mask[:, :2])
             layer(x, offset, wrong_mask)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("batch_sz", (0, 33))
     def test_backward(self, device, contiguous, batch_sz):
@@ -1411,7 +1411,7 @@ def assert_empty_loss(iou_fn, dtype, device):
 
 class TestGeneralizedBoxIouLoss:
     # We refer to original test: https://github.com/facebookresearch/fvcore/blob/main/tests/test_giou_loss.py
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_giou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
@@ -1439,7 +1439,7 @@ def test_giou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.generalized_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_inputs(self, dtype, device):
         assert_empty_loss(ops.generalized_box_iou_loss, dtype, device)
@@ -1447,7 +1447,7 @@ def test_empty_inputs(self, dtype, device):
 
 class TestCompleteBoxIouLoss:
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_ciou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
 
@@ -1461,14 +1461,14 @@ def test_ciou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.complete_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_inputs(self, dtype, device):
         assert_empty_loss(ops.complete_box_iou_loss, dtype, device)
 
 
 class TestDistanceBoxIouLoss:
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_distance_iou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
@@ -1483,7 +1483,7 @@ def test_distance_iou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.distance_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_distance_iou_inputs(self, dtype, device):
         assert_empty_loss(ops.distance_box_iou_loss, dtype, device)
@@ -1528,7 +1528,7 @@ def generate_tensor_with_range_type(shape, range_type, **kwargs):
 
     @pytest.mark.parametrize("alpha", [-1.0, 0.0, 0.58, 1.0])
     @pytest.mark.parametrize("gamma", [0, 2])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [0, 1])
     def test_correct_ratio(self, alpha, gamma, device, dtype, seed):
@@ -1557,7 +1557,7 @@ def test_correct_ratio(self, alpha, gamma, device, dtype, seed):
         torch.testing.assert_close(correct_ratio, loss_ratio, atol=tol, rtol=tol)
 
     @pytest.mark.parametrize("reduction", ["mean", "sum"])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [2, 3])
     def test_equal_ce_loss(self, reduction, device, dtype, seed):
@@ -1584,7 +1584,7 @@ def test_equal_ce_loss(self, reduction, device, dtype, seed):
     @pytest.mark.parametrize("alpha", [-1.0, 0.0, 0.58, 1.0])
     @pytest.mark.parametrize("gamma", [0, 2])
     @pytest.mark.parametrize("reduction", ["none", "mean", "sum"])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [4, 5])
     def test_jit(self, alpha, gamma, reduction, device, dtype, seed):
@@ -1600,7 +1600,7 @@ def test_jit(self, alpha, gamma, reduction, device, dtype, seed):
         torch.testing.assert_close(focal_loss, scripted_focal_loss, rtol=tol, atol=tol)
 
     # Raise ValueError for anonymous reduction mode
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_reduction_mode(self, device, dtype, reduction="xyz"):
         if device == "cpu" and dtype is torch.half:
diff --git a/test/test_prototype_models.py b/test/test_prototype_models.py
index 6d9f22c1543..d32df68f1f4 100644
--- a/test/test_prototype_models.py
+++ b/test/test_prototype_models.py
@@ -1,13 +1,13 @@
 import pytest
 import test_models as TM
 import torch
-from common_utils import cpu_and_gpu, set_rng_seed
+from common_utils import cpu_and_cuda, set_rng_seed
 from torchvision.prototype import models
 
 
 @pytest.mark.parametrize("model_fn", (models.depth.stereo.raft_stereo_base,))
 @pytest.mark.parametrize("model_mode", ("standard", "scripted"))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_raft_stereo(model_fn, model_mode, dev):
     # A simple test to make sure the model can do forward pass and jit scriptable
     set_rng_seed(0)
@@ -40,7 +40,7 @@ def test_raft_stereo(model_fn, model_mode, dev):
 
 @pytest.mark.parametrize("model_fn", (models.depth.stereo.crestereo_base,))
 @pytest.mark.parametrize("model_mode", ("standard", "scripted"))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_crestereo(model_fn, model_mode, dev):
     set_rng_seed(0)
 
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 077a12af490..e2ab5673f1e 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -12,7 +12,7 @@
     _create_data,
     _create_data_batch,
     assert_equal,
-    cpu_and_gpu,
+    cpu_and_cuda,
     float_dtypes,
     get_tmp_dir,
     int_dtypes,
@@ -105,7 +105,7 @@ def _test_fn_save_load(fn, tmpdir):
     _ = torch.jit.load(p)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "func,method,fn_kwargs,match_kwargs",
     [
@@ -130,7 +130,7 @@ def test_random(func, method, device, channels, fn_kwargs, match_kwargs):
 
 
 @pytest.mark.parametrize("seed", range(10))
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("channels", [1, 3])
 class TestColorJitter:
     @pytest.fixture(autouse=True)
@@ -206,7 +206,7 @@ def test_color_jitter_all(self, device, channels):
         )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("m", ["constant", "edge", "reflect", "symmetric"])
 @pytest.mark.parametrize("mul", [1, -1])
 def test_pad(m, mul, device):
@@ -229,7 +229,7 @@ def test_pad(m, mul, device):
     _test_op(F.pad, T.Pad, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_crop(device):
     fn_kwargs = {"top": 2, "left": 3, "height": 4, "width": 5}
     # Test transforms.RandomCrop with size and padding as tuple
@@ -257,7 +257,7 @@ def test_crop(device):
     _test_functional_op(F.crop, fn_kwargs=fn_kwargs, device=device)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "padding_config",
     [
@@ -283,7 +283,7 @@ def test_random_crop_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_center_crop(device, tmpdir):
     fn_kwargs = {"output_size": (4, 5)}
     meth_kwargs = {"size": (4, 5)}
@@ -313,7 +313,7 @@ def test_center_crop_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "fn, method, out_length",
     [
@@ -380,7 +380,7 @@ def test_resize_int(self, size):
         assert y.shape[1] == size
         assert y.shape[2] == int(size * 46 / 32)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64])
     @pytest.mark.parametrize("size", [[32], [32, 32], (32, 32), [34, 35]])
     @pytest.mark.parametrize("max_size", [None, 35, 1000])
@@ -404,7 +404,7 @@ def test_resize_save_load(self, tmpdir):
         fn = T.Resize(size=[32], antialias=True)
         _test_fn_save_load(fn, tmpdir)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]])
     @pytest.mark.parametrize("ratio", [(0.75, 1.333), [0.75, 1.333]])
     @pytest.mark.parametrize("size", [(32,), [44], [32], [32, 32], (32, 32), [44, 55]])
@@ -460,42 +460,42 @@ def test_random_affine_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("shear", [15, 10.0, (5.0, 10.0), [-15, 15], [-10.0, 10.0, -11.0, 11.0]])
 def test_random_affine_shear(device, interpolation, shear):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, shear=shear)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]])
 def test_random_affine_scale(device, interpolation, scale):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, scale=scale)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("translate", [(0.1, 0.2), [0.2, 0.1]])
 def test_random_affine_translate(device, interpolation, translate):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, translate=translate)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]])
 def test_random_affine_degrees(device, interpolation, degrees):
     _test_random_affine_helper(device, degrees=degrees, interpolation=interpolation)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_random_affine_fill(device, interpolation, fill):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, fill=fill)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("center", [(0, 0), [10, 10], None, (56, 44)])
 @pytest.mark.parametrize("expand", [True, False])
 @pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]])
@@ -517,7 +517,7 @@ def test_random_rotate_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("distortion_scale", np.linspace(0.1, 1.0, num=20))
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
@@ -537,7 +537,7 @@ def test_random_perspective_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "Klass, meth_kwargs",
     [(T.Grayscale, {"num_output_channels": 1}), (T.Grayscale, {"num_output_channels": 3}), (T.RandomGrayscale, {})],
@@ -547,7 +547,7 @@ def test_to_grayscale(device, Klass, meth_kwargs):
     _test_class_op(Klass, meth_kwargs=meth_kwargs, test_exact_match=False, device=device, tol=tol, agg_method="max")
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("in_dtype", int_dtypes() + float_dtypes())
 @pytest.mark.parametrize("out_dtype", int_dtypes() + float_dtypes())
 def test_convert_image_dtype(device, in_dtype, out_dtype):
@@ -578,7 +578,7 @@ def test_convert_image_dtype_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("policy", [policy for policy in T.AutoAugmentPolicy])
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_autoaugment(device, policy, fill):
@@ -592,7 +592,7 @@ def test_autoaugment(device, policy, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("num_ops", [1, 2, 3])
 @pytest.mark.parametrize("magnitude", [7, 9, 11])
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
@@ -607,7 +607,7 @@ def test_randaugment(device, num_ops, magnitude, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_trivialaugmentwide(device, fill):
     tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device)
@@ -620,7 +620,7 @@ def test_trivialaugmentwide(device, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_augmix(device, fill):
     tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device)
@@ -686,7 +686,7 @@ def shear(pil_img, level, mode, resample):
     _assert_approx_equal_tensor_to_pil(out, expected_out)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "config",
     [
@@ -724,7 +724,7 @@ def test_random_erasing_with_invalid_data():
         random_erasing(img)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_normalize(device, tmpdir):
     fn = T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
     tensor, _ = _create_data(26, 34, device=device)
@@ -743,7 +743,7 @@ def test_normalize(device, tmpdir):
     scripted_fn.save(os.path.join(tmpdir, "t_norm.pt"))
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_linear_transformation(device, tmpdir):
     c, h, w = 3, 24, 32
 
@@ -769,7 +769,7 @@ def test_linear_transformation(device, tmpdir):
     scripted_fn.save(os.path.join(tmpdir, "t_norm.pt"))
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_compose(device):
     tensor, _ = _create_data(26, 34, device=device)
     tensor = tensor.to(dtype=torch.float32) / 255.0
@@ -797,7 +797,7 @@ def test_compose(device):
         torch.jit.script(t)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_random_apply(device):
     tensor, _ = _create_data(26, 34, device=device)
     tensor = tensor.to(dtype=torch.float32) / 255.0
@@ -839,7 +839,7 @@ def test_random_apply(device):
             torch.jit.script(transforms)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "meth_kwargs",
     [
@@ -877,7 +877,7 @@ def test_gaussian_blur(device, channels, meth_kwargs):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "fill",
     [
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
index 02e3e1e569a..935d25edd6d 100644
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -16,7 +16,7 @@
 from common_utils import (
     assert_equal,
     assert_run_python_script,
-    cpu_and_gpu,
+    cpu_and_cuda,
     make_bounding_box,
     make_bounding_boxes,
     make_detection_mask,
@@ -173,7 +173,7 @@ class TestSmoke:
             next(make_vanilla_tensor_images()),
         ],
     )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_common(self, transform, adapter, container_type, image_or_video, device):
         spatial_size = F.get_spatial_size(image_or_video)
         input = dict(
@@ -1364,7 +1364,7 @@ def test_assertions(self):
 
 
 class TestRandomIoUCrop:
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]])
     def test__get_params(self, device, options, mocker):
         image = mocker.MagicMock(spec=datapoints.Image)
@@ -1711,8 +1711,6 @@ def test_antialias_warning():
     tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8)
 
     match = "The default value of the antialias parameter"
-    with pytest.warns(UserWarning, match=match):
-        transforms.Resize((20, 20))(tensor_img)
     with pytest.warns(UserWarning, match=match):
         transforms.RandomResizedCrop((20, 20))(tensor_img)
     with pytest.warns(UserWarning, match=match):
@@ -1722,18 +1720,6 @@ def test_antialias_warning():
     with pytest.warns(UserWarning, match=match):
         transforms.RandomResize(10, 20)(tensor_img)
 
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_img, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20))
-
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_video, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_video(tensor_video, (20, 20))
-
-    with pytest.warns(UserWarning, match=match):
-        datapoints.Image(tensor_img).resize((20, 20))
     with pytest.warns(UserWarning, match=match):
         datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20))
 
@@ -1744,27 +1730,17 @@ def test_antialias_warning():
 
     with warnings.catch_warnings():
         warnings.simplefilter("error")
-        transforms.Resize((20, 20))(pil_img)
         transforms.RandomResizedCrop((20, 20))(pil_img)
         transforms.ScaleJitter((20, 20))(pil_img)
         transforms.RandomShortestSize((20, 20))(pil_img)
         transforms.RandomResize(10, 20)(pil_img)
-        transforms.functional.resize(pil_img, (20, 20))
 
-        transforms.Resize((20, 20), antialias=True)(tensor_img)
         transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img)
         transforms.ScaleJitter((20, 20), antialias=True)(tensor_img)
         transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img)
         transforms.RandomResize(10, 20, antialias=True)(tensor_img)
 
-        transforms.functional.resize(tensor_img, (20, 20), antialias=True)
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True)
-        transforms.functional.resize(tensor_video, (20, 20), antialias=True)
-        transforms.functional.resize_video(tensor_video, (20, 20), antialias=True)
-
-        datapoints.Image(tensor_img).resize((20, 20), antialias=True)
         datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
-        datapoints.Video(tensor_video).resize((20, 20), antialias=True)
         datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
 
 
diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py
index e541feaf1eb..f035dde45ed 100644
--- a/test/test_transforms_v2_consistency.py
+++ b/test/test_transforms_v2_consistency.py
@@ -22,6 +22,7 @@
     make_image,
     make_images,
     make_segmentation_mask,
+    set_rng_seed,
 )
 from torch import nn
 from torchvision import datapoints, transforms as legacy_transforms
@@ -35,6 +36,12 @@
 DEFAULT_MAKE_IMAGES_KWARGS = dict(color_spaces=["RGB"], extra_dims=[(4,)])
 
 
+@pytest.fixture(autouse=True)  # autouse: applied to every test in this module without being requested
+def fix_rng_seed():
+    set_rng_seed(0)  # fixed seed before each test; which RNGs are covered depends on common_utils.set_rng_seed
+    yield  # the test body runs here; nothing follows, so there is no teardown
+
+
 class NotScriptableArgsKwargs(ArgsKwargs):
     """
     This class is used to mark parameters that render the transform non-scriptable. They still work in eager mode and
diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py
index 60a06f571b1..9a2ea37a4ae 100644
--- a/test/test_transforms_v2_functional.py
+++ b/test/test_transforms_v2_functional.py
@@ -14,7 +14,7 @@
 from common_utils import (
     assert_close,
     cache,
-    cpu_and_gpu,
+    cpu_and_cuda,
     DEFAULT_SQUARE_SPATIAL_SIZE,
     make_bounding_boxes,
     needs_cuda,
@@ -120,7 +120,7 @@ class TestKernels:
         [info for info in KERNEL_INFOS if info.logs_usage],
         args_kwargs_fn=lambda info: info.sample_inputs_fn(),
     )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_logging(self, spy_on, info, args_kwargs, device):
         spy = spy_on(torch._C._log_api_usage_once)
 
@@ -131,7 +131,7 @@ def test_logging(self, spy_on, info, args_kwargs, device):
 
     @ignore_jit_warning_no_profile
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_scripted_vs_eager(self, test_id, info, args_kwargs, device):
         kernel_eager = info.kernel
         kernel_scripted = script(kernel_eager)
@@ -167,7 +167,7 @@ def _unbatch(self, batch, *, data_dims):
         ]
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_batched_vs_single(self, test_id, info, args_kwargs, device):
         (batched_input, *other_args), kwargs = args_kwargs.load(device)
 
@@ -208,7 +208,7 @@ def test_batched_vs_single(self, test_id, info, args_kwargs, device):
         )
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_no_inplace(self, info, args_kwargs, device):
         (input, *other_args), kwargs = args_kwargs.load(device)
         input = input.as_subclass(torch.Tensor)
@@ -240,7 +240,7 @@ def test_cuda_vs_cpu(self, test_id, info, args_kwargs):
         )
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_dtype_and_device_consistency(self, info, args_kwargs, device):
         (input, *other_args), kwargs = args_kwargs.load(device)
         input = input.as_subclass(torch.Tensor)
@@ -320,7 +320,7 @@ class TestDispatchers:
         DISPATCHER_INFOS,
         args_kwargs_fn=lambda info: info.sample_inputs(),
     )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_logging(self, spy_on, info, args_kwargs, device):
         spy = spy_on(torch._C._log_api_usage_once)
 
@@ -331,7 +331,7 @@ def test_logging(self, spy_on, info, args_kwargs, device):
 
     @ignore_jit_warning_no_profile
     @image_sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_scripted_smoke(self, info, args_kwargs, device):
         dispatcher = script(info.dispatcher)
 
@@ -553,7 +553,7 @@ def test_alias(alias, target):
         args_kwargs_fn=lambda info: info.sample_inputs_fn(),
     ),
 )
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
     (input, *other_args), kwargs = args_kwargs.load(device)
     dtype = other_args[0] if other_args else kwargs.get("dtype", torch.float32)
@@ -564,7 +564,7 @@ def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
     assert output.device == input.device
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("num_channels", [1, 3])
 def test_normalize_image_tensor_stats(device, num_channels):
     stats = pytest.importorskip("scipy.stats", reason="SciPy is not available")
@@ -664,7 +664,7 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_):
     return true_matrix
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_correctness_affine_bounding_box_on_fixed_input(device):
     # Check transformation against known expected output
     format = datapoints.BoundingBoxFormat.XYXY
@@ -715,7 +715,7 @@ def test_correctness_affine_bounding_box_on_fixed_input(device):
     torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_correctness_affine_segmentation_mask_on_fixed_input(device):
     # Check transformation against known expected output and CPU/CUDA devices
 
@@ -820,7 +820,7 @@ def _compute_expected_bbox(bbox, angle_, expand_, center_):
         torch.testing.assert_close(output_spatial_size, expected_spatial_size, atol=1, rtol=0)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("expand", [False])  # expand=True does not match D2
 def test_correctness_rotate_bounding_box_on_fixed_input(device, expand):
     # Check transformation against known expected output
@@ -876,7 +876,7 @@ def test_correctness_rotate_bounding_box_on_fixed_input(device, expand):
     torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_correctness_rotate_segmentation_mask_on_fixed_input(device):
     # Check transformation against known expected output and CPU/CUDA devices
 
@@ -892,7 +892,7 @@ def test_correctness_rotate_segmentation_mask_on_fixed_input(device):
     torch.testing.assert_close(out_mask, expected_mask)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "format",
     [datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH, datapoints.BoundingBoxFormat.CXCYWH],
@@ -949,7 +949,7 @@ def test_correctness_crop_bounding_box(device, format, top, left, height, width,
     torch.testing.assert_close(output_spatial_size, spatial_size)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_correctness_horizontal_flip_segmentation_mask_on_fixed_input(device):
     mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device)
     mask[:, :, 0] = 1
@@ -961,7 +961,7 @@ def test_correctness_horizontal_flip_segmentation_mask_on_fixed_input(device):
     torch.testing.assert_close(out_mask, expected_mask)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device):
     mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device)
     mask[:, 0, :] = 1
@@ -973,7 +973,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device):
     torch.testing.assert_close(out_mask, expected_mask)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "format",
     [datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH, datapoints.BoundingBoxFormat.CXCYWH],
@@ -1032,7 +1032,7 @@ def _parse_padding(padding):
     return padding
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("padding", [[1], [1, 1], [1, 1, 2, 2]])
 def test_correctness_pad_bounding_box(device, padding):
     def _compute_expected_bbox(bbox, padding_):
@@ -1087,7 +1087,7 @@ def _compute_expected_spatial_size(bbox, padding_):
         torch.testing.assert_close(output_boxes, expected_bboxes, atol=1, rtol=0)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_correctness_pad_segmentation_mask_on_fixed_input(device):
     mask = torch.ones((1, 3, 3), dtype=torch.long, device=device)
 
@@ -1098,7 +1098,7 @@ def test_correctness_pad_segmentation_mask_on_fixed_input(device):
     torch.testing.assert_close(out_mask, expected_mask)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "startpoints, endpoints",
     [
@@ -1182,7 +1182,7 @@ def _compute_expected_bbox(bbox, pcoeffs_):
         torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=0, atol=1)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "output_size",
     [(18, 18), [18, 15], (16, 19), [12], [46, 48]],
@@ -1236,7 +1236,7 @@ def _compute_expected_bbox(bbox, output_size_):
         torch.testing.assert_close(output_spatial_size, output_size)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("output_size", [[4, 2], [4], [7, 6]])
 def test_correctness_center_crop_mask(device, output_size):
     def _compute_expected_mask(mask, output_size):
@@ -1260,7 +1260,7 @@ def _compute_expected_mask(mask, output_size):
 
 
 # Copied from test/test_functional_tensor.py
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("spatial_size", ("small", "large"))
 @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
 @pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)])
@@ -1357,7 +1357,7 @@ def test_equalize_image_tensor_edge_cases():
     assert output.unique().tolist() == [0, 255]
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_correctness_uniform_temporal_subsample(device):
     video = torch.arange(10, device=device)[:, None, None, None].expand(-1, 3, 8, 8)
     out_video = F.uniform_temporal_subsample(video, 5)
diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py
new file mode 100644
index 00000000000..002da24ac89
--- /dev/null
+++ b/test/test_transforms_v2_refactored.py
@@ -0,0 +1,728 @@
+import contextlib
+import inspect
+import re
+from typing import get_type_hints
+from unittest import mock
+
+import numpy as np
+import PIL.Image
+import pytest
+
+import torch
+import torchvision.transforms.v2 as transforms
+from common_utils import (
+    assert_equal,
+    assert_no_warnings,
+    cache,
+    cpu_and_cuda,
+    ignore_jit_no_profile_information_warning,
+    make_bounding_box,
+    make_detection_mask,
+    make_image,
+    make_segmentation_mask,
+    make_video,
+    set_rng_seed,
+)
+from torch.testing import assert_close
+from torchvision import datapoints
+from torchvision.transforms.functional import pil_modes_mapping
+from torchvision.transforms.v2 import functional as F
+
+
+@pytest.fixture(autouse=True)
+def fix_rng_seed():
+    set_rng_seed(0)
+    yield
+
+
+def _to_tolerances(maybe_tolerance_dict):  # maps a `check_*` flag of `check_kernel` to `rtol` / `atol` kwargs
+    if not isinstance(maybe_tolerance_dict, dict):
+        return dict(rtol=None, atol=None)  # not a dict (e.g. `True`): `None` is forwarded to `assert_close` as is
+
+    tolerances = dict(rtol=0, atol=0)  # a dict only needs to name the tolerance(s) that should be non-zero
+    tolerances.update(maybe_tolerance_dict)
+    return tolerances
+
+
+def _check_kernel_cuda_vs_cpu(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel produces close results for inputs on GPU and CPU."""
+    if input.device.type != "cuda":
+        return
+
+    input_cuda = input.as_subclass(torch.Tensor)
+    input_cpu = input_cuda.to("cpu")
+
+    actual = kernel(input_cuda, *args, **kwargs)
+    expected = kernel(input_cpu, *args, **kwargs)
+
+    assert_close(actual, expected, check_device=False, rtol=rtol, atol=atol)
+
+
+@cache
+def _script(fn):
+    try:
+        return torch.jit.script(fn)
+    except Exception as error:
+        raise AssertionError(f"Trying to `torch.jit.script` '{fn.__name__}' raised the error above.") from error
+
+
+def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel is scriptable and if the scripted output is close to the eager one."""
+    if input.device.type != "cpu":
+        return
+
+    kernel_scripted = _script(kernel)
+
+    input = input.as_subclass(torch.Tensor)
+    with ignore_jit_no_profile_information_warning():
+        actual = kernel_scripted(input, *args, **kwargs)
+    expected = kernel(input, *args, **kwargs)
+
+    assert_close(actual, expected, rtol=rtol, atol=atol)
+
+
+def _check_kernel_batched_vs_unbatched(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel produces close results for batched and unbatched inputs."""
+    unbatched_input = input.as_subclass(torch.Tensor)
+
+    for batch_dims in [(2,), (2, 1)]:
+        repeats = [*batch_dims, *[1] * input.ndim]  # new leading batch dims; the existing dims are kept once
+
+        actual = kernel(unbatched_input.repeat(repeats), *args, **kwargs)
+
+        expected = kernel(unbatched_input, *args, **kwargs)
+        # We can't directly call `.repeat()` on the output, since some kernels also return some additional metadata
+        if isinstance(expected, torch.Tensor):
+            expected = expected.repeat(repeats)
+        else:
+            tensor, *metadata = expected
+            expected = (tensor.repeat(repeats), *metadata)
+
+        assert_close(actual, expected, rtol=rtol, atol=atol)
+
+    for degenerate_batch_dims in [(0,), (5, 0), (0, 5)]:  # batch shapes that contain no elements at all
+        degenerate_batched_input = torch.empty(
+            degenerate_batch_dims + input.shape, dtype=input.dtype, device=input.device
+        )
+
+        output = kernel(degenerate_batched_input, *args, **kwargs)
+        # Most kernels just return a tensor, but some also return some additional metadata
+        if not isinstance(output, torch.Tensor):
+            output, *_ = output
+
+        assert output.shape[: -input.ndim] == degenerate_batch_dims  # only the leading batch dims are compared
+
+
+def check_kernel(
+    kernel,
+    input,
+    *args,
+    check_cuda_vs_cpu=True,
+    check_scripted_vs_eager=True,
+    check_batched_vs_unbatched=True,
+    **kwargs,
+):
+    initial_input_version = input._version
+
+    output = kernel(input.as_subclass(torch.Tensor), *args, **kwargs)
+    # Most kernels just return a tensor, but some also return some additional metadata
+    if not isinstance(output, torch.Tensor):
+        output, *_ = output
+
+    # check that no inplace operation happened
+    assert input._version == initial_input_version
+
+    assert output.dtype == input.dtype
+    assert output.device == input.device
+
+    if check_cuda_vs_cpu:
+        _check_kernel_cuda_vs_cpu(kernel, input, *args, **kwargs, **_to_tolerances(check_cuda_vs_cpu))
+
+    if check_scripted_vs_eager:
+        _check_kernel_scripted_vs_eager(kernel, input, *args, **kwargs, **_to_tolerances(check_scripted_vs_eager))
+
+    if check_batched_vs_unbatched:
+        _check_kernel_batched_vs_unbatched(kernel, input, *args, **kwargs, **_to_tolerances(check_batched_vs_unbatched))
+
+
+def _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs):
+    """Checks if the dispatcher can be scripted and the scripted version can be called without error."""
+    if not isinstance(input, datapoints.Image):
+        return
+
+    dispatcher_scripted = _script(dispatcher)
+    with ignore_jit_no_profile_information_warning():
+        dispatcher_scripted(input.as_subclass(torch.Tensor), *args, **kwargs)
+
+
+def _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs):
+    """Checks if the dispatcher correctly dispatches the input to the corresponding kernel and that the input type is
+    preserved in doing so. For bounding boxes also checks that the format is preserved.
+    """
+    if isinstance(input, datapoints._datapoint.Datapoint):
+        # Due to our complex dispatch architecture for datapoints, we cannot spy on the kernel directly,
+        # but rather have to patch the `Datapoint.__F` attribute to contain the spied on kernel.
+        spy = mock.MagicMock(wraps=kernel)
+        with mock.patch.object(F, kernel.__name__, spy):
+            # Due to Python's name mangling, the `Datapoint.__F` attribute is only accessible from inside the class.
+            # Since that is not the case here, we need to prefix f"_{cls.__name__}"
+            # See https://docs.python.org/3/tutorial/classes.html#private-variables for details
+            with mock.patch.object(datapoints._datapoint.Datapoint, "_Datapoint__F", new=F):
+                output = dispatcher(input, *args, **kwargs)
+
+        spy.assert_called_once()
+    else:
+        with mock.patch(f"{dispatcher.__module__}.{kernel.__name__}", wraps=kernel) as spy:
+            output = dispatcher(input, *args, **kwargs)
+
+            spy.assert_called_once()
+
+    assert isinstance(output, type(input))
+
+    if isinstance(input, datapoints.BoundingBox):
+        assert output.format == input.format
+
+
+def check_dispatcher(
+    dispatcher,
+    kernel,
+    input,
+    *args,
+    check_scripted_smoke=True,
+    check_dispatch=True,
+    **kwargs,
+):
+    with mock.patch("torch._C._log_api_usage_once", wraps=torch._C._log_api_usage_once) as spy:
+        dispatcher(input, *args, **kwargs)
+
+        spy.assert_any_call(f"{dispatcher.__module__}.{dispatcher.__name__}")
+
+    unknown_input = object()
+    with pytest.raises(TypeError, match=re.escape(str(type(unknown_input)))):
+        dispatcher(unknown_input, *args, **kwargs)
+
+    if check_scripted_smoke:
+        _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs)
+
+    if check_dispatch:
+        _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs)
+
+
+def _check_dispatcher_kernel_signature_match(dispatcher, *, kernel, input_type):
+    """Checks if the signature of the dispatcher matches the kernel signature."""
+    dispatcher_signature = inspect.signature(dispatcher)
+    dispatcher_params = list(dispatcher_signature.parameters.values())[1:]
+
+    kernel_signature = inspect.signature(kernel)
+    kernel_params = list(kernel_signature.parameters.values())[1:]
+
+    if issubclass(input_type, datapoints._datapoint.Datapoint):
+        # We filter out metadata that is implicitly passed to the dispatcher through the input datapoint, but has to be
+        # explicitly passed to the kernel.
+        kernel_params = [param for param in kernel_params if param.name not in input_type.__annotations__.keys()]
+
+    dispatcher_params = iter(dispatcher_params)
+    for dispatcher_param, kernel_param in zip(dispatcher_params, kernel_params):
+        try:
+            # In general, the dispatcher parameters are a superset of the kernel parameters. Thus, we filter out
+            # dispatcher parameters that have no kernel equivalent while keeping the order intact.
+            while dispatcher_param.name != kernel_param.name:
+                dispatcher_param = next(dispatcher_params)
+        except StopIteration:
+            raise AssertionError(
+                f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` "
+                f"has no corresponding parameter on the dispatcher `{dispatcher.__name__}`."
+            ) from None
+
+        if issubclass(input_type, PIL.Image.Image):
+            # PIL kernels often have more correct annotations, since they are not limited by JIT. Thus, we don't check
+            # them in the first place.
+            dispatcher_param._annotation = kernel_param._annotation = inspect.Parameter.empty
+
+        assert dispatcher_param == kernel_param
+
+
+def _check_dispatcher_datapoint_signature_match(dispatcher):
+    """Checks if the signature of the dispatcher matches the corresponding method signature on the Datapoint class."""
+    dispatcher_signature = inspect.signature(dispatcher)
+    dispatcher_params = list(dispatcher_signature.parameters.values())[1:]
+
+    datapoint_method = getattr(datapoints._datapoint.Datapoint, dispatcher.__name__)
+    datapoint_signature = inspect.signature(datapoint_method)
+    datapoint_params = list(datapoint_signature.parameters.values())[1:]
+
+    # Some annotations in the `datapoints._datapoint` module
+    # are stored as strings. The block below makes them concrete again (non-strings), so they can be compared to the
+    # natively concrete dispatcher annotations.
+    datapoint_annotations = get_type_hints(datapoint_method)
+    for param in datapoint_params:
+        param._annotation = datapoint_annotations[param.name]
+
+    assert dispatcher_params == datapoint_params
+
+
+def check_dispatcher_signatures_match(dispatcher, *, kernel, input_type):
+    _check_dispatcher_kernel_signature_match(dispatcher, kernel=kernel, input_type=input_type)
+    _check_dispatcher_datapoint_signature_match(dispatcher)
+
+
+def _check_transform_v1_compatibility(transform, input):
+    """If the transform defines the ``_v1_transform_cls`` attribute and the input is a plain tensor, checks that
+    ``get_params`` is shared with the v1 class (if it has one) and that the transform can be scripted and called."""
+    if not hasattr(transform, "_v1_transform_cls"):
+        return
+
+    if type(input) is not torch.Tensor:  # exact type check: tensor subclasses and non-tensor inputs are skipped
+        return
+
+    if hasattr(transform._v1_transform_cls, "get_params"):
+        assert type(transform).get_params is transform._v1_transform_cls.get_params
+
+    scripted_transform = _script(transform)
+    with ignore_jit_no_profile_information_warning():
+        scripted_transform(input)  # smoke test only: the scripted output is not compared against anything
+
+
+def check_transform(transform_cls, input, *args, **kwargs):
+    transform = transform_cls(*args, **kwargs)
+
+    output = transform(input)
+    assert isinstance(output, type(input))
+
+    if isinstance(input, datapoints.BoundingBox):
+        assert output.format == input.format
+
+    _check_transform_v1_compatibility(transform, input)
+
+
+def transform_cls_to_functional(transform_cls):
+    def wrapper(input, *args, **kwargs):
+        transform = transform_cls(*args, **kwargs)
+        return transform(input)
+
+    wrapper.__name__ = transform_cls.__name__
+
+    return wrapper
+
+
+# We cannot use `list(transforms.InterpolationMode)` here, since it includes some PIL-only ones as well
+INTERPOLATION_MODES = [
+    transforms.InterpolationMode.NEAREST,
+    transforms.InterpolationMode.NEAREST_EXACT,
+    transforms.InterpolationMode.BILINEAR,
+    transforms.InterpolationMode.BICUBIC,
+]
+
+
+@contextlib.contextmanager
+def assert_warns_antialias_default_value():
+    with pytest.warns(UserWarning, match="The default value of the antialias parameter of all the resizing transforms"):
+        yield
+
+
+def reference_affine_bounding_box_helper(bounding_box, *, format, spatial_size, affine_matrix):
+    def transform(bbox, affine_matrix_, format_, spatial_size_):
+        # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
+        in_dtype = bbox.dtype
+        if not torch.is_floating_point(bbox):
+            bbox = bbox.float()
+        bbox_xyxy = F.convert_format_bounding_box(
+            bbox.as_subclass(torch.Tensor),
+            old_format=format_,
+            new_format=datapoints.BoundingBoxFormat.XYXY,
+            inplace=True,
+        )
+        points = np.array(
+            [
+                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
+            ]
+        )
+        transformed_points = np.matmul(points, affine_matrix_.T)
+        out_bbox = torch.tensor(
+            [
+                np.min(transformed_points[:, 0]).item(),
+                np.min(transformed_points[:, 1]).item(),
+                np.max(transformed_points[:, 0]).item(),
+                np.max(transformed_points[:, 1]).item(),
+            ],
+            dtype=bbox_xyxy.dtype,
+        )
+        out_bbox = F.convert_format_bounding_box(
+            out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format_, inplace=True
+        )
+        # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
+        out_bbox = F.clamp_bounding_box(out_bbox, format=format_, spatial_size=spatial_size_)
+        out_bbox = out_bbox.to(dtype=in_dtype)
+        return out_bbox
+
+    if bounding_box.ndim < 2:
+        bounding_box = [bounding_box]
+
+    expected_bboxes = [transform(bbox, affine_matrix, format, spatial_size) for bbox in bounding_box]
+    if len(expected_bboxes) > 1:
+        expected_bboxes = torch.stack(expected_bboxes)
+    else:
+        expected_bboxes = expected_bboxes[0]
+
+    return expected_bboxes
+
+
+class TestResize:
+    INPUT_SIZE = (17, 11)
+    OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)]
+
+    def _make_max_size_kwarg(self, *, use_max_size, size):
+        if use_max_size:
+            if not (isinstance(size, int) or len(size) == 1):
+                # This would result in a `ValueError`
+                return None  # falsy sentinel: the calling tests return early when they receive it
+
+            max_size = (size if isinstance(size, int) else size[0]) + 1  # one more than the requested size
+        else:
+            max_size = None
+
+        return dict(max_size=max_size)  # always a non-empty dict, so it is truthy even for `max_size=None`
+
+    def _make_input(self, input_type, *, dtype=None, device="cpu", **kwargs):
+        if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}:
+            input = make_image(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+            if input_type is torch.Tensor:
+                input = input.as_subclass(torch.Tensor)
+            elif input_type is PIL.Image.Image:
+                input = F.to_image_pil(input)
+        elif input_type is datapoints.BoundingBox:
+            kwargs.setdefault("format", datapoints.BoundingBoxFormat.XYXY)
+            input = make_bounding_box(
+                spatial_size=self.INPUT_SIZE,
+                dtype=dtype or torch.float32,
+                device=device,
+                **kwargs,
+            )
+        elif input_type is datapoints.Mask:
+            input = make_segmentation_mask(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+        elif input_type is datapoints.Video:
+            input = make_video(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+
+        return input
+
+    def _compute_output_size(self, *, input_size, size, max_size):
+        if not (isinstance(size, int) or len(size) == 1):
+            return tuple(size)
+
+        if not isinstance(size, int):
+            size = size[0]
+
+        old_height, old_width = input_size
+        ratio = old_width / old_height
+        if ratio > 1:
+            new_height = size
+            new_width = int(ratio * new_height)
+        else:
+            new_width = size
+            new_height = int(new_width / ratio)
+
+        if max_size is not None and max(new_height, new_width) > max_size:
+            # Need to recompute the aspect ratio, since it might have changed due to rounding
+            ratio = new_width / new_height
+            if ratio > 1:
+                new_width = max_size
+                new_height = int(new_width / ratio)
+            else:
+                new_height = max_size
+                new_width = int(new_height * ratio)
+
+        return new_height, new_width
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("antialias", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return  # `max_size` cannot be combined with this `size`, so there is nothing to check
+
+        # In contrast to CPU, there is no native `InterpolationMode.BICUBIC` implementation for uint8 images on CUDA.
+        # Internally, it uses the float path. Thus, we need to test with an enormous tolerance here to account for that.
+        atol = 30 if interpolation is transforms.InterpolationMode.BICUBIC and dtype is torch.uint8 else 1
+        check_cuda_vs_cpu_tolerances = dict(rtol=0, atol=atol / 255 if dtype.is_floating_point else atol)
+
+        check_kernel(
+            F.resize_image_tensor,
+            self._make_input(datapoints.Image, dtype=dtype, device=device),
+            size=size,
+            interpolation=interpolation,
+            **max_size_kwarg,
+            antialias=antialias,
+            check_cuda_vs_cpu=check_cuda_vs_cpu_tolerances,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+
+        bounding_box = self._make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format)
+        check_kernel(
+            F.resize_bounding_box,
+            bounding_box,
+            spatial_size=bounding_box.spatial_size,
+            size=size,
+            **max_size_kwarg,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize(
+        "dtype_and_make_mask", [(torch.uint8, make_segmentation_mask), (torch.bool, make_detection_mask)]
+    )
+    def test_kernel_mask(self, dtype_and_make_mask):
+        dtype, make_mask = dtype_and_make_mask
+        check_kernel(F.resize_mask, make_mask(dtype=dtype), size=self.OUTPUT_SIZES[-1])
+
+    def test_kernel_video(self):
+        check_kernel(F.resize_video, self._make_input(datapoints.Video), size=self.OUTPUT_SIZES[-1], antialias=True)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize(
+        "input_type_and_kernel",
+        [
+            (torch.Tensor, F.resize_image_tensor),
+            (PIL.Image.Image, F.resize_image_pil),
+            (datapoints.Image, F.resize_image_tensor),
+            (datapoints.BoundingBox, F.resize_bounding_box),
+            (datapoints.Mask, F.resize_mask),
+            (datapoints.Video, F.resize_video),
+        ],
+    )
+    def test_dispatcher(self, size, input_type_and_kernel):
+        input_type, kernel = input_type_and_kernel
+        check_dispatcher(
+            F.resize,
+            kernel,
+            self._make_input(input_type),
+            size=size,
+            antialias=True,
+            check_scripted_smoke=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize(
+        ("input_type", "kernel"),
+        [
+            (torch.Tensor, F.resize_image_tensor),
+            (PIL.Image.Image, F.resize_image_pil),
+            (datapoints.Image, F.resize_image_tensor),
+            (datapoints.BoundingBox, F.resize_bounding_box),
+            (datapoints.Mask, F.resize_mask),
+            (datapoints.Video, F.resize_video),
+        ],
+    )
+    def test_dispatcher_signature(self, kernel, input_type):
+        check_dispatcher_signatures_match(F.resize, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
+    )
+    def test_transform(self, size, device, input_type):
+        input = self._make_input(input_type, device=device)
+
+        check_transform(
+            transforms.Resize,
+            input,
+            size=size,
+            antialias=True,
+        )
+
+    def _check_output_size(self, input, output, *, size, max_size):
+        assert tuple(F.get_spatial_size(output)) == self._compute_output_size(
+            input_size=F.get_spatial_size(input), size=size, max_size=max_size
+        )
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2.
+    # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT`
+    @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST})
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
+    def test_image_correctness(self, size, interpolation, use_max_size, fn):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+
+        image = self._make_input(torch.Tensor, dtype=torch.uint8, device="cpu")
+
+        actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True)
+        expected = F.to_image_tensor(
+            F.resize(F.to_image_pil(image), size=size, interpolation=interpolation, **max_size_kwarg)
+        )
+
+        self._check_output_size(image, actual, size=size, **max_size_kwarg)
+        torch.testing.assert_close(actual, expected, atol=1, rtol=0)
+
+    def _reference_resize_bounding_box(self, bounding_box, *, size, max_size=None):
+        old_height, old_width = bounding_box.spatial_size
+        new_height, new_width = self._compute_output_size(
+            input_size=bounding_box.spatial_size, size=size, max_size=max_size
+        )
+
+        if (old_height, old_width) == (new_height, new_width):
+            return bounding_box
+
+        affine_matrix = np.array(
+            [
+                [new_width / old_width, 0, 0],
+                [0, new_height / old_height, 0],
+            ],
+            dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
+        )
+
+        expected_bboxes = reference_affine_bounding_box_helper(
+            bounding_box,
+            format=bounding_box.format,
+            spatial_size=(new_height, new_width),
+            affine_matrix=affine_matrix,
+        )
+        return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes, spatial_size=(new_height, new_width))
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
+    def test_bounding_box_correctness(self, format, size, use_max_size, fn):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return  # `max_size` cannot be combined with this `size`, so there is nothing to check
+
+        bounding_box = self._make_input(datapoints.BoundingBox, format=format)  # exercise every parametrized format
+
+        actual = fn(bounding_box, size=size, **max_size_kwarg)
+        expected = self._reference_resize_bounding_box(bounding_box, size=size, **max_size_kwarg)
+
+        self._check_output_size(bounding_box, actual, size=size, **max_size_kwarg)
+        torch.testing.assert_close(actual, expected)
+
+    @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES))
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video],
+    )
+    def test_pil_interpolation_compat_smoke(self, interpolation, input_type):
+        input = self._make_input(input_type)
+
+        with (
+            contextlib.nullcontext()
+            if isinstance(input, PIL.Image.Image)
+            # This error is triggered in PyTorch core
+            else pytest.raises(NotImplementedError, match=f"got {interpolation.value.lower()}")
+        ):
+            F.resize(
+                input,
+                size=self.OUTPUT_SIZES[0],
+                interpolation=interpolation,
+            )
+
+    def test_dispatcher_pil_antialias_warning(self):
+        with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"):
+            F.resize(self._make_input(PIL.Image.Image), size=self.OUTPUT_SIZES[0], antialias=False)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
+    )
+    def test_max_size_error(self, size, input_type):
+        if isinstance(size, int) or len(size) == 1:
+            max_size = (size if isinstance(size, int) else size[0]) - 1
+            match = "must be strictly greater than the requested size"
+        else:
+            # value can be anything other than None
+            max_size = -1
+            match = "size should be an int or a sequence of length 1"
+
+        with pytest.raises(ValueError, match=match):
+            F.resize(self._make_input(input_type), size=size, max_size=max_size, antialias=True)
+
+    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, datapoints.Image, datapoints.Video],
+    )
+    def test_antialias_warning(self, interpolation, input_type):
+        with (
+            assert_warns_antialias_default_value()
+            if interpolation in {transforms.InterpolationMode.BILINEAR, transforms.InterpolationMode.BICUBIC}
+            else assert_no_warnings()
+        ):
+            F.resize(self._make_input(input_type), size=self.OUTPUT_SIZES[0], interpolation=interpolation)
+
+    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video],
+    )
+    def test_interpolation_int(self, interpolation, input_type):
+        # `InterpolationMode.NEAREST_EXACT` has no proper corresponding integer equivalent. Internally, we map it to
+        # `0` to be the same as `InterpolationMode.NEAREST` for PIL. However, for the tensor backend there is a
+        # difference and thus we don't test it here.
+        if issubclass(input_type, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT:
+            return
+
+        input = self._make_input(input_type)
+
+        expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True)
+        actual = F.resize(
+            input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True
+        )
+
+        assert_equal(actual, expected)
+
+    def test_transform_unknown_size_error(self):
+        with pytest.raises(ValueError, match="size can either be an integer or a list or tuple of one or two integers"):
+            transforms.Resize(size=object())
+
+    @pytest.mark.parametrize(
+        "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)]
+    )
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
+    )
+    def test_noop(self, size, input_type):
+        input = self._make_input(input_type)
+
+        output = F.resize(input, size=size, antialias=True)
+
+        # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there
+        # is a good reason to break this, feel free to downgrade to an equality check.
+        if isinstance(input, datapoints._datapoint.Datapoint):
+            # We can't test identity directly, since that checks for the identity of the Python object. Since all
+            # datapoints unwrap before a kernel and wrap again afterwards, the Python object changes. Thus, we check
+            # that the underlying storage is the same
+            assert output.data_ptr() == input.data_ptr()
+        else:
+            assert output is input
+
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
+    )
+    def test_no_regression_5405(self, input_type):
+        # Checks that `max_size` is not ignored if `size == small_edge_size`
+        # See https://github.com/pytorch/vision/issues/5405
+
+        input = self._make_input(input_type)
+
+        size = min(F.get_spatial_size(input))
+        max_size = size + 1
+        output = F.resize(input, size=size, max_size=max_size, antialias=True)
+
+        assert max(F.get_spatial_size(output)) == max_size
diff --git a/test/test_utils.py b/test/test_utils.py
index 32b3db59631..b13bd0f0f5b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -9,7 +9,7 @@
 import torch
 import torchvision.transforms.functional as F
 import torchvision.utils as utils
-from common_utils import assert_equal
+from common_utils import assert_equal, cpu_and_cuda
 from PIL import __version__ as PILLOW_VERSION, Image, ImageColor
 
 
@@ -203,12 +203,13 @@ def test_draw_no_boxes():
     ],
 )
 @pytest.mark.parametrize("alpha", (0, 0.5, 0.7, 1))
-def test_draw_segmentation_masks(colors, alpha):
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_segmentation_masks(colors, alpha, device):
     """This test makes sure that masks draw their corresponding color where they should"""
     num_masks, h, w = 2, 100, 100
     dtype = torch.uint8
-    img = torch.randint(0, 256, size=(3, h, w), dtype=dtype)
-    masks = torch.randint(0, 2, (num_masks, h, w), dtype=torch.bool)
+    img = torch.randint(0, 256, size=(3, h, w), dtype=dtype, device=device)
+    masks = torch.randint(0, 2, (num_masks, h, w), dtype=torch.bool, device=device)
 
     # For testing we enforce that there's no overlap between the masks. The
     # current behaviour is that the last mask's color will take priority when
@@ -234,7 +235,7 @@ def test_draw_segmentation_masks(colors, alpha):
     for mask, color in zip(masks, colors):
         if isinstance(color, str):
             color = ImageColor.getrgb(color)
-        color = torch.tensor(color, dtype=dtype)
+        color = torch.tensor(color, dtype=dtype, device=device)
 
         if alpha == 1:
             assert (out[:, mask] == color[:, None]).all()
@@ -245,11 +246,12 @@ def test_draw_segmentation_masks(colors, alpha):
         torch.testing.assert_close(out[:, mask], interpolated_color, rtol=0.0, atol=1.0)
 
 
-def test_draw_segmentation_masks_errors():
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_segmentation_masks_errors(device):
     h, w = 10, 10
 
-    masks = torch.randint(0, 2, size=(h, w), dtype=torch.bool)
-    img = torch.randint(0, 256, size=(3, h, w), dtype=torch.uint8)
+    masks = torch.randint(0, 2, size=(h, w), dtype=torch.bool, device=device)
+    img = torch.randint(0, 256, size=(3, h, w), dtype=torch.uint8, device=device)
 
     with pytest.raises(TypeError, match="The image must be a tensor"):
         utils.draw_segmentation_masks(image="Not A Tensor Image", masks=masks)
@@ -281,9 +283,10 @@ def test_draw_segmentation_masks_errors():
         utils.draw_segmentation_masks(image=img, masks=masks, colors=bad_colors)
 
 
-def test_draw_no_segmention_mask():
-    img = torch.full((3, 100, 100), 0, dtype=torch.uint8)
-    masks = torch.full((0, 100, 100), 0, dtype=torch.bool)
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_no_segmention_mask(device):
+    img = torch.full((3, 100, 100), 0, dtype=torch.uint8, device=device)
+    masks = torch.full((0, 100, 100), 0, dtype=torch.bool, device=device)
     with pytest.warns(UserWarning, match=re.escape("masks doesn't contain any mask. No mask was drawn")):
         res = utils.draw_segmentation_masks(img, masks)
         # Check that the function didn't change the image
diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py
index 1d9dd025254..cb1bc257e50 100644
--- a/test/transforms_v2_dispatcher_infos.py
+++ b/test/transforms_v2_dispatcher_infos.py
@@ -148,19 +148,6 @@ def fill_sequence_needs_broadcast(args_kwargs):
         },
         pil_kernel_info=PILKernelInfo(F.horizontal_flip_image_pil, kernel_name="horizontal_flip_image_pil"),
     ),
-    DispatcherInfo(
-        F.resize,
-        kernels={
-            datapoints.Image: F.resize_image_tensor,
-            datapoints.Video: F.resize_video,
-            datapoints.BoundingBox: F.resize_bounding_box,
-            datapoints.Mask: F.resize_mask,
-        },
-        pil_kernel_info=PILKernelInfo(F.resize_image_pil),
-        test_marks=[
-            xfail_jit_python_scalar_arg("size"),
-        ],
-    ),
     DispatcherInfo(
         F.affine,
         kernels={
diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py
index 7b877fb092d..547e708b726 100644
--- a/test/transforms_v2_kernel_infos.py
+++ b/test/transforms_v2_kernel_infos.py
@@ -238,179 +238,6 @@ def reference_inputs_flip_bounding_box():
 )
 
 
-def _get_resize_sizes(spatial_size):
-    height, width = spatial_size
-    length = max(spatial_size)
-    yield length
-    yield [length]
-    yield (length,)
-    new_height = int(height * 0.75)
-    new_width = int(width * 1.25)
-    yield [new_height, new_width]
-    yield height, width
-
-
-def sample_inputs_resize_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]):
-        for size in _get_resize_sizes(image_loader.spatial_size):
-            yield ArgsKwargs(image_loader, size=size)
-
-    for image_loader, interpolation in itertools.product(
-        make_image_loaders(sizes=["random"], color_spaces=["RGB"]),
-        [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR],
-    ):
-        yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation)
-
-    yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25)
-
-
-def sample_inputs_resize_image_tensor_bicubic():
-    for image_loader, interpolation in itertools.product(
-        make_image_loaders(sizes=["random"], color_spaces=["RGB"]), [F.InterpolationMode.BICUBIC]
-    ):
-        yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation)
-
-
-@pil_reference_wrapper
-def reference_resize_image_tensor(*args, **kwargs):
-    if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in {
-        F.InterpolationMode.BILINEAR,
-        F.InterpolationMode.BICUBIC,
-    }:
-        raise pytest.UsageError("Anti-aliasing is always active in PIL")
-    return F.resize_image_pil(*args, **kwargs)
-
-
-def reference_inputs_resize_image_tensor():
-    for image_loader, interpolation in itertools.product(
-        make_image_loaders_for_interpolation(),
-        [
-            F.InterpolationMode.NEAREST,
-            F.InterpolationMode.NEAREST_EXACT,
-            F.InterpolationMode.BILINEAR,
-            F.InterpolationMode.BICUBIC,
-        ],
-    ):
-        for size in _get_resize_sizes(image_loader.spatial_size):
-            yield ArgsKwargs(
-                image_loader,
-                size=size,
-                interpolation=interpolation,
-                antialias=interpolation
-                in {
-                    F.InterpolationMode.BILINEAR,
-                    F.InterpolationMode.BICUBIC,
-                },
-            )
-
-
-def sample_inputs_resize_bounding_box():
-    for bounding_box_loader in make_bounding_box_loaders():
-        for size in _get_resize_sizes(bounding_box_loader.spatial_size):
-            yield ArgsKwargs(bounding_box_loader, spatial_size=bounding_box_loader.spatial_size, size=size)
-
-
-def sample_inputs_resize_mask():
-    for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]):
-        yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1])
-
-
-def sample_inputs_resize_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
-        yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1])
-
-
-def reference_resize_bounding_box(bounding_box, *, spatial_size, size, max_size=None):
-    old_height, old_width = spatial_size
-    new_height, new_width = F._geometry._compute_resized_output_size(spatial_size, size=size, max_size=max_size)
-
-    if (old_height, old_width) == (new_height, new_width):
-        return bounding_box, (old_height, old_width)
-
-    affine_matrix = np.array(
-        [
-            [new_width / old_width, 0, 0],
-            [0, new_height / old_height, 0],
-        ],
-        dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
-    )
-
-    expected_bboxes = reference_affine_bounding_box_helper(
-        bounding_box,
-        format=bounding_box.format,
-        spatial_size=(new_height, new_width),
-        affine_matrix=affine_matrix,
-    )
-    return expected_bboxes, (new_height, new_width)
-
-
-def reference_inputs_resize_bounding_box():
-    for bounding_box_loader in make_bounding_box_loaders(extra_dims=((), (4,))):
-        for size in _get_resize_sizes(bounding_box_loader.spatial_size):
-            yield ArgsKwargs(bounding_box_loader, size=size, spatial_size=bounding_box_loader.spatial_size)
-
-
-KERNEL_INFOS.extend(
-    [
-        KernelInfo(
-            F.resize_image_tensor,
-            sample_inputs_fn=sample_inputs_resize_image_tensor,
-            reference_fn=reference_resize_image_tensor,
-            reference_inputs_fn=reference_inputs_resize_image_tensor,
-            float32_vs_uint8=True,
-            closeness_kwargs={
-                **pil_reference_pixel_difference(10, mae=True),
-                **cuda_vs_cpu_pixel_difference(),
-                **float32_vs_uint8_pixel_difference(1, mae=True),
-            },
-            test_marks=[
-                xfail_jit_python_scalar_arg("size"),
-            ],
-        ),
-        KernelInfo(
-            F.resize_image_tensor,
-            sample_inputs_fn=sample_inputs_resize_image_tensor_bicubic,
-            reference_fn=reference_resize_image_tensor,
-            reference_inputs_fn=reference_inputs_resize_image_tensor,
-            float32_vs_uint8=True,
-            closeness_kwargs={
-                **pil_reference_pixel_difference(10, mae=True),
-                **cuda_vs_cpu_pixel_difference(atol=30),
-                **float32_vs_uint8_pixel_difference(1, mae=True),
-            },
-            test_marks=[
-                xfail_jit_python_scalar_arg("size"),
-            ],
-        ),
-        KernelInfo(
-            F.resize_bounding_box,
-            sample_inputs_fn=sample_inputs_resize_bounding_box,
-            reference_fn=reference_resize_bounding_box,
-            reference_inputs_fn=reference_inputs_resize_bounding_box,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0),
-            },
-            test_marks=[
-                xfail_jit_python_scalar_arg("size"),
-            ],
-        ),
-        KernelInfo(
-            F.resize_mask,
-            sample_inputs_fn=sample_inputs_resize_mask,
-            closeness_kwargs=pil_reference_pixel_difference(10),
-            test_marks=[
-                xfail_jit_python_scalar_arg("size"),
-            ],
-        ),
-        KernelInfo(
-            F.resize_video,
-            sample_inputs_fn=sample_inputs_resize_video,
-            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
-        ),
-    ]
-)
-
-
 _AFFINE_KWARGS = combinations_grid(
     angle=[-87, 15, 90],
     translate=[(5, 5), (-5, -5)],
diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp
index b1ceaf1badd..d27eafe45a7 100644
--- a/torchvision/csrc/io/image/cpu/decode_png.cpp
+++ b/torchvision/csrc/io/image/cpu/decode_png.cpp
@@ -49,6 +49,7 @@ torch::Tensor decode_png(
     png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
     TORCH_CHECK(false, "Internal error.");
   }
+  TORCH_CHECK(datap_len >= 8, "Content is too small for png!")
   auto is_png = !png_sig_cmp(datap, 0, 8);
   TORCH_CHECK(is_png, "Content is not png!")
 
diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py
index 220c1ae79d5..b79b4ef4e61 100644
--- a/torchvision/datasets/utils.py
+++ b/torchvision/datasets/utils.py
@@ -57,7 +57,7 @@ def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str:
     else:
         md5 = hashlib.md5()
     with open(fpath, "rb") as f:
-        for chunk in iter(lambda: f.read(chunk_size), b""):
+        while chunk := f.read(chunk_size):
             md5.update(chunk)
     return md5.hexdigest()
 
diff --git a/torchvision/models/_api.py b/torchvision/models/_api.py
index 51db5c0b23e..e244207a8ed 100644
--- a/torchvision/models/_api.py
+++ b/torchvision/models/_api.py
@@ -122,7 +122,9 @@ def get_weight(name: str) -> WeightsEnum:
     base_module_name = ".".join(sys.modules[__name__].__name__.split(".")[:-1])
     base_module = importlib.import_module(base_module_name)
     model_modules = [base_module] + [
-        x[1] for x in inspect.getmembers(base_module, inspect.ismodule) if x[1].__file__.endswith("__init__.py")
+        x[1]
+        for x in inspect.getmembers(base_module, inspect.ismodule)
+        if x[1].__file__.endswith("__init__.py")  # type: ignore[union-attr]
     ]
 
     weights_enum = None
diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py
index aa520e14962..1041d4d149f 100644
--- a/torchvision/models/mobilenetv3.py
+++ b/torchvision/models/mobilenetv3.py
@@ -378,7 +378,7 @@ def mobilenet_v3_large(
             weights are used.
         progress (bool, optional): If True, displays a progress bar of the
             download to stderr. Default is True.
-        **kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3``
+        **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3``
             base class. Please refer to the `source code
             <https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py>`_
             for more details about this class.
@@ -409,7 +409,7 @@ def mobilenet_v3_small(
             weights are used.
         progress (bool, optional): If True, displays a progress bar of the
             download to stderr. Default is True.
-        **kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3``
+        **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3``
             base class. Please refer to the `source code
             <https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py>`_
             for more details about this class.
diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py
index 52b85244b3d..3f3322b7a88 100644
--- a/torchvision/models/shufflenetv2.py
+++ b/torchvision/models/shufflenetv2.py
@@ -35,7 +35,7 @@ def channel_shuffle(x: Tensor, groups: int) -> Tensor:
     x = torch.transpose(x, 1, 2).contiguous()
 
     # flatten
-    x = x.view(batchsize, -1, height, width)
+    x = x.view(batchsize, num_channels, height, width)
 
     return x
 
diff --git a/torchvision/prototype/datasets/_builtin/README.md b/torchvision/prototype/datasets/_builtin/README.md
index 05d61c6870e..3b33100eb81 100644
--- a/torchvision/prototype/datasets/_builtin/README.md
+++ b/torchvision/prototype/datasets/_builtin/README.md
@@ -91,7 +91,7 @@ import hashlib
 def sha256sum(path, chunk_size=1024 * 1024):
     checksum = hashlib.sha256()
     with open(path, "rb") as f:
-        for chunk in iter(lambda: f.read(chunk_size), b""):
+        while chunk := f.read(chunk_size):
             checksum.update(chunk)
     print(checksum.hexdigest())
 ```
diff --git a/torchvision/prototype/datasets/utils/_resource.py b/torchvision/prototype/datasets/utils/_resource.py
index af4ede38dc0..dadec014b52 100644
--- a/torchvision/prototype/datasets/utils/_resource.py
+++ b/torchvision/prototype/datasets/utils/_resource.py
@@ -136,7 +136,7 @@ def download(self, root: Union[str, pathlib.Path], *, skip_integrity_check: bool
     def _check_sha256(self, path: pathlib.Path, *, chunk_size: int = 1024 * 1024) -> None:
         hash = hashlib.sha256()
         with open(path, "rb") as file:
-            for chunk in iter(lambda: file.read(chunk_size), b""):
+            while chunk := file.read(chunk_size):
                 hash.update(chunk)
         sha256 = hash.hexdigest()
         if sha256 != self.sha256:
diff --git a/torchvision/utils.py b/torchvision/utils.py
index 1418656a7f2..6ec19a0e0a1 100644
--- a/torchvision/utils.py
+++ b/torchvision/utils.py
@@ -304,7 +304,10 @@ def draw_segmentation_masks(
         return image
 
     out_dtype = torch.uint8
-    colors = [torch.tensor(color, dtype=out_dtype) for color in _parse_colors(colors, num_objects=num_masks)]
+    colors = [
+        torch.tensor(color, dtype=out_dtype, device=image.device)
+        for color in _parse_colors(colors, num_objects=num_masks)
+    ]
 
     img_to_draw = image.detach().clone()
     # TODO: There might be a way to vectorize this