From 40d7c8315de94fbe55c6c47046984a1562191079 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 11 Jan 2023 17:33:23 +0000
Subject: [PATCH 01/14] First round

---
 torchvision/_utils.py                         |  9 ++++++++
 torchvision/prototype/datapoints/__init__.py  |  2 +-
 .../prototype/datapoints/_datapoint.py        | 22 +++++++++++++++++--
 torchvision/prototype/transforms/_geometry.py | 10 ++++++++-
 .../prototype/transforms/_transform.py        |  4 ++++
 .../transforms/functional/__init__.py         |  5 ++++-
 .../transforms/functional/_geometry.py        | 12 +++++++++-
 torchvision/prototype/transforms/utils.py     |  2 ++
 8 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/torchvision/_utils.py b/torchvision/_utils.py
index b739ef0966e..1494a2cf000 100644
--- a/torchvision/_utils.py
+++ b/torchvision/_utils.py
@@ -4,6 +4,14 @@
 T = TypeVar("T", bound=enum.Enum)
 
 
+
+# Could we just define 
+# class ColorSpace(enum.Enum):
+#    RGB = "RGB"  # instead of auto()
+#    ...
+# and avoid this helper class altogether?
+# I sort of remember having some issue with this class before (perhaps with the
+# WeightsEnum?)
 class StrEnumMeta(enum.EnumMeta):
     auto = enum.auto
 
@@ -16,6 +24,7 @@ def from_str(self: Type[T], member: str) -> T:  # type: ignore[misc]
             raise ValueError(f"Unknown value '{member}' for {self.__name__}.") from None
 
 
+# Can this just be a simple class inherited from Enum (without the metaclass)?
 class StrEnum(enum.Enum, metaclass=StrEnumMeta):
     pass
 
diff --git a/torchvision/prototype/datapoints/__init__.py b/torchvision/prototype/datapoints/__init__.py
index 92f345e20bd..f4aeb4bc946 100644
--- a/torchvision/prototype/datapoints/__init__.py
+++ b/torchvision/prototype/datapoints/__init__.py
@@ -1,5 +1,5 @@
 from ._bounding_box import BoundingBox, BoundingBoxFormat
-from ._datapoint import FillType, FillTypeJIT, InputType, InputTypeJIT
+from ._datapoint import FillType, FillTypeJIT, InputType, InputTypeJIT  # Need to be public?
 from ._image import ColorSpace, Image, ImageType, ImageTypeJIT, TensorImageType, TensorImageTypeJIT
 from ._label import Label, OneHotLabel
 from ._mask import Mask
diff --git a/torchvision/prototype/datapoints/_datapoint.py b/torchvision/prototype/datapoints/_datapoint.py
index 659d4e958cc..de781199506 100644
--- a/torchvision/prototype/datapoints/_datapoint.py
+++ b/torchvision/prototype/datapoints/_datapoint.py
@@ -15,6 +15,8 @@
 FillTypeJIT = Union[int, float, List[float], None]
 
 
+# Perhaps it's already documented somewhere, but I think it'd be useful to
+# provide a few examples of when
 class Datapoint(torch.Tensor):
     __F: Optional[ModuleType] = None
 
@@ -41,6 +43,7 @@ def __new__(
         tensor = cls._to_tensor(data, dtype=dtype, device=device, requires_grad=requires_grad)
         return tensor.as_subclass(Datapoint)
 
+    # Is this still needed, considering we won't be releasing the prototype datasets anytime soon?
     @classmethod
     def wrap_like(cls: Type[D], other: D, tensor: torch.Tensor) -> D:
         # FIXME: this is just here for BC with the prototype datasets. See __new__ for details. If that is resolved,
@@ -48,6 +51,8 @@ def wrap_like(cls: Type[D], other: D, tensor: torch.Tensor) -> D:
         # raise NotImplementedError
         return tensor.as_subclass(cls)
 
+    # Can things go wrong with having to maintain a different set of funcs that
+    # need special care? What if we forget to handle one properly?
     _NO_WRAPPING_EXCEPTIONS = {
         torch.Tensor.clone: lambda cls, input, output: cls.wrap_like(input, output),
         torch.Tensor.to: lambda cls, input, output: cls.wrap_like(input, output),
@@ -81,10 +86,11 @@ def __torch_function__(
         For these reasons, the automatic output wrapping is turned off for most operators. The only exceptions are
         listed in :attr:`Datapoint._NO_WRAPPING_EXCEPTIONS`
         """
+        # Is this something we've bubbled up to the core team as a potential feature request?
         # Since super().__torch_function__ has no hook to prevent the coercing of the output into the input type, we
         # need to reimplement the functionality.
 
-        if not all(issubclass(cls, t) for t in types):
+        if not all(issubclass(cls, t) for t in types):  # Curious in which cases this can be hit?
             return NotImplemented
 
         with DisableTorchFunctionSubclass():
@@ -100,6 +106,8 @@ def __torch_function__(
             if wrapper and isinstance(args[0], cls):
                 return wrapper(cls, args[0], output)  # type: ignore[no-any-return]
 
+            # Does that mean that DisableTorchFunctionSubclass is ignored for `.inpace_()` functions?
+            # Or maybe I'm misunderstanding what DisableTorchFunctionSubclass is supposed to do.
             # Inplace `func`'s, canonically identified with a trailing underscore in their name like `.add_(...)`,
             # will retain the input type. Thus, we need to unwrap here.
             if isinstance(output, cls):
@@ -107,12 +115,14 @@ def __torch_function__(
 
             return output
 
+    # Is this used?
     def _make_repr(self, **kwargs: Any) -> str:
         # This is a poor man's implementation of the proposal in https://github.com/pytorch/pytorch/issues/76532.
         # If that ever gets implemented, remove this in favor of the solution on the `torch.Tensor` class.
         extra_repr = ", ".join(f"{key}={value}" for key, value in kwargs.items())
         return f"{super().__repr__()[:-1]}, {extra_repr})"
 
+    # What cyclic import is this solving? Can it be avoided somehow?
     @property
     def _F(self) -> ModuleType:
         # This implements a lazy import of the functional to get around the cyclic import. This import is deferred
@@ -125,6 +135,13 @@ def _F(self) -> ModuleType:
             Datapoint.__F = functional
         return Datapoint.__F
 
+    
+    # doing some_tensor.dtype would go through __torch_function__?
+    #  - is this because it is implemented as a @property method?
+    #  - why do we want to bypass __torch_function__ in these case?
+    # Also, what happens if users access another attribute that we haven't
+    # overridden here, e.g. image.data or image.some_new_attribute_in_pytorch3.0?
+
     # Add properties for common attributes like shape, dtype, device, ndim etc
     # this way we return the result without passing into __torch_function__
     @property
@@ -147,6 +164,7 @@ def dtype(self) -> _dtype:  # type: ignore[override]
         with DisableTorchFunctionSubclass():
             return super().dtype
 
+    # Are these the "no-op fallbacks"?
     def horizontal_flip(self) -> Datapoint:
         return self
 
@@ -268,4 +286,4 @@ def gaussian_blur(self, kernel_size: List[int], sigma: Optional[List[float]] = N
 
 
 InputType = Union[torch.Tensor, PIL.Image.Image, Datapoint]
-InputTypeJIT = torch.Tensor
+InputTypeJIT = torch.Tensor  # why alias it?
diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py
index 1cbf02d5ae2..581703fdf79 100644
--- a/torchvision/prototype/transforms/_geometry.py
+++ b/torchvision/prototype/transforms/_geometry.py
@@ -40,13 +40,21 @@ class Resize(Transform):
     def __init__(
         self,
         size: Union[int, Sequence[int]],
+        # Unrelated to V2, just interested in your thoughts on this:
+        # It'd be nice to just be able to do Resize(mode="bilinear"), i.e. accept strings.
+        # Having to import the Enum class can be inconvenient when prototyping,
+        # and the discrepancy between torchvision and torch core (which accept
+        # strings) also trips users.
+        # I don't remember if this was supported in the past and then removed?
+        # Would there be any technical challenge in supporting it? (e.g.
+        # torchscript issues?)
         interpolation: InterpolationMode = InterpolationMode.BILINEAR,
         max_size: Optional[int] = None,
         antialias: Optional[bool] = None,
     ) -> None:
         super().__init__()
 
-        self.size = (
+        self.size = (  # Isn't this check duplicated in _compute_resized_output_size()?
             [size]
             if isinstance(size, int)
             else _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
diff --git a/torchvision/prototype/transforms/_transform.py b/torchvision/prototype/transforms/_transform.py
index 43224cabd38..413c3b469d2 100644
--- a/torchvision/prototype/transforms/_transform.py
+++ b/torchvision/prototype/transforms/_transform.py
@@ -4,6 +4,7 @@
 import PIL.Image
 import torch
 from torch import nn
+# I remember that the core team is open to making it public. Is still private?
 from torch.utils._pytree import tree_flatten, tree_unflatten
 from torchvision.prototype.transforms.utils import check_type
 from torchvision.utils import _log_api_usage_once
@@ -14,6 +15,8 @@ class Transform(nn.Module):
     # Class attribute defining transformed types. Other types are passed-through without any transformation
     # We support both Types and callables that are able to do further checks on the type of the input.
     _transformed_types: Tuple[Union[Type, Callable[[Any], bool]], ...] = (torch.Tensor, PIL.Image.Image)
+    # Q: Apart from Tensors (or datapoints) and PIL images, what are the other
+    # types that one may pass to a transform?
 
     def __init__(self) -> None:
         super().__init__()
@@ -29,6 +32,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         raise NotImplementedError
 
     def forward(self, *inputs: Any) -> Any:
+        # TL;DR of the flatten -> unflatten thing?
         flat_inputs, spec = tree_flatten(inputs if len(inputs) > 1 else inputs[0])
 
         self._check_inputs(flat_inputs)
diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index ec2da6ee518..8574ffd5d9d 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -1,5 +1,8 @@
 # TODO: Add _log_api_usage_once() in all mid-level kernels. If they remain not jit-scriptable we can use decorators
 
+# Do we want all these low-level kernels to be public?
+# Is it mostly because they're the only ones that are fully jit-compatible, or are there other reasons?
+
 from torchvision.transforms import InterpolationMode  # usort: skip
 from ._meta import (
     clamp_bounding_box,
@@ -126,7 +129,7 @@
     perspective_image_tensor,
     perspective_mask,
     perspective_video,
-    resize,
+    resize,  # this is the "dispatcher layer" and the resize_xyz are the low level kernels?
     resize_bounding_box,
     resize_image_pil,
     resize_image_tensor,
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index ba417a0ce84..5beade0d4be 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -140,6 +140,8 @@ def _compute_resized_output_size(
     return __compute_resized_output_size(spatial_size, size=size, max_size=max_size)
 
 
+# IIUC this is mostly a rewrite of torchvision.transforms.functional_tensor.resize()?
+# Was this new version made for perf improvements, or some other reason?
 def resize_image_tensor(
     image: torch.Tensor,
     size: List[int],
@@ -182,6 +184,8 @@ def resize_image_tensor(
     return image.reshape(shape[:-3] + (num_channels, new_height, new_width))
 
 
+# This looks like a shallow wrapper around the original _FP.resize()
+# Are these wrapper common or is this a one-of?
 @torch.jit.unused
 def resize_image_pil(
     image: PIL.Image.Image,
@@ -232,6 +236,7 @@ def resize_video(
     return resize_image_tensor(video, size=size, interpolation=interpolation, max_size=max_size, antialias=antialias)
 
 
+# Is the if/elif/else/raise structure the same in (almost?) all dispatchers?
 def resize(
     inpt: datapoints.InputTypeJIT,
     size: List[int],
@@ -244,11 +249,16 @@ def resize(
     if isinstance(inpt, torch.Tensor) and (
         torch.jit.is_scripting() or not isinstance(inpt, datapoints._datapoint.Datapoint)
     ):
+        # Do I understand correctly that resize_image_tensor() will be called if we're scripting and inpt is an Image()?
+        # It seems to suggests that this dispatcher is still jit-scriptable on datapoints (and not just on tensors)?
         return resize_image_tensor(inpt, size, interpolation=interpolation, max_size=max_size, antialias=antialias)
     elif isinstance(inpt, datapoints._datapoint.Datapoint):
+        # IIUC we're calling the method on the datapoint object which in turn
+        # will go back in this file and call one of the low-level kernels above.
+        # According to offline chat this is to enable extensibility in the future?
         return inpt.resize(size, interpolation=interpolation, max_size=max_size, antialias=antialias)
     elif isinstance(inpt, PIL.Image.Image):
-        if antialias is not None and not antialias:
+        if antialias is not None and not antialias:  # just check `is False` ?
             warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.")
         return resize_image_pil(inpt, size, interpolation=interpolation, max_size=max_size)
     else:
diff --git a/torchvision/prototype/transforms/utils.py b/torchvision/prototype/transforms/utils.py
index 9ab2ed2602b..1ebcf0efd34 100644
--- a/torchvision/prototype/transforms/utils.py
+++ b/torchvision/prototype/transforms/utils.py
@@ -55,8 +55,10 @@ def query_spatial_size(flat_inputs: List[Any]) -> Tuple[int, int]:
     return h, w
 
 
+# when can types_or_checks be a callable?
 def check_type(obj: Any, types_or_checks: Tuple[Union[Type, Callable[[Any], bool]], ...]) -> bool:
     for type_or_check in types_or_checks:
+        # That messed with my brain for a bit - add parenthesis?
         if isinstance(obj, type_or_check) if isinstance(type_or_check, type) else type_or_check(obj):
             return True
     return False

From 9edfd427cba8e5cc4cc926272ea324399fe4978a Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 12 Jan 2023 13:06:27 +0000
Subject: [PATCH 02/14] More Qs

---
 torchvision/prototype/datapoints/_datapoint.py       |  1 +
 torchvision/prototype/datapoints/_image.py           |  1 +
 torchvision/prototype/datapoints/_label.py           |  1 +
 torchvision/prototype/transforms/_deprecated.py      |  3 +++
 torchvision/prototype/transforms/_presets.py         |  3 +++
 torchvision/prototype/transforms/_type_conversion.py |  3 +++
 torchvision/prototype/transforms/_utils.py           |  5 ++++-
 torchvision/prototype/transforms/utils.py            | 11 +++++++++++
 8 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/torchvision/prototype/datapoints/_datapoint.py b/torchvision/prototype/datapoints/_datapoint.py
index de781199506..5104d98a853 100644
--- a/torchvision/prototype/datapoints/_datapoint.py
+++ b/torchvision/prototype/datapoints/_datapoint.py
@@ -123,6 +123,7 @@ def _make_repr(self, **kwargs: Any) -> str:
         return f"{super().__repr__()[:-1]}, {extra_repr})"
 
     # What cyclic import is this solving? Can it be avoided somehow?
+    # EDIT: is this because the dispatchers call the methods which in turn call back into the functional. namespace?
     @property
     def _F(self) -> ModuleType:
         # This implements a lazy import of the functional to get around the cyclic import. This import is deferred
diff --git a/torchvision/prototype/datapoints/_image.py b/torchvision/prototype/datapoints/_image.py
index fc20691100f..1720bb3a1e0 100644
--- a/torchvision/prototype/datapoints/_image.py
+++ b/torchvision/prototype/datapoints/_image.py
@@ -58,6 +58,7 @@ def _from_tensor_shape(shape: List[int]) -> ColorSpace:
 
 
 class Image(Datapoint):
+    # Where is this used / changes apart from in ConvertColorSpace()?
     color_space: ColorSpace
 
     @classmethod
diff --git a/torchvision/prototype/datapoints/_label.py b/torchvision/prototype/datapoints/_label.py
index 54915493390..abae7eaabda 100644
--- a/torchvision/prototype/datapoints/_label.py
+++ b/torchvision/prototype/datapoints/_label.py
@@ -58,6 +58,7 @@ def to_categories(self) -> Any:
         return tree_map(lambda idx: self.categories[idx], self.tolist())
 
 
+# Remind me where this is used?
 class OneHotLabel(_LabelBase):
     def __new__(
         cls,
diff --git a/torchvision/prototype/transforms/_deprecated.py b/torchvision/prototype/transforms/_deprecated.py
index 3247a8051a3..4f1cd9bf175 100644
--- a/torchvision/prototype/transforms/_deprecated.py
+++ b/torchvision/prototype/transforms/_deprecated.py
@@ -14,6 +14,8 @@
 from .utils import is_simple_tensor, query_chw
 
 
+# Considering the widespread usage of ToTensor, could this be potentially disruptive?
+# What is the cost of keeping it around as an alias for Compose(...)?
 class ToTensor(Transform):
     _transformed_types = (PIL.Image.Image, np.ndarray)
 
@@ -41,6 +43,7 @@ def __init__(self, num_output_channels: Literal[1, 3] = 1) -> None:
             f"The transform `Grayscale(num_output_channels={num_output_channels})` "
             f"is deprecated and will be removed in a future release."
         )
+        # the name seems to be ConvertColorSpace, not ConvertImageColorSpace
         if num_output_channels == 1:
             replacement_msg = (
                 "transforms.ConvertImageColorSpace(old_color_space=ColorSpace.RGB, color_space=ColorSpace.GRAY)"
diff --git a/torchvision/prototype/transforms/_presets.py b/torchvision/prototype/transforms/_presets.py
index a6980f3e135..aa0b350858a 100644
--- a/torchvision/prototype/transforms/_presets.py
+++ b/torchvision/prototype/transforms/_presets.py
@@ -14,6 +14,9 @@
 __all__ = ["StereoMatching"]
 
 
+# I assume this file is actually not part of the transformsV2 work - it's just
+# that StereoMatching is still protytpe?
+
 class StereoMatching(torch.nn.Module):
     def __init__(
         self,
diff --git a/torchvision/prototype/transforms/_type_conversion.py b/torchvision/prototype/transforms/_type_conversion.py
index 01908650fb4..dd62cac3051 100644
--- a/torchvision/prototype/transforms/_type_conversion.py
+++ b/torchvision/prototype/transforms/_type_conversion.py
@@ -62,6 +62,9 @@ def _transform(
         return F.to_image_pil(inpt, mode=self.mode)
 
 
+# What is the "new naming scheme"?
+# I was going to point out that "ToTensorImage" and "ToPILImage" would look more
+# familiar
 # We changed the name to align them with the new naming scheme. Still, `ToPILImage` is
 # prevalent and well understood. Thus, we just alias it without deprecating the old name.
 ToPILImage = ToImagePIL
diff --git a/torchvision/prototype/transforms/_utils.py b/torchvision/prototype/transforms/_utils.py
index cbf8992300e..7e1234e313b 100644
--- a/torchvision/prototype/transforms/_utils.py
+++ b/torchvision/prototype/transforms/_utils.py
@@ -22,7 +22,7 @@ def _setup_float_or_seq(arg: Union[float, Sequence[float]], name: str, req_size:
                 raise ValueError(f"{name} should be a sequence of floats. Got {type(element)}")
 
     if isinstance(arg, float):
-        arg = [float(arg), float(arg)]
+        arg = [float(arg), float(arg)]  # Nit: arg is already a float
     if isinstance(arg, (list, tuple)) and len(arg) == 1:
         arg = [arg[0], arg[0]]
     return arg
@@ -68,6 +68,9 @@ def _convert_fill_arg(fill: datapoints.FillType) -> datapoints.FillTypeJIT:
     return fill
 
 
+# It is fairly obscure what this does or why this is needed - perhaps a quick
+# note summarizing the "fill" situation could be useful? (maybe it exists and
+# I'm not seeing it!)
 def _setup_fill_arg(fill: Union[FillType, Dict[Type, FillType]]) -> Dict[Type, FillTypeJIT]:
     _check_fill_arg(fill)
 
diff --git a/torchvision/prototype/transforms/utils.py b/torchvision/prototype/transforms/utils.py
index 1ebcf0efd34..b9751543cd3 100644
--- a/torchvision/prototype/transforms/utils.py
+++ b/torchvision/prototype/transforms/utils.py
@@ -38,6 +38,12 @@ def query_chw(flat_inputs: List[Any]) -> Tuple[int, int, int]:
     return c, h, w
 
 
+# Are we deprecating get_image_size()?
+# A: yes, it's currently reporting W,H
+#
+# Also this Seems like a subset of query_chw():
+# - can we just have query_chw()?
+# - if not, can we share code between the 2?
 def query_spatial_size(flat_inputs: List[Any]) -> Tuple[int, int]:
     sizes = {
         tuple(get_spatial_size(inpt))
@@ -50,6 +56,10 @@ def query_spatial_size(flat_inputs: List[Any]) -> Tuple[int, int]:
     if not sizes:
         raise TypeError("No image, video, mask or bounding box was found in the sample")
     elif len(sizes) > 1:
+        # (This comment applies to the other query_() utils):
+        # Does this happen?
+        # What can go terribly wrong if we don't raise an error?
+        # Should we just document that this returns the size of the first inpt to avoid an error?
         raise ValueError(f"Found multiple HxW dimensions in the sample: {sequence_to_str(sorted(sizes))}")
     h, w = sizes.pop()
     return h, w
@@ -64,6 +74,7 @@ def check_type(obj: Any, types_or_checks: Tuple[Union[Type, Callable[[Any], bool
     return False
 
 
+# Are these public because they are needed for users to implement custom transforms?
 def has_any(flat_inputs: List[Any], *types_or_checks: Union[Type, Callable[[Any], bool]]) -> bool:
     for inpt in flat_inputs:
         if check_type(inpt, types_or_checks):

From e09d85bd0b6518a2fa7e1b23acaeec9f22a541d1 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 12 Jan 2023 15:05:40 +0000
Subject: [PATCH 03/14] TODOs

---
 torchvision/prototype/datapoints/__init__.py         |  2 +-
 torchvision/prototype/datapoints/_bounding_box.py    |  4 ++--
 torchvision/prototype/datapoints/_datapoint.py       | 12 +++++++++---
 torchvision/prototype/datapoints/_image.py           |  2 ++
 torchvision/prototype/transforms/_transform.py       |  2 ++
 .../prototype/transforms/functional/_geometry.py     |  4 ++--
 6 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/torchvision/prototype/datapoints/__init__.py b/torchvision/prototype/datapoints/__init__.py
index f4aeb4bc946..f077101c3b4 100644
--- a/torchvision/prototype/datapoints/__init__.py
+++ b/torchvision/prototype/datapoints/__init__.py
@@ -1,5 +1,5 @@
 from ._bounding_box import BoundingBox, BoundingBoxFormat
-from ._datapoint import FillType, FillTypeJIT, InputType, InputTypeJIT  # Need to be public?
+from ._datapoint import FillType, FillTypeJIT, InputType, InputTypeJIT  # Need to be public? TODO: no
 from ._image import ColorSpace, Image, ImageType, ImageTypeJIT, TensorImageType, TensorImageTypeJIT
 from ._label import Label, OneHotLabel
 from ._mask import Mask
diff --git a/torchvision/prototype/datapoints/_bounding_box.py b/torchvision/prototype/datapoints/_bounding_box.py
index 398770cbf6a..78691b34444 100644
--- a/torchvision/prototype/datapoints/_bounding_box.py
+++ b/torchvision/prototype/datapoints/_bounding_box.py
@@ -16,8 +16,8 @@ class BoundingBoxFormat(StrEnum):
 
 
 class BoundingBox(Datapoint):
-    format: BoundingBoxFormat
-    spatial_size: Tuple[int, int]
+    format: BoundingBoxFormat  # TODO: do not use a builtin?
+    spatial_size: Tuple[int, int]  # TODO: This is the size of the image, not the box
 
     @classmethod
     def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat, spatial_size: Tuple[int, int]) -> BoundingBox:
diff --git a/torchvision/prototype/datapoints/_datapoint.py b/torchvision/prototype/datapoints/_datapoint.py
index 5104d98a853..3717ca5d887 100644
--- a/torchvision/prototype/datapoints/_datapoint.py
+++ b/torchvision/prototype/datapoints/_datapoint.py
@@ -15,8 +15,8 @@
 FillTypeJIT = Union[int, float, List[float], None]
 
 
-# Perhaps it's already documented somewhere, but I think it'd be useful to
-# provide a few examples of when
+# TODO: provide a few examples of when the Datapoint type is preserved vs when it's not
+# test_prototype_datapoints.py is a good starting point
 class Datapoint(torch.Tensor):
     __F: Optional[ModuleType] = None
 
@@ -95,6 +95,7 @@ def __torch_function__(
 
         with DisableTorchFunctionSubclass():
             output = func(*args, **kwargs or dict())
+            # TODO: maybe we can exit the CM here?
 
             wrapper = cls._NO_WRAPPING_EXCEPTIONS.get(func)
             # Apart from `func` needing to be an exception, we also require the primary operand, i.e. `args[0]`, to be
@@ -104,10 +105,15 @@ def __torch_function__(
             # `args = (torch.Tensor(), datapoints.Image())` first. Without this guard, the original `torch.Tensor` would
             # be wrapped into a `datapoints.Image`.
             if wrapper and isinstance(args[0], cls):
+                # TODO: figure out whether
+                # arbitrary_tensor.to(some_img)
+                # should be an Image or a Tensor
                 return wrapper(cls, args[0], output)  # type: ignore[no-any-return]
 
             # Does that mean that DisableTorchFunctionSubclass is ignored for `.inpace_()` functions?
             # Or maybe I'm misunderstanding what DisableTorchFunctionSubclass is supposed to do.
+            # TODO: figure out with torch core whether this is a bug or not
+
             # Inplace `func`'s, canonically identified with a trailing underscore in their name like `.add_(...)`,
             # will retain the input type. Thus, we need to unwrap here.
             if isinstance(output, cls):
@@ -136,7 +142,7 @@ def _F(self) -> ModuleType:
             Datapoint.__F = functional
         return Datapoint.__F
 
-    
+
     # doing some_tensor.dtype would go through __torch_function__?
     #  - is this because it is implemented as a @property method?
     #  - why do we want to bypass __torch_function__ in these case?
diff --git a/torchvision/prototype/datapoints/_image.py b/torchvision/prototype/datapoints/_image.py
index 1720bb3a1e0..b760ecb2058 100644
--- a/torchvision/prototype/datapoints/_image.py
+++ b/torchvision/prototype/datapoints/_image.py
@@ -59,6 +59,8 @@ def _from_tensor_shape(shape: List[int]) -> ColorSpace:
 
 class Image(Datapoint):
     # Where is this used / changes apart from in ConvertColorSpace()?
+    # A: For now, this is somewhat redundant with number of channels.
+    # TODO: decide whether we want to keep it?
     color_space: ColorSpace
 
     @classmethod
diff --git a/torchvision/prototype/transforms/_transform.py b/torchvision/prototype/transforms/_transform.py
index 413c3b469d2..00829e6ffd6 100644
--- a/torchvision/prototype/transforms/_transform.py
+++ b/torchvision/prototype/transforms/_transform.py
@@ -39,6 +39,8 @@ def forward(self, *inputs: Any) -> Any:
 
         params = self._get_params(flat_inputs)
 
+        # TODO: right now, any tensor or datapoint passed to forward() will be transformed
+        # the rest is bypassed
         flat_outputs = [
             self._transform(inpt, params) if check_type(inpt, self._transformed_types) else inpt for inpt in flat_inputs
         ]
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index 5beade0d4be..1360ad7ead1 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -249,8 +249,8 @@ def resize(
     if isinstance(inpt, torch.Tensor) and (
         torch.jit.is_scripting() or not isinstance(inpt, datapoints._datapoint.Datapoint)
     ):
-        # Do I understand correctly that resize_image_tensor() will be called if we're scripting and inpt is an Image()?
-        # It seems to suggests that this dispatcher is still jit-scriptable on datapoints (and not just on tensors)?
+    # TODO: Figure out whether this cond could just be:
+    # if torch.jit.is_scripting() or is_simple_tensor(inpt):
         return resize_image_tensor(inpt, size, interpolation=interpolation, max_size=max_size, antialias=antialias)
     elif isinstance(inpt, datapoints._datapoint.Datapoint):
         # IIUC we're calling the method on the datapoint object which in turn

From 7fbab6cb828af3f29bffd8bdbe0b1ea9662d06ab Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 13 Jan 2023 09:40:32 +0000
Subject: [PATCH 04/14] More

---
 test/test_prototype_datapoints.py              | 5 +++++
 torchvision/prototype/datapoints/_datapoint.py | 7 ++++++-
 torchvision/prototype/datapoints/_label.py     | 2 ++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/test/test_prototype_datapoints.py b/test/test_prototype_datapoints.py
index d036b5db1de..20a835e8b97 100644
--- a/test/test_prototype_datapoints.py
+++ b/test/test_prototype_datapoints.py
@@ -29,6 +29,7 @@ def test_to_wrapping():
 
 
 def test_to_feature_reference():
+    # is "feature" now "datapoint" - or is it something else?
     tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
     label = datapoints.Label(tensor, categories=["foo", "bar"]).to(torch.int32)
 
@@ -46,6 +47,9 @@ def test_clone_wrapping():
 
     assert type(label_clone) is datapoints.Label
     assert label_clone.data_ptr() != label.data_ptr()
+    # Is this expected?
+    # Does this meta-data-preserving behaviour occur for all meta-data attached to datapoints?
+    # Can things go wrong? i.e. if label_clone changes its metadata inplace, that of label gets changed too?
     assert label_clone.categories is label.categories
 
 
@@ -60,6 +64,7 @@ def test_requires_grad__wrapping():
     assert type(label_requires_grad) is datapoints.Label
     assert label.requires_grad
     assert label_requires_grad.requires_grad
+    assert label_requires_grad is label
 
 
 def test_other_op_no_wrapping():
diff --git a/torchvision/prototype/datapoints/_datapoint.py b/torchvision/prototype/datapoints/_datapoint.py
index 3717ca5d887..b0d1a0e01ec 100644
--- a/torchvision/prototype/datapoints/_datapoint.py
+++ b/torchvision/prototype/datapoints/_datapoint.py
@@ -129,7 +129,7 @@ def _make_repr(self, **kwargs: Any) -> str:
         return f"{super().__repr__()[:-1]}, {extra_repr})"
 
     # What cyclic import is this solving? Can it be avoided somehow?
-    # EDIT: is this because the dispatchers call the methods which in turn call back into the functional. namespace?
+    # A: this is because the dispatchers call the methods which in turn call back into the functional namespace
     @property
     def _F(self) -> ModuleType:
         # This implements a lazy import of the functional to get around the cyclic import. This import is deferred
@@ -145,7 +145,9 @@ def _F(self) -> ModuleType:
 
     # doing some_tensor.dtype would go through __torch_function__?
     #  - is this because it is implemented as a @property method?
+    #    A: yes, or something like that
     #  - why do we want to bypass __torch_function__ in these case?
+    #    A: for optimization. TODO: should this be part of core already?
     # Also, what happens if users access another attribute that we haven't
     # overridden here, e.g. image.data or image.some_new_attribute_in_pytorch3.0?
 
@@ -172,6 +174,9 @@ def dtype(self) -> _dtype:  # type: ignore[override]
             return super().dtype
 
     # Are these the "no-op fallbacks"?
+    # A: yes, fallback from the dispatchers. These exist in anticipation of
+    # allowing user-defined transforms.
+    # TODO: figure out design / tradeoffs
     def horizontal_flip(self) -> Datapoint:
         return self
 
diff --git a/torchvision/prototype/datapoints/_label.py b/torchvision/prototype/datapoints/_label.py
index abae7eaabda..e695c05a865 100644
--- a/torchvision/prototype/datapoints/_label.py
+++ b/torchvision/prototype/datapoints/_label.py
@@ -11,6 +11,8 @@
 L = TypeVar("L", bound="_LabelBase")
 
 
+# Do we have transforms that change the categories?
+# Why do we need the labels to be datapoints?
 class _LabelBase(Datapoint):
     categories: Optional[Sequence[str]]
 

From 7aa77b79fefad0d88718e5c400c05b908afbfea8 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 16 Jan 2023 14:18:28 +0000
Subject: [PATCH 05/14] Some more

---
 torchvision/prototype/datapoints/_label.py           | 4 ++++
 torchvision/prototype/transforms/functional/_misc.py | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/torchvision/prototype/datapoints/_label.py b/torchvision/prototype/datapoints/_label.py
index e695c05a865..8fce6d735a7 100644
--- a/torchvision/prototype/datapoints/_label.py
+++ b/torchvision/prototype/datapoints/_label.py
@@ -13,6 +13,10 @@
 
 # Do we have transforms that change the categories?
 # Why do we need the labels to be datapoints?
+# In these label classes, what is strictly needed vs something that was
+# historically designed with the joint dataset / transforms revamp in mind?
+# (asking because the dataset revamp is on indefinite pause, so perhaps some
+# things are now obsolete?)
 class _LabelBase(Datapoint):
     categories: Optional[Sequence[str]]
 
diff --git a/torchvision/prototype/transforms/functional/_misc.py b/torchvision/prototype/transforms/functional/_misc.py
index bc9408d0e2c..002fd918d3f 100644
--- a/torchvision/prototype/transforms/functional/_misc.py
+++ b/torchvision/prototype/transforms/functional/_misc.py
@@ -69,6 +69,10 @@ def normalize(
                 f"but got {type(inpt)} instead."
             )
 
+    # Saw this from [Feedback] thread (https://github.com/pytorch/vision/issues/6753#issuecomment-1308295943)
+    # Not sure I understand the need to return a Tensor instead of and Image
+    # Is the inconsistency "worth" it? How can we make sure this isn't too unexpected?
+
     # Image or Video type should not be retained after normalization due to unknown data range
     # Thus we return Tensor for input Image
     return normalize_image_tensor(inpt, mean=mean, std=std, inplace=inplace)

From a2bce37e1042fa8fa1a0078e6c996863c71e52cd Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 16 Jan 2023 16:49:12 +0000
Subject: [PATCH 06/14] Some more + some answers

---
 test/test_prototype_datapoints.py             |  2 ++
 torchvision/_utils.py                         | 19 +++++++-----
 .../prototype/datapoints/_bounding_box.py     |  4 ++-
 .../prototype/datapoints/_datapoint.py        |  6 +++-
 torchvision/prototype/datapoints/_image.py    |  5 ++-
 torchvision/prototype/transforms/Migration.md | 31 +++++++++++++++++++
 torchvision/prototype/transforms/_presets.py  |  1 +
 .../prototype/transforms/_transform.py        |  2 +-
 .../transforms/functional/__init__.py         |  2 +-
 9 files changed, 59 insertions(+), 13 deletions(-)
 create mode 100644 torchvision/prototype/transforms/Migration.md

diff --git a/test/test_prototype_datapoints.py b/test/test_prototype_datapoints.py
index 20a835e8b97..5fdf08cab24 100644
--- a/test/test_prototype_datapoints.py
+++ b/test/test_prototype_datapoints.py
@@ -30,6 +30,7 @@ def test_to_wrapping():
 
 def test_to_feature_reference():
     # is "feature" now "datapoint" - or is it something else?
+    # A: yes, TODO: update name
     tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
     label = datapoints.Label(tensor, categories=["foo", "bar"]).to(torch.int32)
 
@@ -50,6 +51,7 @@ def test_clone_wrapping():
     # Is this expected?
     # Does this meta-data-preserving behaviour occur for all meta-data attached to datapoints?
     # Can things go wrong? i.e. if label_clone changes its metadata inplace, that of label gets changed too?
+    # TODO: N and P should discuss this more
     assert label_clone.categories is label.categories
 
 
diff --git a/torchvision/_utils.py b/torchvision/_utils.py
index 1494a2cf000..382429f6bce 100644
--- a/torchvision/_utils.py
+++ b/torchvision/_utils.py
@@ -5,13 +5,6 @@
 
 
 
-# Could we just define 
-# class ColorSpace(enum.Enum):
-#    RGB = "RGB"  # instead of auto()
-#    ...
-# and avoid this helper class altogether?
-# I sort of remember having some issue with this class before (perhaps with the
-# WeightsEnum?)
 class StrEnumMeta(enum.EnumMeta):
     auto = enum.auto
 
@@ -24,11 +17,21 @@ def from_str(self: Type[T], member: str) -> T:  # type: ignore[misc]
             raise ValueError(f"Unknown value '{member}' for {self.__name__}.") from None
 
 
-# Can this just be a simple class inherited from Enum (without the metaclass)?
+# TODO: As agreed with Philip, we can just get rid of this class for now.
+# We can just define enums as 
+# class BoundingBoxFormat(Enum):
+#    XYXY = "XYXY"
+#    ...
+# and replace from_str with BoundingBoxFormat["XYXY"].
+#
+# We won't have a super nice error message as with add_suggestion, but this is
+# something we can think about when this is more critically needed (i.e. after
+# the migration)
 class StrEnum(enum.Enum, metaclass=StrEnumMeta):
     pass
 
 
+
 def sequence_to_str(seq: Sequence, separate_last: str = "") -> str:
     if not seq:
         return ""
diff --git a/torchvision/prototype/datapoints/_bounding_box.py b/torchvision/prototype/datapoints/_bounding_box.py
index 78691b34444..feedcc20768 100644
--- a/torchvision/prototype/datapoints/_bounding_box.py
+++ b/torchvision/prototype/datapoints/_bounding_box.py
@@ -17,7 +17,9 @@ class BoundingBoxFormat(StrEnum):
 
 class BoundingBox(Datapoint):
     format: BoundingBoxFormat  # TODO: do not use a builtin?
-    spatial_size: Tuple[int, int]  # TODO: This is the size of the image, not the box
+    # TODO: This is the size of the image, not the box. Maybe make this explicit in the name?
+    # Note: if this isn't user-facing, the TODO is not critical at all
+    spatial_size: Tuple[int, int]
 
     @classmethod
     def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat, spatial_size: Tuple[int, int]) -> BoundingBox:
diff --git a/torchvision/prototype/datapoints/_datapoint.py b/torchvision/prototype/datapoints/_datapoint.py
index b0d1a0e01ec..91bf1d53c63 100644
--- a/torchvision/prototype/datapoints/_datapoint.py
+++ b/torchvision/prototype/datapoints/_datapoint.py
@@ -53,6 +53,7 @@ def wrap_like(cls: Type[D], other: D, tensor: torch.Tensor) -> D:
 
     # Can things go wrong with having to maintain a different set of funcs that
     # need special care? What if we forget to handle one properly?
+    # A: This is probably fine. These are the only ones encountered so far.
     _NO_WRAPPING_EXCEPTIONS = {
         torch.Tensor.clone: lambda cls, input, output: cls.wrap_like(input, output),
         torch.Tensor.to: lambda cls, input, output: cls.wrap_like(input, output),
@@ -90,7 +91,10 @@ def __torch_function__(
         # Since super().__torch_function__ has no hook to prevent the coercing of the output into the input type, we
         # need to reimplement the functionality.
 
-        if not all(issubclass(cls, t) for t in types):  # Curious in which cases this can be hit?
+        # Curious in which cases this can be hit?
+        # A: Still don't really know, but this comes from the parent
+        # Tensor.__torch_function__ (which we need to re-write here)
+        if not all(issubclass(cls, t) for t in types):
             return NotImplemented
 
         with DisableTorchFunctionSubclass():
diff --git a/torchvision/prototype/datapoints/_image.py b/torchvision/prototype/datapoints/_image.py
index b760ecb2058..5f8e38f1fef 100644
--- a/torchvision/prototype/datapoints/_image.py
+++ b/torchvision/prototype/datapoints/_image.py
@@ -120,7 +120,10 @@ def horizontal_flip(self) -> Image:
     def vertical_flip(self) -> Image:
         output = self._F.vertical_flip_image_tensor(self.as_subclass(torch.Tensor))
         return Image.wrap_like(self, output)
-
+    
+    # Do we want to keep these public?
+    # This is probalby related to internal customer needs. TODO for N: figure that out
+    # This is also related to allow user-defined subclasses and transforms (in anticipation of)
     def resize(  # type: ignore[override]
         self,
         size: List[int],
diff --git a/torchvision/prototype/transforms/Migration.md b/torchvision/prototype/transforms/Migration.md
new file mode 100644
index 00000000000..736d55cabe8
--- /dev/null
+++ b/torchvision/prototype/transforms/Migration.md
@@ -0,0 +1,31 @@
+Needs to be done in order to migrate to stable
+----------------------------------------------
+
+(Some of the items really just mean "Nicolas needs to understand this better")
+
+* Figure out criticality of JIT compat for classes. Is this absolutely needed, by whom, potential workarounds, etc.
+* Write Docs
+* Figure out dataset <-> transformsV2 layer (including HF or other external datasets)
+* address TODOs below and in code, code review etc.
+* pytorch 2.0 compat?
+* Video transforms
+* Figure out path to user-defined transforms and sub-classes 
+* Does Mask support Optical Flow masks?
+* Handle strides, e.g. https://github.com/pytorch/vision/issues/7090 ? Looks like it's a non-issue?
+* Figure out what transformsV2 mean for inference presets
+* Figure out logistics of migration into stable area (extra .v2 namespace, stuff like that)
+
+
+
+TODOs
+-----
+
+- Those in https://github.com/pytorch/vision/pull/7092 and
+  https://github.com/pytorch/vision/pull/7082 (There is overlap!)
+  They're not all critical.
+- Document (internally, not as user-facing docs) the `self.as_subclass(torch.Tensor)` perf hack 
+
+Done
+----
+
+* Figure out what to do about get_params() static methods (See https://github.com/pytorch/vision/pull/7092)
\ No newline at end of file
diff --git a/torchvision/prototype/transforms/_presets.py b/torchvision/prototype/transforms/_presets.py
index aa0b350858a..42763687d4f 100644
--- a/torchvision/prototype/transforms/_presets.py
+++ b/torchvision/prototype/transforms/_presets.py
@@ -16,6 +16,7 @@
 
 # I assume this file is actually not part of the transformsV2 work - it's just
 # that StereoMatching is still protytpe?
+# A: yes
 
 class StereoMatching(torch.nn.Module):
     def __init__(
diff --git a/torchvision/prototype/transforms/_transform.py b/torchvision/prototype/transforms/_transform.py
index 00829e6ffd6..b4e56a99b6f 100644
--- a/torchvision/prototype/transforms/_transform.py
+++ b/torchvision/prototype/transforms/_transform.py
@@ -5,6 +5,7 @@
 import torch
 from torch import nn
 # I remember that the core team is open to making it public. Is still private?
+# A: yes but core is committed to full BC
 from torch.utils._pytree import tree_flatten, tree_unflatten
 from torchvision.prototype.transforms.utils import check_type
 from torchvision.utils import _log_api_usage_once
@@ -32,7 +33,6 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         raise NotImplementedError
 
     def forward(self, *inputs: Any) -> Any:
-        # TL;DR of the flatten -> unflatten thing?
         flat_inputs, spec = tree_flatten(inputs if len(inputs) > 1 else inputs[0])
 
         self._check_inputs(flat_inputs)
diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index 8574ffd5d9d..efee0cb6fa2 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -129,7 +129,7 @@
     perspective_image_tensor,
     perspective_mask,
     perspective_video,
-    resize,  # this is the "dispatcher layer" and the resize_xyz are the low level kernels?
+    resize,
     resize_bounding_box,
     resize_image_pil,
     resize_image_tensor,

From a20e581c4877dc50fc7d8c713e3e60cc9fa6d8f8 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 18 Jan 2023 16:01:51 +0000
Subject: [PATCH 07/14] more

---
 torchvision/prototype/transforms/_augment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py
index 3160770a09d..806ca40bbf7 100644
--- a/torchvision/prototype/transforms/_augment.py
+++ b/torchvision/prototype/transforms/_augment.py
@@ -94,7 +94,7 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
     def _transform(
         self, inpt: Union[datapoints.ImageType, datapoints.VideoType], params: Dict[str, Any]
     ) -> Union[datapoints.ImageType, datapoints.VideoType]:
-        if params["v"] is not None:
+        if params["v"] is not None:  # What is this?
             inpt = F.erase(inpt, **params, inplace=self.inplace)
 
         return inpt

From 03ef398e68af94799981b179014efe8e8ede8d9d Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 19 Jan 2023 15:45:55 +0000
Subject: [PATCH 08/14] Some more

---
 .../prototype/datapoints/_bounding_box.py     | 25 +++++++++++++++++++
 .../prototype/transforms/functional/_meta.py  |  5 ++++
 2 files changed, 30 insertions(+)

diff --git a/torchvision/prototype/datapoints/_bounding_box.py b/torchvision/prototype/datapoints/_bounding_box.py
index feedcc20768..9cf314d7576 100644
--- a/torchvision/prototype/datapoints/_bounding_box.py
+++ b/torchvision/prototype/datapoints/_bounding_box.py
@@ -15,6 +15,31 @@ class BoundingBoxFormat(StrEnum):
     CXCYWH = StrEnum.auto()
 
 
+# What if... we just removed the format and spatial_size meta-data?
+# A: We could, but it comes with trade-offs. For the format, this wouldn't lead
+# to much of a difference, except that users would have to convert to XYXY
+# before doing anything. All of the current stable ops expect XYXY already so
+# it's not much of a change. Worth noting as well that a few BBox transforms
+# only have an implementation for the XYXY format, and they convert / re-convert
+# internally (see e.g. affine_bounding_box, but there are others)
+# Removing spatial_size however would make the dispatcher-level more cluncky for
+# users. It wouldn't change much of the tranforms classes as long as they're
+# called with their respective image e.g.
+# T(image, bbox)
+# because the spatial_size can be known from the image param. But in a mid-level
+# dispatcher which only accept 1 kind of input like
+# dispatcher(bbox)
+# there's no way to know the spatial_size unless it's passed as a parameter.
+# Users would also need to keep track of it since some transforms actually
+# change it:
+# bbox, sz = resize(bbox, spatial_size=sz)
+# This also means the mid-level dispatchers:
+# - need to accept as input anything that was a meta-data 9in this case
+#   spatial_size
+# - need to return them as well; which means they need to return either a single
+#   image, a single video, or a tuple of (bbox, spatial_size),
+# TL;DR: things would get messy for users and for us.
+
 class BoundingBox(Datapoint):
     format: BoundingBoxFormat  # TODO: do not use a builtin?
     # TODO: This is the size of the image, not the box. Maybe make this explicit in the name?
diff --git a/torchvision/prototype/transforms/functional/_meta.py b/torchvision/prototype/transforms/functional/_meta.py
index 28de0536978..6184f183047 100644
--- a/torchvision/prototype/transforms/functional/_meta.py
+++ b/torchvision/prototype/transforms/functional/_meta.py
@@ -190,6 +190,7 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor, inplace: bool) -> torch.Tensor:
     return xyxy
 
 
+# TODO: Maybe make this available as a class transform as well?
 def convert_format_bounding_box(
     bounding_box: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, inplace: bool = False
 ) -> torch.Tensor:
@@ -437,6 +438,10 @@ def convert_dtype_video(video: torch.Tensor, dtype: torch.dtype = torch.float) -
     return convert_dtype_image_tensor(video, dtype)
 
 
+# TODO: this doesn't just change the dtype, it also changes the value range.
+# This name relies on the implicit assumption that the value range is determined
+# by the dtype. Maybe think of a more descriptive name if we can (once and for
+# all)
 def convert_dtype(
     inpt: Union[datapoints.ImageTypeJIT, datapoints.VideoTypeJIT], dtype: torch.dtype = torch.float
 ) -> torch.Tensor:

From d4b1a3ebe3c7c4a5a709a85d6f5d64d883b41c93 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 23 Jan 2023 14:40:08 +0000
Subject: [PATCH 09/14] Add more deets to Migration doc

---
 torchvision/prototype/transforms/Migration.md | 39 ++++++++++++++-----
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/torchvision/prototype/transforms/Migration.md b/torchvision/prototype/transforms/Migration.md
index 736d55cabe8..4adc131391b 100644
--- a/torchvision/prototype/transforms/Migration.md
+++ b/torchvision/prototype/transforms/Migration.md
@@ -1,20 +1,35 @@
-Needs to be done in order to migrate to stable
-----------------------------------------------
+Needs to be done in order to migrate to Beta
+--------------------------------------------
 
 (Some of the items really just mean "Nicolas needs to understand this better")
 
-* Figure out criticality of JIT compat for classes. Is this absolutely needed, by whom, potential workarounds, etc.
-* Write Docs
+* Figure out logistics of migration (extra .v2 namespace, programmatic "opt-in",
+  stuff like that): tracked in https://github.com/pytorch/vision/issues/7097
+* Figure out criticality of JIT compat for classes. Is this absolutely needed,
+  by whom, potential workarounds, etc.
+* Figure out whether `__torch_dispatch__` is preferable to `__torch_function__`.
 * Figure out dataset <-> transformsV2 layer (including HF or other external datasets)
-* address TODOs below and in code, code review etc.
-* pytorch 2.0 compat?
-* Video transforms
+* Figure out internal video partners and what they actually need. Some of the
+  Video transforms like `uniform_temporal_subsample()` are outliers (awkward
+  support, doesn't fit well into the current API). Same for `PermuteDimensions`
+  and `TransposeDimension` which break underlying assumptions about dimension
+  order.
+* Address critical TODOs below and in code, code review etc.
+* Simplify interface and Image meta-data: Remove color_space metadata and
+  ConvertColorSpace() transform (https://github.com/pytorch/vision/pull/7120)
+* Write Docs
+
+Needs to be done before migrating to stable
+-------------------------------------------
+
+* Polish tests - make sure they are at least functionally equivalent to the v1
+  tests. This requires individually checking them.
+* Address rest of TODOs below and in code, code review etc.
+* Look into pytorch 2.0 compat? (**Should this be bumped up??**)
 * Figure out path to user-defined transforms and sub-classes 
 * Does Mask support Optical Flow masks?
 * Handle strides, e.g. https://github.com/pytorch/vision/issues/7090 ? Looks like it's a non-issue?
 * Figure out what transformsV2 mean for inference presets
-* Figure out logistics of migration into stable area (extra .v2 namespace, stuff like that)
-
 
 
 TODOs
@@ -28,4 +43,8 @@ TODOs
 Done
 ----
 
-* Figure out what to do about get_params() static methods (See https://github.com/pytorch/vision/pull/7092)
\ No newline at end of file
+* Figure out what to do about get_params() static methods (See https://github.com/pytorch/vision/pull/7092).
+  A: we want them back - waiting to decide on JIT compat for final solution.
+* Avoid inconsistent output type: Let Normalize() and RandomPhotometricDistort
+  return datapoints instead of tensors
+  (https://github.com/pytorch/vision/pull/7113)
\ No newline at end of file

From 91cf82e65e4193d55262c35f694c5c44510f4962 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 1 Feb 2023 13:31:01 +0000
Subject: [PATCH 10/14] Update migration TODOs

---
 torchvision/prototype/transforms/Migration.md | 25 ++++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/torchvision/prototype/transforms/Migration.md b/torchvision/prototype/transforms/Migration.md
index 4adc131391b..4f9442f4a8c 100644
--- a/torchvision/prototype/transforms/Migration.md
+++ b/torchvision/prototype/transforms/Migration.md
@@ -5,18 +5,14 @@ Needs to be done in order to migrate to Beta
 
 * Figure out logistics of migration (extra .v2 namespace, programmatic "opt-in",
   stuff like that): tracked in https://github.com/pytorch/vision/issues/7097
-* Figure out criticality of JIT compat for classes. Is this absolutely needed,
-  by whom, potential workarounds, etc.
-* Figure out whether `__torch_dispatch__` is preferable to `__torch_function__`.
-* Figure out dataset <-> transformsV2 layer (including HF or other external datasets)
+* Figure out dataset <-> transformsV2 layer (including HF or other external
+  datasets): tracked in https://github.com/pytorch/vision/pull/6663
 * Figure out internal video partners and what they actually need. Some of the
   Video transforms like `uniform_temporal_subsample()` are outliers (awkward
   support, doesn't fit well into the current API). Same for `PermuteDimensions`
   and `TransposeDimension` which break underlying assumptions about dimension
   order.
 * Address critical TODOs below and in code, code review etc.
-* Simplify interface and Image meta-data: Remove color_space metadata and
-  ConvertColorSpace() transform (https://github.com/pytorch/vision/pull/7120)
 * Write Docs
 
 Needs to be done before migrating to stable
@@ -27,7 +23,8 @@ Needs to be done before migrating to stable
 * Address rest of TODOs below and in code, code review etc.
 * Look into pytorch 2.0 compat? (**Should this be bumped up??**)
 * Figure out path to user-defined transforms and sub-classes 
-* Does Mask support Optical Flow masks?
+* Add support for Optical Flow tranforms (e.g. vlip needs special handling for
+  flow masks)
 * Handle strides, e.g. https://github.com/pytorch/vision/issues/7090 ? Looks like it's a non-issue?
 * Figure out what transformsV2 mean for inference presets
 
@@ -44,7 +41,17 @@ Done
 ----
 
 * Figure out what to do about get_params() static methods (See https://github.com/pytorch/vision/pull/7092).
-  A: we want them back - waiting to decide on JIT compat for final solution.
+  A: we want them back - tracked in https://github.com/pytorch/vision/pull/7153
 * Avoid inconsistent output type: Let Normalize() and RandomPhotometricDistort
   return datapoints instead of tensors
-  (https://github.com/pytorch/vision/pull/7113)
\ No newline at end of file
+  (https://github.com/pytorch/vision/pull/7113)
+* Figure out criticality of JIT compat for classes. Is this absolutely needed,
+  by whom, potential workarounds, etc.
+  * Done: Philip found elegant way to support JIT as long as the v1 transforms
+    are still around: https://github.com/pytorch/vision/pull/7135
+* Figure out whether `__torch_dispatch__` is preferable to `__torch_function__`.
+  * After chat with Alban, there's no reason to use `__torch_dispatch__`.
+    Everything should work as expected with `__torch_function__`, including
+    AutoGrad.
+* Simplify interface and Image meta-data: Remove color_space metadata and
+  ConvertColorSpace() transform (https://github.com/pytorch/vision/pull/7120)
\ No newline at end of file

From c2db00a396b878b06d713251c1d6b3b00f01c3e1 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 1 Feb 2023 15:07:55 +0000
Subject: [PATCH 11/14] update migration

---
 torchvision/prototype/transforms/Migration.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/torchvision/prototype/transforms/Migration.md b/torchvision/prototype/transforms/Migration.md
index 4f9442f4a8c..ff3df1bfc3b 100644
--- a/torchvision/prototype/transforms/Migration.md
+++ b/torchvision/prototype/transforms/Migration.md
@@ -3,23 +3,23 @@ Needs to be done in order to migrate to Beta
 
 (Some of the items really just mean "Nicolas needs to understand this better")
 
-* Figure out logistics of migration (extra .v2 namespace, programmatic "opt-in",
+* (P V N)Figure out logistics of migration (extra .v2 namespace, programmatic "opt-in",
   stuff like that): tracked in https://github.com/pytorch/vision/issues/7097
-* Figure out dataset <-> transformsV2 layer (including HF or other external
+* (P and N)Figure out dataset <-> transformsV2 layer (including HF or other external
   datasets): tracked in https://github.com/pytorch/vision/pull/6663
-* Figure out internal video partners and what they actually need. Some of the
+* (N) Figure out internal video partners and what they actually need. Some of the
   Video transforms like `uniform_temporal_subsample()` are outliers (awkward
   support, doesn't fit well into the current API). Same for `PermuteDimensions`
   and `TransposeDimension` which break underlying assumptions about dimension
   order.
 * Address critical TODOs below and in code, code review etc.
 * Write Docs
+* Philip: (EDIT: submit dummy PR)Polish tests - make sure they are at least functionally equivalent to the v1
+  tests. This requires individually checking them.
 
 Needs to be done before migrating to stable
 -------------------------------------------
 
-* Polish tests - make sure they are at least functionally equivalent to the v1
-  tests. This requires individually checking them.
 * Address rest of TODOs below and in code, code review etc.
 * Look into pytorch 2.0 compat? (**Should this be bumped up??**)
 * Figure out path to user-defined transforms and sub-classes 

From a0f2b800b481d919495c9761bc363788dfb2e600 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 9 Feb 2023 15:58:08 +0000
Subject: [PATCH 12/14] bikeshed

---
 torchvision/prototype/transforms/Migration.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torchvision/prototype/transforms/Migration.md b/torchvision/prototype/transforms/Migration.md
index ff3df1bfc3b..3ca782a4af2 100644
--- a/torchvision/prototype/transforms/Migration.md
+++ b/torchvision/prototype/transforms/Migration.md
@@ -16,6 +16,7 @@ Needs to be done in order to migrate to Beta
 * Write Docs
 * Philip: (EDIT: submit dummy PR)Polish tests - make sure they are at least functionally equivalent to the v1
   tests. This requires individually checking them.
+* (P V N) Bikeshed on a good name for wrap_dataset_for_transforms_v2
 
 Needs to be done before migrating to stable
 -------------------------------------------

From 385af4b10c4034735538c1ff5fc75ece598a7335 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 9 Feb 2023 16:19:31 +0000
Subject: [PATCH 13/14] Some more

---
 torchvision/prototype/transforms/Migration.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torchvision/prototype/transforms/Migration.md b/torchvision/prototype/transforms/Migration.md
index 3ca782a4af2..53f20a4a208 100644
--- a/torchvision/prototype/transforms/Migration.md
+++ b/torchvision/prototype/transforms/Migration.md
@@ -17,6 +17,7 @@ Needs to be done in order to migrate to Beta
 * Philip: (EDIT: submit dummy PR)Polish tests - make sure they are at least functionally equivalent to the v1
   tests. This requires individually checking them.
 * (P V N) Bikeshed on a good name for wrap_dataset_for_transforms_v2
+* (P V N ) make `Image(some_pil_image)` work - https://github.com/pytorch/vision/pull/6663#discussion_r1093121007
 
 Needs to be done before migrating to stable
 -------------------------------------------

From 0af3f374270189b1ef4173fb3d1ad25c668942e0 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Thu, 9 Feb 2023 17:20:10 +0000
Subject: [PATCH 14/14] Update torchvision/prototype/transforms/Migration.md

Co-authored-by: Philip Meier <github.pmeier@posteo.de>
---
 torchvision/prototype/transforms/Migration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchvision/prototype/transforms/Migration.md b/torchvision/prototype/transforms/Migration.md
index 53f20a4a208..a9dda03f927 100644
--- a/torchvision/prototype/transforms/Migration.md
+++ b/torchvision/prototype/transforms/Migration.md
@@ -16,7 +16,7 @@ Needs to be done in order to migrate to Beta
 * Write Docs
 * Philip: (EDIT: submit dummy PR)Polish tests - make sure they are at least functionally equivalent to the v1
   tests. This requires individually checking them.
-* (P V N) Bikeshed on a good name for wrap_dataset_for_transforms_v2
+* (P V N) Bikeshed on a good name and location for wrap_dataset_for_transforms_v2
 * (P V N ) make `Image(some_pil_image)` work - https://github.com/pytorch/vision/pull/6663#discussion_r1093121007
 
 Needs to be done before migrating to stable