diff --git a/docs/source/conf.py b/docs/source/conf.py index 8b4ce17de9f..6d748f5b717 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -34,6 +34,7 @@ sys.path.append(os.path.abspath(".")) torchvision.disable_beta_transforms_warning() +import torchvision.datapoints # Don't remove, otherwise the docs for datapoints aren't linked properly # -- General configuration ------------------------------------------------ diff --git a/docs/source/datapoints.rst b/docs/source/datapoints.rst index 07e20b090e6..1cc62413e66 100644 --- a/docs/source/datapoints.rst +++ b/docs/source/datapoints.rst @@ -2,6 +2,12 @@ Datapoints ========== .. currentmodule:: torchvision.datapoints + +Datapoints are tensor subclasses which the :mod:`~torchvision.transforms.v2` transforms use under the hood to +dispatch their inputs to the appropriate lower-level kernels. Most users do not +need to manipulate datapoints directly and can simply rely on dataset wrapping - +see e.g. :ref:`sphx_glr_auto_examples_plot_transforms_v2_e2e.py`. + .. autosummary:: :toctree: generated/ :template: class.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index ac047ff5869..bc38fdb0307 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -31,8 +31,8 @@ architectures, and common image transformations for computer vision. :maxdepth: 2 :caption: Package Reference - datapoints transforms + datapoints models datasets utils diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index 0e9b053fb72..1fe3e78f55f 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -198,6 +198,12 @@ Miscellaneous Conversion ---------- +.. note:: + Beware, some of the conversion transforms below will scale the values + while performing the conversion, while others do not do any scaling. By + scaling, we mean e.g. that a ``uint8`` -> ``float32`` conversion would map the [0, + 255] range into [0, 1] (and vice-versa). + ..
autosummary:: :toctree: generated/ :template: class.rst @@ -211,8 +217,8 @@ Conversion v2.PILToTensor v2.ToImageTensor ConvertImageDtype - v2.ConvertImageDtype v2.ConvertDtype + v2.ConvertImageDtype v2.ToDtype v2.ConvertBoundingBoxFormat diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 90cb0374eee..95eb9199ef3 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -105,7 +105,9 @@ def __repr__(self) -> str: class ToTensor: - """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. This transform does not support torchscript. + """Convert a PIL Image or ndarray to tensor and scale the values accordingly. + + This transform does not support torchscript. Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] @@ -139,7 +141,9 @@ def __repr__(self) -> str: class PILToTensor: - """Convert a ``PIL Image`` to a tensor of the same type. This transform does not support torchscript. + """Convert a PIL Image to a tensor of the same type - this does not scale values. + + This transform does not support torchscript. Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W). """ @@ -166,7 +170,8 @@ def __repr__(self) -> str: class ConvertImageDtype(torch.nn.Module): - """Convert a tensor image to the given ``dtype`` and scale the values accordingly + """Convert a tensor image to the given ``dtype`` and scale the values accordingly. + This function does not support PIL Image. Args: @@ -194,7 +199,9 @@ def forward(self, image): class ToPILImage: - """Convert a tensor or an ndarray to PIL Image. This transform does not support torchscript. + """Convert a tensor or an ndarray to PIL Image - this does not scale values. + + This transform does not support torchscript. Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape H x W x C to a PIL Image while preserving the value range. 
diff --git a/torchvision/transforms/v2/_container.py b/torchvision/transforms/v2/_container.py index 7f9df337352..2f34a58902e 100644 --- a/torchvision/transforms/v2/_container.py +++ b/torchvision/transforms/v2/_container.py @@ -138,9 +138,7 @@ def __init__( if p is None: p = [1] * len(transforms) elif len(p) != len(transforms): - raise ValueError( - f"Length of p doesn't match the number of transforms: " f"{len(p)} != {len(transforms)}" - ) + raise ValueError(f"Length of p doesn't match the number of transforms: {len(p)} != {len(transforms)}") super().__init__() diff --git a/torchvision/transforms/v2/_deprecated.py b/torchvision/transforms/v2/_deprecated.py index c44e6b08d11..b5544ecfd49 100644 --- a/torchvision/transforms/v2/_deprecated.py +++ b/torchvision/transforms/v2/_deprecated.py @@ -10,7 +10,7 @@ class ToTensor(Transform): - """[BETA] Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + """[BETA] Convert a PIL Image or ndarray to tensor and scale the values accordingly. .. betastatus:: ToTensor transform diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py index 94ec851d045..7f28e25c602 100644 --- a/torchvision/transforms/v2/_meta.py +++ b/torchvision/transforms/v2/_meta.py @@ -9,7 +9,7 @@ class ConvertBoundingBoxFormat(Transform): - """[BETA] Convert bounding box coordinates to the given ``format``, e.g. from "CXCYWH" to "XYXY". + """[BETA] Convert bounding box coordinates to the given ``format``, e.g. from "CXCYWH" to "XYXY". .. betastatus:: ConvertBoundingBoxFormat transform @@ -18,6 +18,7 @@ class ConvertBoundingBoxFormat(Transform): Possible values are defined by :class:`~torchvision.datapoints.BoundingBoxFormat` and string values match the enums, e.g. "XYXY" or "XYWH" etc. """ + _transformed_types = (datapoints.BoundingBox,) def __init__(self, format: Union[str, datapoints.BoundingBoxFormat]) -> None: @@ -79,6 +80,7 @@ class ClampBoundingBox(Transform): ..
betastatus:: ClampBoundingBox transform """ + _transformed_types = (datapoints.BoundingBox,) def _transform(self, inpt: datapoints.BoundingBox, params: Dict[str, Any]) -> datapoints.BoundingBox: diff --git a/torchvision/transforms/v2/_misc.py b/torchvision/transforms/v2/_misc.py index 2237334f7a2..40d57856292 100644 --- a/torchvision/transforms/v2/_misc.py +++ b/torchvision/transforms/v2/_misc.py @@ -223,13 +223,15 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class ToDtype(Transform): - """[BETA] Converts the input to a specific dtype. + """[BETA] Converts the input to a specific dtype - this does not scale values. .. betastatus:: ToDtype transform Args: - dtype (dtype or dict of Datapoint -> dtype): The dtype to convert to. A dict can be passed to specify - per-datapoint conversions, e.g. ``dtype={datapoints.Image: torch.float32, datapoints.Video: torch.float64}``. + dtype (``torch.dtype`` or dict of ``Datapoint`` -> ``torch.dtype``): The dtype to convert to. + A dict can be passed to specify per-datapoint conversions, e.g. + ``dtype={datapoints.Image: torch.float32, datapoints.Video: + torch.float64}``. """ _transformed_types = (torch.Tensor,) diff --git a/torchvision/transforms/v2/_type_conversion.py b/torchvision/transforms/v2/_type_conversion.py index 504c5cc3d70..92de314608c 100644 --- a/torchvision/transforms/v2/_type_conversion.py +++ b/torchvision/transforms/v2/_type_conversion.py @@ -11,7 +11,7 @@ class PILToTensor(Transform): - """[BETA] Convert a ``PIL Image`` to a tensor of the same type. + """[BETA] Convert a PIL Image to a tensor of the same type - this does not scale values. .. betastatus:: PILToTensor transform @@ -27,7 +27,8 @@ def _transform(self, inpt: PIL.Image.Image, params: Dict[str, Any]) -> torch.Ten class ToImageTensor(Transform): - """[BETA] Convert a tensor or an ndarray or PIL Image to :class:`~torchvision.datapoints.Image`. 
+ """[BETA] Convert a tensor, ndarray, or PIL Image to :class:`~torchvision.datapoints.Image`; + this does not scale values. .. betastatus:: ToImageTensor transform @@ -43,7 +44,7 @@ def _transform( class ToImagePIL(Transform): - """[BETA] Convert a tensor or an ndarray to PIL Image. + """[BETA] Convert a tensor or an ndarray to PIL Image - this does not scale values. .. betastatus:: ToImagePIL transform