
Commit e367139

Merge branch 'main' into fix-det-ref-tutorials
2 parents a7509e4 + 5c1e62f commit e367139

29 files changed: +518, -282 lines

docs/source/beta_status.py

Lines changed: 6 additions & 6 deletions
@@ -5,20 +5,20 @@
 class BetaStatus(Directive):
     has_content = True
     text = "The {api_name} is in Beta stage, and backward compatibility is not guaranteed."
+    node = nodes.warning
 
     def run(self):
         text = self.text.format(api_name=" ".join(self.content))
-        return [nodes.warning("", nodes.paragraph("", "", nodes.Text(text)))]
+        return [self.node("", nodes.paragraph("", "", nodes.Text(text)))]
 
 
 class V2BetaStatus(BetaStatus):
     text = (
-        "The {api_name} is in Beta stage, and while we do not expect major breaking changes, "
-        "some APIs may still change according to user feedback. Please submit any feedback you may have "
-        "in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check "
-        "out https://github.com/pytorch/vision/issues/7319 to learn "
-        "more about the APIs that we suspect might involve future changes."
+        "The {api_name} is in Beta stage, and while we do not expect disruptive breaking changes, "
+        "some APIs may slightly change according to user feedback. Please submit any feedback you may have "
+        "in this issue: https://github.com/pytorch/vision/issues/6753."
     )
+    node = nodes.note
 
 
 def setup(app):
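
The net effect: the docutils node used for the admonition is now a class attribute, so V2BetaStatus can render as a note while BetaStatus keeps rendering as a warning, without duplicating run(). A hedged sketch of how such directives are typically registered in setup(); the directive names below are assumptions, not read from this diff:

def setup(app):
    # Register one directive per status class; the names are illustrative.
    app.add_directive("betastatus", BetaStatus)
    app.add_directive("v2betastatus", V2BetaStatus)
    return {"parallel_read_safe": True, "parallel_write_safe": True}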

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
@@ -83,6 +83,7 @@
     "backreferences_dir": "gen_modules/backreferences",
     "doc_module": ("torchvision",),
     "remove_config_comments": True,
+    "ignore_pattern": "helpers.py",
 }
 
 napoleon_use_ivar = True
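
Context for the one-line change: Sphinx-Gallery would otherwise try to build the new gallery/v2_transforms/helpers.py as its own example page; "ignore_pattern" is a regex matched against file names, so the shared helper is skipped but can still be imported by the examples. A hedged sketch of the relevant part of sphinx_gallery_conf (keys other than the ones visible in this diff are assumptions):

sphinx_gallery_conf = {
    "examples_dirs": "../../gallery/",  # assumed: where the example scripts live
    "gallery_dirs": "auto_examples",    # assumed: where the built pages go
    "backreferences_dir": "gen_modules/backreferences",
    "doc_module": ("torchvision",),
    "remove_config_comments": True,
    # New: don't render helpers.py as a gallery example.
    "ignore_pattern": "helpers.py",
}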

docs/source/transforms.rst

Lines changed: 12 additions & 6 deletions
@@ -45,13 +45,17 @@ tasks (image classification, detection, segmentation, video classification).
 Transforms are typically passed as the ``transform`` or ``transforms`` argument
 to the :ref:`Datasets <datasets>`.
 
+.. TODO: Reader guide, i.e. what to read depending on what you're looking for
 .. TODO: add link to getting started guide here.
 
+.. _conventions:
+
 Supported input types and conventions
 -------------------------------------
 
 Most transformations accept both `PIL <https://pillow.readthedocs.io>`_ images
-and tensor images. The result of both backends (PIL or Tensors) should be very
+and tensor inputs. Both CPU and CUDA tensors are supported.
+The result of both backends (PIL or Tensors) should be very
 close. In general, we recommend relying on the tensor backend :ref:`for
 performance <transforms_perf>`. The :ref:`conversion transforms
 <conversion_transforms>` may be used to convert to and from PIL images, or for

@@ -152,13 +156,15 @@ The above should give you the best performance in a typical training environment
 that relies on the :class:`torch.utils.data.DataLoader` with ``num_workers >
 0``.
 
-Transforms tend to be sensitive to the input strides / memory layout. Some
+Transforms tend to be sensitive to the input strides / memory format. Some
 transforms will be faster with channels-first images while others prefer
-channels-last. You may want to experiment a bit if you're chasing the very
-best performance. Using :func:`torch.compile` on individual transforms may
-also help factoring out the memory layout variable (e.g. on
+channels-last. Like ``torch`` operators, most transforms will preserve the
+memory format of the input, but this may not always be respected due to
+implementation details. You may want to experiment a bit if you're chasing the
+very best performance. Using :func:`torch.compile` on individual transforms may
+also help factoring out the memory format variable (e.g. on
 :class:`~torchvision.transforms.v2.Normalize`). Note that we're talking about
-**memory layout**, not tensor shape.
+**memory format**, not :ref:`tensor shape <conventions>`.
 
 Note that resize transforms like :class:`~torchvision.transforms.v2.Resize`
 and :class:`~torchvision.transforms.v2.RandomResizedCrop` typically prefer
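
To make the prose change concrete: "memory format" here means channels-first vs. channels-last strides, which can be set and inspected on the tensor itself. A hedged, minimal sketch (not part of the diff; sizes and normalization values are arbitrary):

import torch
from torchvision.transforms import v2

img = torch.rand(1, 3, 224, 224).to(memory_format=torch.channels_last)
out = v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(img)
# Most v2 transforms preserve the input memory format, though this is not a
# hard guarantee (see the note above about implementation details).
print(out.is_contiguous(memory_format=torch.channels_last))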

gallery/v2_transforms/helpers.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+import matplotlib.pyplot as plt
+from torchvision.utils import draw_bounding_boxes
+
+
+def plot(imgs):
+    if not isinstance(imgs[0], list):
+        # Make a 2d grid even if there's just 1 row
+        imgs = [imgs]
+
+    num_rows = len(imgs)
+    num_cols = len(imgs[0])
+    _, axs = plt.subplots(nrows=num_rows, ncols=num_cols, squeeze=False)
+    for row_idx, row in enumerate(imgs):
+        for col_idx, img in enumerate(row):
+            bboxes = None
+            if isinstance(img, tuple):
+                bboxes = img[1]
+                img = img[0]
+                if isinstance(bboxes, dict):
+                    bboxes = bboxes['bboxes']
+            if img.dtype.is_floating_point and img.min() < 0:
+                # Poor man's re-normalization for the colors to be OK-ish. This
+                # is useful for images coming out of Normalize()
+                img -= img.min()
+                img /= img.max()
+
+            if bboxes is not None:
+                img = draw_bounding_boxes(img, bboxes, colors="yellow", width=3)
+            ax = axs[row_idx, col_idx]
+            ax.imshow(img.permute(1, 2, 0).numpy())
+            ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
+
+    plt.tight_layout()
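
A hedged usage sketch for the new shared helper, as it might be called from one of the gallery examples sitting next to it (sizes and box coordinates are made up):

import torch
from torchvision import datapoints
from helpers import plot  # assumes the caller lives next to helpers.py

img = torch.randint(0, 256, size=(3, 128, 128), dtype=torch.uint8)
bboxes = datapoints.BoundingBoxes(
    [[10, 10, 60, 60]], format="XYXY", canvas_size=(128, 128)
)
# One row, two cells: the bare image, and the image paired with its boxes.
plot([img, (img, bboxes)])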

gallery/v2_transforms/plot_datapoints.py

Lines changed: 0 additions & 74 deletions
@@ -28,7 +28,6 @@
 
 import torch
 from torchvision import datapoints
-from torchvision.transforms.v2 import functional as F
 
 
 # %%

@@ -119,83 +118,10 @@
 assert isinstance(new_bboxes, datapoints.BoundingBoxes)
 assert new_bboxes.canvas_size == bboxes.canvas_size
 
-
 # %%
 # The metadata of ``new_bboxes`` is the same as ``bboxes``, but you could pass
 # it as a parameter to override it.
 #
-# Do I have to wrap the output of the datasets myself?
-# ----------------------------------------------------
-#
-# TODO: Move this in another guide - this is user-facing, not dev-facing.
-#
-# Only if you are using custom datasets. For the built-in ones, you can use
-# :func:`torchvision.datasets.wrap_dataset_for_transforms_v2`. Note that the function also supports subclasses of the
-# built-in datasets. Meaning, if your custom dataset subclasses from a built-in one and the output type is the same, you
-# also don't have to wrap manually.
-#
-# If you have a custom dataset, for example the ``PennFudanDataset`` from
-# `this tutorial <https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html>`_, you have two options:
-#
-# 1. Perform the wrapping inside ``__getitem__``:
-
-class PennFudanDataset(torch.utils.data.Dataset):
-    ...
-
-    def __getitem__(self, item):
-        ...
-
-        target["bboxes"] = datapoints.BoundingBoxes(
-            bboxes,
-            format=datapoints.BoundingBoxFormat.XYXY,
-            canvas_size=F.get_size(img),
-        )
-        target["labels"] = labels
-        target["masks"] = datapoints.Mask(masks)
-
-        ...
-
-        if self.transforms is not None:
-            img, target = self.transforms(img, target)
-
-        ...
-
-# %%
-# 2. Perform the wrapping inside a custom transformation at the beginning of your pipeline:
-
-
-class WrapPennFudanDataset:
-    def __call__(self, img, target):
-        target["boxes"] = datapoints.BoundingBoxes(
-            target["boxes"],
-            format=datapoints.BoundingBoxFormat.XYXY,
-            canvas_size=F.get_size(img),
-        )
-        target["masks"] = datapoints.Mask(target["masks"])
-        return img, target
-
-
-...
-
-
-def get_transform(train):
-    transforms = []
-    transforms.append(WrapPennFudanDataset())
-    transforms.append(T.PILToTensor())
-    ...
-
-# %%
-# .. note::
-#
-#    If both :class:`~torchvision.datapoints.BoundingBoxes` and :class:`~torchvision.datapoints.Mask`'s are included in
-#    the sample, ``torchvision.transforms.v2`` will transform them both. Meaning, if you don't need both, dropping or
-#    at least not wrapping the obsolete parts, can lead to a significant performance boost.
-#
-#    For example, if you are using the ``PennFudanDataset`` for object detection, not wrapping the masks avoids
-#    transforming them over and over again in the pipeline just to ultimately ignoring them. In general, it would be
-#    even better to not load the masks at all, but this is not possible in this example, since the bounding boxes are
-#    generated from the masks.
-#
 # .. _datapoint_unwrapping_behaviour:
 #
 # I had a Datapoint but now I have a Tensor. Help!
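
The removed "Do I have to wrap the output of the datasets myself?" section (slated to move into a user-facing guide) pointed readers at torchvision.datasets.wrap_dataset_for_transforms_v2 for built-in datasets. A hedged usage sketch, with made-up paths, in case the pointer is useful here:

from torchvision import datasets
from torchvision.datasets import wrap_dataset_for_transforms_v2

dataset = datasets.CocoDetection("path/to/images", "path/to/annotations.json")
# After wrapping, samples contain datapoints (BoundingBoxes, Mask, ...) that
# the v2 transforms can handle alongside the image.
dataset = wrap_dataset_for_transforms_v2(dataset)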
