extract make_* functions out of make_*_loader #7717

Merged 20 commits on Jul 5, 2023
Changes from 8 commits
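
In short, the refactor below pulls the tensor construction out of the fn closures inside the make_*_loader helpers and into standalone make_* functions (make_image, make_bounding_box, make_detection_mask, make_segmentation_mask, make_video); the loaders then delegate to them, whereas previously the standalone variants were only derived via from_loader. A rough, self-contained sketch of the pattern (make_thing, the *_old suffix, and the concrete shapes are illustrative stand-ins, not the actual torchvision code):

import torch

# before: the tensor is only ever built inside the loader's closure
def make_thing_loader_old(*, dtype=torch.uint8):
    def fn(shape, dtype, device):
        return torch.testing.make_tensor(shape, dtype=dtype, device=device)
    return fn  # the real helpers wrap fn in a *Loader object

# after: a standalone make_thing that tests can call directly ...
def make_thing(spatial_size, *, batch_dims=(), dtype=None, device="cpu"):
    dtype = dtype or torch.uint8
    return torch.testing.make_tensor((*batch_dims, *spatial_size), dtype=dtype, device=device)

# ... and a loader whose closure simply delegates to it
def make_thing_loader(*, dtype=torch.uint8):
    def fn(shape, dtype, device):
        *batch_dims, height, width = shape
        return make_thing((height, width), batch_dims=batch_dims, dtype=dtype, device=device)
    return fn  # the real helpers wrap fn in a *Loader object

# direct construction no longer has to round-trip through a loader:
sample = make_thing((16, 16), batch_dims=(2,))  # tensor of shape (2, 16, 16)
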
190 changes: 124 additions & 66 deletions test/common_utils.py
@@ -492,6 +492,28 @@ def get_num_channels(color_space):
return num_channels


def make_image(
spatial_size, *, color_space="RGB", batch_dims=(), dtype=None, device="cpu", memory_format=torch.contiguous_format
):
spatial_size = _parse_spatial_size(spatial_size)
num_channels = get_num_channels(color_space)
dtype = dtype or torch.uint8
max_value = get_max_value(dtype)

data = torch.testing.make_tensor(
(*batch_dims, num_channels, *spatial_size),
low=0,
high=max_value,
dtype=dtype,
device=device,
memory_format=memory_format,
)
if color_space in {"GRAY_ALPHA", "RGBA"}:
data[..., -1, :, :] = max_value

return datapoints.Image(data)
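# Not part of the diff: a minimal usage sketch of the extracted helper, with
# illustrative sizes. Given the defaults above, the first call should yield a
# uint8 RGB datapoints.Image of shape (3, 32, 32); the second a batched RGBA
# image of shape (2, 4, 32, 32) with the alpha channel set to the max value.
example_image = make_image((32, 32))
example_batch = make_image((32, 32), color_space="RGBA", batch_dims=(2,))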


def make_image_loader(
size="random",
*,
@@ -501,24 +523,25 @@ def make_image_loader(
constant_alpha=True,
memory_format=torch.contiguous_format,
):
if not constant_alpha:
raise ValueError("This should never happen")
size = _parse_spatial_size(size)
num_channels = get_num_channels(color_space)

def fn(shape, dtype, device, memory_format):
max_value = get_max_value(dtype)
data = torch.testing.make_tensor(
shape, low=0, high=max_value, dtype=dtype, device=device, memory_format=memory_format
*batch_dims, _, height, width = shape
return make_image(
(height, width),
color_space=color_space,
batch_dims=batch_dims,
dtype=dtype,
device=device,
memory_format=memory_format,
)
if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha:
data[..., -1, :, :] = max_value
return datapoints.Image(data)

return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format)


make_image = from_loader(make_image_loader)


def make_image_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
@@ -601,59 +624,65 @@ def randint_with_tensor_bounds(arg1, arg2=None, **kwargs):
).reshape(low.shape)


def make_bounding_box(
format=datapoints.BoundingBoxFormat.XYXY, spatial_size="random", batch_dims=(), dtype=None, device="cpu"
):
if isinstance(format, str):
format = datapoints.BoundingBoxFormat[format]

spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")
dtype = dtype or torch.float32

if any(dim == 0 for dim in batch_dims):
return datapoints.BoundingBox(
torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
)

height, width = spatial_size
if format == datapoints.BoundingBoxFormat.XYXY:
x1 = torch.randint(0, width // 2, batch_dims)
y1 = torch.randint(0, height // 2, batch_dims)
x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1
y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1
parts = (x1, y1, x2, y2)
elif format == datapoints.BoundingBoxFormat.XYWH:
x = torch.randint(0, width // 2, batch_dims)
y = torch.randint(0, height // 2, batch_dims)
w = randint_with_tensor_bounds(1, width - x)
h = randint_with_tensor_bounds(1, height - y)
parts = (x, y, w, h)
elif format == datapoints.BoundingBoxFormat.CXCYWH:
cx = torch.randint(1, width - 1, batch_dims)
cy = torch.randint(1, height - 1, batch_dims)
w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1)
h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1)
parts = (cx, cy, w, h)
else:
raise ValueError(f"Can't make bounding box in format {format}")

return datapoints.BoundingBox(
torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
)
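# Not part of the diff: illustrative calls to the extracted helper. Per the code
# above, format may be an enum value or a string, a plain call returns a single
# box of shape (4,), and batch_dims prepends leading dimensions, e.g. (5, 4).
example_box = make_bounding_box(format="XYWH", spatial_size=(32, 32))
example_boxes = make_bounding_box(spatial_size=(32, 32), batch_dims=(5,))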


def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32):
if isinstance(format, str):
format = datapoints.BoundingBoxFormat[format]
if format not in {
datapoints.BoundingBoxFormat.XYXY,
datapoints.BoundingBoxFormat.XYWH,
datapoints.BoundingBoxFormat.CXCYWH,
}:
raise pytest.UsageError(f"Can't make bounding box in format {format}")

spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")

def fn(shape, dtype, device):
*extra_dims, num_coordinates = shape
*batch_dims, num_coordinates = shape
if num_coordinates != 4:
raise pytest.UsageError()

if any(dim == 0 for dim in extra_dims):
return datapoints.BoundingBox(
torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
)

height, width = spatial_size

if format == datapoints.BoundingBoxFormat.XYXY:
x1 = torch.randint(0, width // 2, extra_dims)
y1 = torch.randint(0, height // 2, extra_dims)
x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1
y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1
parts = (x1, y1, x2, y2)
elif format == datapoints.BoundingBoxFormat.XYWH:
x = torch.randint(0, width // 2, extra_dims)
y = torch.randint(0, height // 2, extra_dims)
w = randint_with_tensor_bounds(1, width - x)
h = randint_with_tensor_bounds(1, height - y)
parts = (x, y, w, h)
else: # format == features.BoundingBoxFormat.CXCYWH:
cx = torch.randint(1, width - 1, extra_dims)
cy = torch.randint(1, height - 1, extra_dims)
w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1)
h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1)
parts = (cx, cy, w, h)

return datapoints.BoundingBox(
torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
return make_bounding_box(
format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device
)

return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)


make_bounding_box = from_loader(make_bounding_box_loader)


def make_bounding_box_loaders(
*,
extra_dims=DEFAULT_EXTRA_DIMS,
@@ -672,21 +701,32 @@ class MaskLoader(TensorLoader):
pass


def make_detection_mask(spatial_size, *, num_objects="random", batch_dims=(), dtype=None, device="cpu"):
"""Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks"""
spatial_size = _parse_spatial_size(spatial_size)
num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects
dtype = dtype or torch.bool

data = torch.testing.make_tensor(
(*batch_dims, num_objects, *spatial_size), low=0, high=2, dtype=dtype, device=device
)
return datapoints.Mask(data)
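# Not part of the diff: illustrative usage of the extracted helper. With the
# defaults above this should yield a boolean datapoints.Mask of shape
# (num_objects, H, W), here (4, 32, 32), one channel per object.
example_det_masks = make_detection_mask((32, 32), num_objects=4)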


def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8):
# This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects
size = _parse_spatial_size(size)
num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects

def fn(shape, dtype, device):
data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device)
return datapoints.Mask(data)
*batch_dims, num_objects, height, width = shape
return make_detection_mask(
(height, width), num_objects=num_objects, batch_dims=batch_dims, dtype=dtype, device=device
)

return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype)


make_detection_mask = from_loader(make_detection_mask_loader)


def make_detection_mask_loaders(
sizes=DEFAULT_SPATIAL_SIZES,
num_objects=(1, 0, "random"),
@@ -700,19 +740,29 @@ def make_detection_mask_loaders(
make_detection_masks = from_loaders(make_detection_mask_loaders)


def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8):
# This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values
size = _parse_spatial_size(size)
def make_segmentation_mask(spatial_size, *, num_categories="random", batch_dims=(), dtype=None, device="cpu"):
"""Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value"""
spatial_size = _parse_spatial_size(spatial_size)
num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories
dtype = dtype or torch.uint8

data = torch.testing.make_tensor(
(*batch_dims, *spatial_size), low=0, high=num_categories, dtype=dtype, device=device
)
return datapoints.Mask(data)
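# Not part of the diff: illustrative usage of the extracted helper. With the
# defaults above this should yield a uint8 datapoints.Mask of shape (H, W),
# here (32, 32), with pixel values in [0, num_categories).
example_seg_mask = make_segmentation_mask((32, 32), num_categories=10)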

def fn(shape, dtype, device):
data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device)
return datapoints.Mask(data)

return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype)
def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8):
# This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values
spatial_size = _parse_spatial_size(size)

def fn(shape, dtype, device):
*batch_dims, height, width = shape
return make_segmentation_mask(
(height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device
)

make_segmentation_mask = from_loader(make_segmentation_mask_loader)
return MaskLoader(fn, shape=(*extra_dims, *spatial_size), dtype=dtype)


def make_segmentation_mask_loaders(
@@ -750,6 +800,11 @@ class VideoLoader(ImageLoader):
pass


def make_video(spatial_size, *, num_frames="random", batch_dims=(), **kwargs):
num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames
return datapoints.Video(make_image(spatial_size, batch_dims=(*batch_dims, num_frames), **kwargs))
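# Not part of the diff: illustrative usage of the extracted helper. With the
# make_image defaults this should yield a uint8 RGB datapoints.Video of shape
# (num_frames, 3, H, W), here (3, 3, 32, 32).
example_video = make_video((32, 32), num_frames=3)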


def make_video_loader(
size="random",
*,
@@ -762,17 +817,20 @@ def make_video_loader(
num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames

def fn(shape, dtype, device, memory_format):
video = make_image(
size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device, memory_format=memory_format
*batch_dims, num_frames, _, height, width = shape
return make_video(
(height, width),
num_frames=num_frames,
color_space=color_space,
batch_dims=batch_dims,
dtype=dtype,
device=device,
memory_format=memory_format,
)
return datapoints.Video(video)

return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype)


make_video = from_loader(make_video_loader)


def make_video_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
45 changes: 23 additions & 22 deletions test/test_prototype_transforms.py
@@ -9,6 +9,7 @@
from common_utils import (
assert_equal,
DEFAULT_EXTRA_DIMS,
DEFAULT_PORTRAIT_SPATIAL_SIZE,
make_bounding_box,
make_detection_mask,
make_image,
@@ -79,8 +80,8 @@ def test_mixup_cutmix(transform, input):
for unsup_data in [
make_label(),
make_bounding_box(format="XYXY"),
make_detection_mask(),
make_segmentation_mask(),
make_detection_mask(DEFAULT_PORTRAIT_SPATIAL_SIZE),
make_segmentation_mask(DEFAULT_PORTRAIT_SPATIAL_SIZE),
]:
input_copy["unsupported"] = unsup_data
with pytest.raises(TypeError, match=err_msg):
@@ -215,8 +216,8 @@ def test__get_params(self, mocker):
transform = transforms.FixedSizeCrop(size=crop_size)

flat_inputs = [
make_image(size=spatial_size, color_space="RGB"),
make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape),
make_image(spatial_size=spatial_size, color_space="RGB"),
make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape),
]
params = transform._get_params(flat_inputs)

@@ -312,9 +313,9 @@ def test__transform_culling(self, mocker):
)

bounding_boxes = make_bounding_box(
format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
)
masks = make_detection_mask(size=spatial_size, extra_dims=(batch_size,))
masks = make_detection_mask(spatial_size=spatial_size, batch_dims=(batch_size,))
labels = make_label(extra_dims=(batch_size,))

transform = transforms.FixedSizeCrop((-1, -1))
@@ -350,7 +351,7 @@ def test__transform_bounding_box_clamping(self, mocker):
)

bounding_box = make_bounding_box(
format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
)
mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box")

Expand Down Expand Up @@ -389,9 +390,9 @@ class TestPermuteDimensions:
)
def test_call(self, dims, inverse_dims):
sample = dict(
image=make_image(),
bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY),
video=make_video(),
image=make_image(DEFAULT_PORTRAIT_SPATIAL_SIZE),
bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE),
video=make_video(DEFAULT_PORTRAIT_SPATIAL_SIZE),
str="str",
int=0,
)
@@ -433,9 +434,9 @@ class TestTransposeDimensions:
)
def test_call(self, dims):
sample = dict(
image=make_image(),
bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY),
video=make_video(),
image=make_image(DEFAULT_PORTRAIT_SPATIAL_SIZE),
bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE),
video=make_video(DEFAULT_PORTRAIT_SPATIAL_SIZE),
str="str",
int=0,
)
@@ -494,29 +495,29 @@ def make_datapoints():
size = (600, 800)
num_objects = 22

pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
pil_image = to_image_pil(make_image(spatial_size=size, color_space="RGB"))
target = {
"boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
"boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
"labels": make_label(extra_dims=(num_objects,), categories=80),
"masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
"masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long),
}

yield (pil_image, target)

tensor_image = torch.Tensor(make_image(size=size, color_space="RGB"))
tensor_image = torch.Tensor(make_image(spatial_size=size, color_space="RGB"))
target = {
"boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
"boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
"labels": make_label(extra_dims=(num_objects,), categories=80),
"masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
"masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long),
}

yield (tensor_image, target)

datapoint_image = make_image(size=size, color_space="RGB")
datapoint_image = make_image(spatial_size=size, color_space="RGB")
target = {
"boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
"boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
"labels": make_label(extra_dims=(num_objects,), categories=80),
"masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
"masks": make_detection_mask(spatial_size=size, num_objects=num_objects, dtype=torch.long),
}

yield (datapoint_image, target)