Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[prototype] Restore BC on perspective #6902

Merged
merged 9 commits into from
Nov 4, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions test/prototype_transforms_kernel_infos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1184,38 +1184,38 @@ def reference_inputs_pad_bounding_box():
def sample_inputs_perspective_image_tensor():
    """Yield sample calls for the perspective image-tensor kernel.

    Exercises every fill flavor (None, scalar float/int, single-element list,
    per-channel list) against the first fixed set of perspective coefficients.
    """
    for image_loader in make_image_loaders(sizes=["random"]):
        for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]:
            # startpoints/endpoints are passed positionally as None because the
            # precomputed coefficients are supplied directly.
            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: While correct, can we pass keywords here to make this call more readable?

Suggested change
yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])
yield ArgsKwargs(image_loader, startpoints=None, endpoints=None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0])

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to make this change on the tests. I wouldn't want to make it on the main code because the arguments are positional not keyword args. Any concerns on that?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nono, fine with that. The test definition is just hard to read without knowledge of the exact signature of the kernel.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something, somewhere in the testing framework seems to send multiple parameters to the kernel. I don't want to waste too much time on this, I've reverted the named params change and we can do the optimization on a follow up as it doesn't affect the main implementation or the quality of the tests.



def reference_inputs_perspective_image_tensor():
    """Yield reference calls comparing the tensor kernel against the PIL kernel."""
    for image_loader, coefficients in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS):
        # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
        for fill in [None, 128.0, 128, [12.0 + c for c in range(image_loader.num_channels)]]:
            # startpoints/endpoints are None because coefficients are given directly.
            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)


def sample_inputs_perspective_bounding_box():
    """Yield sample calls for the perspective bounding-box kernel."""
    for bounding_box_loader in make_bounding_box_loaders():
        # Positional order follows the kernel signature:
        # (bounding_box, format, startpoints, endpoints, *, coefficients).
        yield ArgsKwargs(
            bounding_box_loader, bounding_box_loader.format, None, None, coefficients=_PERSPECTIVE_COEFFS[0]
        )


def sample_inputs_perspective_mask():
    """Yield sample calls for the perspective mask kernel."""
    for mask_loader in make_mask_loaders(sizes=["random"]):
        # startpoints/endpoints are None because coefficients are given directly.
        yield ArgsKwargs(mask_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])


def reference_inputs_perspective_mask():
    """Yield reference calls for the perspective mask kernel over all coefficient sets."""
    for mask_loader, perspective_coeffs in itertools.product(
        make_mask_loaders(extra_dims=[()], num_objects=[1]), _PERSPECTIVE_COEFFS
    ):
        yield ArgsKwargs(mask_loader, None, None, coefficients=perspective_coeffs)


def sample_inputs_perspective_video():
    """Yield sample calls for the perspective video kernel."""
    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
        # startpoints/endpoints are None because coefficients are given directly.
        yield ArgsKwargs(video_loader, None, None, coefficients=_PERSPECTIVE_COEFFS[0])


KERNEL_INFOS.extend(
Expand Down
6 changes: 3 additions & 3 deletions test/test_prototype_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,8 +917,8 @@ def test__get_params(self, mocker):
params = transform._get_params([image])

h, w = image.spatial_size
assert "perspective_coeffs" in params
assert len(params["perspective_coeffs"]) == 8
assert "coefficients" in params
assert len(params["coefficients"]) == 8

@pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
def test__transform(self, distortion_scale, mocker):
Expand All @@ -940,7 +940,7 @@ def test__transform(self, distortion_scale, mocker):
params = transform._get_params([inpt])

fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation)


class TestElasticTransform:
Expand Down
4 changes: 3 additions & 1 deletion test/test_prototype_transforms_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,9 @@ def _compute_expected_bbox(bbox, pcoeffs_):
output_bboxes = F.perspective_bounding_box(
bboxes,
bboxes_format,
perspective_coeffs=pcoeffs,
None,
None,
coefficients=pcoeffs,
)

if bboxes.ndim < 2:
Expand Down
8 changes: 6 additions & 2 deletions torchvision/prototype/features/_bounding_box.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,15 @@ def affine(

def perspective(
    self,
    startpoints: Optional[List[List[int]]],
    endpoints: Optional[List[List[int]]],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: FillTypeJIT = None,
    coefficients: Optional[List[float]] = None,
) -> BoundingBox:
    """Apply a perspective transform to this bounding box and re-wrap the result.

    ``interpolation`` and ``fill`` are accepted for interface parity with the
    other features but are not forwarded — the box kernel does not resample pixels.
    """
    # The kernel signature is (bounding_box, format, startpoints, endpoints, ...):
    # format must precede the point lists, otherwise arguments are misbound.
    output = self._F.perspective_bounding_box(
        self.as_subclass(torch.Tensor), self.format, startpoints, endpoints, coefficients=coefficients
    )
    return BoundingBox.wrap_like(self, output)

def elastic(
Expand Down
4 changes: 3 additions & 1 deletion torchvision/prototype/features/_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,11 @@ def affine(

def perspective(
    self,
    startpoints: Optional[List[List[int]]],
    endpoints: Optional[List[List[int]]],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: FillTypeJIT = None,
    coefficients: Optional[List[float]] = None,
) -> _Feature:
    # Base-class no-op: plain features pass through unchanged; subclasses
    # (Image, Video, Mask, BoundingBox) override with real dispatch.
    return self

Expand Down
11 changes: 9 additions & 2 deletions torchvision/prototype/features/_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,19 @@ def affine(

def perspective(
    self,
    startpoints: Optional[List[List[int]]],
    endpoints: Optional[List[List[int]]],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: FillTypeJIT = None,
    coefficients: Optional[List[float]] = None,
) -> Image:
    """Apply a perspective transform, dispatching to the image-tensor kernel."""
    output = self._F.perspective_image_tensor(
        self.as_subclass(torch.Tensor),
        startpoints,
        endpoints,
        interpolation=interpolation,
        fill=fill,
        coefficients=coefficients,
    )
    return Image.wrap_like(self, output)

Expand Down
8 changes: 6 additions & 2 deletions torchvision/prototype/features/_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,15 @@ def affine(

def perspective(
    self,
    startpoints: Optional[List[List[int]]],
    endpoints: Optional[List[List[int]]],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: FillTypeJIT = None,
    coefficients: Optional[List[float]] = None,
) -> Mask:
    """Apply a perspective transform to this mask.

    ``interpolation`` is accepted for interface parity but not forwarded — the
    mask kernel always uses nearest-neighbor resampling internally.
    """
    output = self._F.perspective_mask(
        self.as_subclass(torch.Tensor), startpoints, endpoints, fill=fill, coefficients=coefficients
    )
    return Mask.wrap_like(self, output)

def elastic(
Expand Down
11 changes: 9 additions & 2 deletions torchvision/prototype/features/_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,19 @@ def affine(

def perspective(
    self,
    startpoints: Optional[List[List[int]]],
    endpoints: Optional[List[List[int]]],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: FillTypeJIT = None,
    coefficients: Optional[List[float]] = None,
) -> Video:
    """Apply a perspective transform, dispatching to the video kernel."""
    output = self._F.perspective_video(
        self.as_subclass(torch.Tensor),
        startpoints,
        endpoints,
        interpolation=interpolation,
        fill=fill,
        coefficients=coefficients,
    )
    return Video.wrap_like(self, output)

Expand Down
6 changes: 4 additions & 2 deletions torchvision/prototype/transforms/_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,15 +524,17 @@ def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
endpoints = [topleft, topright, botright, botleft]
perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)
return dict(perspective_coeffs=perspective_coeffs)
return dict(coefficients=perspective_coeffs)

def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
    """Apply the sampled perspective to one input.

    ``params`` carries the precomputed ``coefficients`` from ``_get_params``,
    so startpoints/endpoints are passed as None.
    """
    fill = self.fill[type(inpt)]
    return F.perspective(
        inpt,
        None,
        None,
        fill=fill,
        interpolation=self.interpolation,
        **params,
    )


Expand Down
64 changes: 50 additions & 14 deletions torchvision/prototype/transforms/functional/_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from torchvision.transforms.functional import (
_compute_resized_output_size as __compute_resized_output_size,
_get_inverse_affine_matrix,
_get_perspective_coeffs,
InterpolationMode,
pil_modes_mapping,
pil_to_tensor,
Expand Down Expand Up @@ -906,12 +907,30 @@ def crop(inpt: features.InputTypeJIT, top: int, left: int, height: int, width: i
return crop_image_pil(inpt, top, left, height, width)


def _perspective_coefficients(
startpoints: Optional[List[List[int]]],
endpoints: Optional[List[List[int]]],
coefficients: Optional[List[float]],
) -> List[float]:
if coefficients is not None:
if len(coefficients) != 8:
raise ValueError("Argument coefficients should have 8 float values")
return coefficients
elif startpoints is not None and endpoints is not None:
return _get_perspective_coeffs(startpoints, endpoints)
else:
raise ValueError("Either the startpoints/endpoints or the coefficients must have non `None` values.")
datumbox marked this conversation as resolved.
Show resolved Hide resolved


def perspective_image_tensor(
image: torch.Tensor,
perspective_coeffs: List[float],
startpoints: Optional[List[List[int]]],
endpoints: Optional[List[List[int]]],
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
fill: features.FillTypeJIT = None,
coefficients: Optional[List[float]] = None,
) -> torch.Tensor:
perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
if image.numel() == 0:
return image

Expand All @@ -934,21 +953,24 @@ def perspective_image_tensor(
@torch.jit.unused
def perspective_image_pil(
    image: PIL.Image.Image,
    startpoints: Optional[List[List[int]]],
    endpoints: Optional[List[List[int]]],
    interpolation: InterpolationMode = InterpolationMode.BICUBIC,
    fill: features.FillTypeJIT = None,
    coefficients: Optional[List[float]] = None,
) -> PIL.Image.Image:
    """PIL backend: resolve the coefficients, then delegate to the functional PIL kernel."""
    perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)
    return _FP.perspective(image, perspective_coeffs, interpolation=pil_modes_mapping[interpolation], fill=fill)


def perspective_bounding_box(
bounding_box: torch.Tensor,
format: features.BoundingBoxFormat,
perspective_coeffs: List[float],
startpoints: Optional[List[List[int]]],
endpoints: Optional[List[List[int]]],
coefficients: Optional[List[float]] = None,
) -> torch.Tensor:

if len(perspective_coeffs) != 8:
raise ValueError("Argument perspective_coeffs should have 8 float values")
perspective_coeffs = _perspective_coefficients(startpoints, endpoints, coefficients)

original_shape = bounding_box.shape
bounding_box = (
Expand Down Expand Up @@ -1029,8 +1051,10 @@ def perspective_bounding_box(

def perspective_mask(
mask: torch.Tensor,
perspective_coeffs: List[float],
startpoints: Optional[List[List[int]]],
endpoints: Optional[List[List[int]]],
fill: features.FillTypeJIT = None,
coefficients: Optional[List[float]] = None,
) -> torch.Tensor:
if mask.ndim < 3:
mask = mask.unsqueeze(0)
Expand All @@ -1039,7 +1063,7 @@ def perspective_mask(
needs_squeeze = False

output = perspective_image_tensor(
mask, perspective_coeffs=perspective_coeffs, interpolation=InterpolationMode.NEAREST, fill=fill
mask, startpoints, endpoints, interpolation=InterpolationMode.NEAREST, fill=fill, coefficients=coefficients
)

if needs_squeeze:
Expand All @@ -1050,25 +1074,37 @@ def perspective_mask(

def perspective_video(
    video: torch.Tensor,
    startpoints: Optional[List[List[int]]],
    endpoints: Optional[List[List[int]]],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: features.FillTypeJIT = None,
    coefficients: Optional[List[float]] = None,
) -> torch.Tensor:
    """Apply a perspective transform to a video by reusing the image-tensor kernel."""
    return perspective_image_tensor(
        video, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
    )


def perspective(
    inpt: features.InputTypeJIT,
    startpoints: Optional[List[List[int]]],
    endpoints: Optional[List[List[int]]],
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: features.FillTypeJIT = None,
    coefficients: Optional[List[float]] = None,
) -> features.InputTypeJIT:
    """Dispatch a perspective transform to the tensor, feature, or PIL implementation."""
    if isinstance(inpt, torch.Tensor) and (torch.jit.is_scripting() or not isinstance(inpt, features._Feature)):
        # Plain tensors — and any tensor while scripting — use the tensor kernel directly.
        return perspective_image_tensor(
            inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
        )
    elif isinstance(inpt, features._Feature):
        # Features dispatch through their own method so subclass-specific kernels run.
        return inpt.perspective(
            startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
        )
    else:
        # Anything else is assumed to be a PIL image.
        return perspective_image_pil(
            inpt, startpoints, endpoints, interpolation=interpolation, fill=fill, coefficients=coefficients
        )


def elastic_image_tensor(
Expand Down