[Typing][A-83] Add type annotations for `paddle/vision/transforms/functional.py` (#65560)

---------
Co-authored-by: SigureMo <sigure.qaq@gmail.com>
Co-authored-by: gouzi <530971494@qq.com>
PaddlePaddle · Jun 30, 2024 · 8ff2ac1 · 8ff2ac1
1 parent 5cc9895
commit 8ff2ac1
Show file tree

Hide file tree

Showing 5 changed files with 123 additions and 48 deletions.
diff --git a/python/paddle/_typing/__init__.py b/python/paddle/_typing/__init__.py
@@ -28,6 +28,7 @@
     TensorIndex as TensorIndex,
     TensorLike as TensorLike,
     TensorOrTensors as TensorOrTensors,
+    unreached as unreached,
 )
 
 # Device

diff --git a/python/paddle/_typing/basic.py b/python/paddle/_typing/basic.py
@@ -26,7 +26,7 @@
 
 import numpy as np
 import numpy.typing as npt
-from typing_extensions import TypeAlias
+from typing_extensions import Never, TypeAlias
 
 from .backport import EllipsisType
 
@@ -63,3 +63,10 @@
 ParamAttrLike: TypeAlias = Union[
     "ParamAttr", "Initializer", "WeightDecayRegularizer", str, bool
 ]
+
+
+def unreached() -> Never:
+    """Mark a code path as unreachable; always raises RuntimeError if executed.
+    Refer to https://typing.readthedocs.io/en/latest/source/unreachable.html#marking-code-as-unreachable
+    """
+    raise RuntimeError("Unreachable code path")
diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py
@@ -66,7 +66,7 @@
         DTypeLike,
         ShapeLike,
     )
-    from paddle.nn import Layer
+
 
 __all__ = []
 

diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py
@@ -12,13 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import math
 import numbers
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 from PIL import Image
 
 import paddle
+from paddle._typing import unreached
 
 from ...base.framework import Variable
 from . import (
@@ -27,25 +31,49 @@
     functional_tensor as F_t,
 )
 
+if TYPE_CHECKING:
+    from typing import Literal, TypeGuard, TypeVar, Union
+
+    import numpy.typing as npt
+    from PIL.Image import Image as PILImage
+    from typing_extensions import TypeAlias
+
+    from paddle import Tensor
+    from paddle._typing import DataLayoutImage, Size2, Size3, Size4
+
+    _InterpolationPil: TypeAlias = Literal[
+        "nearest", "bilinear", "bicubic", "lanczos", "hamming"
+    ]
+    _InterpolationCv2: TypeAlias = Literal[
+        "nearest", "bilinear", "area", "bicubic", "lanczos"
+    ]
+    _PaddingMode: TypeAlias = Literal[
+        "constant", "edge", "reflect", "symmetric"
+    ]
+    _ImageDataT = TypeVar("_ImageDataT", Tensor, PILImage, npt.NDArray[Any])
+    _ImageDataType = Union[Tensor, PILImage, npt.NDArray[Any]]
+
 __all__ = []
 
 
-def _is_pil_image(img):
+def _is_pil_image(img: _ImageDataType) -> TypeGuard[PILImage]:
     return isinstance(img, Image.Image)
 
 
-def _is_tensor_image(img):
+def _is_tensor_image(img: _ImageDataType) -> TypeGuard[Tensor]:
     """
     Return True if img is a Tensor for dynamic mode or Variable for static graph mode.
     """
     return isinstance(img, (paddle.Tensor, Variable))
 
 
-def _is_numpy_image(img):
+def _is_numpy_image(img: _ImageDataType) -> TypeGuard[npt.NDArray[Any]]:
     return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
 
 
-def to_tensor(pic, data_format='CHW'):
+def to_tensor(
+    pic: PILImage | npt.NDArray[Any], data_format: DataLayoutImage = 'CHW'
+) -> Tensor:
     """Converts a ``PIL.Image`` or ``numpy.ndarray`` to paddle.Tensor.
 
     Converts a PIL.Image or numpy.ndarray (H x W x C) to a paddle.Tensor of shape (C x H x W).
@@ -97,7 +125,11 @@ def to_tensor(pic, data_format='CHW'):
         return pic if data_format.lower() == 'chw' else pic.transpose((1, 2, 0))
 
 
-def resize(img, size, interpolation='bilinear'):
+def resize(
+    img: _ImageDataT,
+    size: Size2,
+    interpolation: _InterpolationPil | _InterpolationCv2 = 'bilinear',
+) -> _ImageDataT:
     """
     Resizes the image to given size
 
@@ -153,7 +185,12 @@ def resize(img, size, interpolation='bilinear'):
         return F_cv2.resize(img, size, interpolation)
 
 
-def pad(img, padding, fill=0, padding_mode='constant'):
+def pad(
+    img: _ImageDataT,
+    padding: Size2 | Size4,
+    fill: Size3 = 0,
+    padding_mode: _PaddingMode = 'constant',
+) -> _ImageDataT:
     """
     Pads the given PIL.Image or numpy.array or paddle.Tensor on all sides with specified padding mode and fill value.
 
@@ -217,7 +254,9 @@ def pad(img, padding, fill=0, padding_mode='constant'):
         return F_cv2.pad(img, padding, fill, padding_mode)
 
 
-def crop(img, top, left, height, width):
+def crop(
+    img: _ImageDataT, top: int, left: int, height: int, width: int
+) -> _ImageDataT:
     """Crops the given Image.
 
     Args:
@@ -259,7 +298,7 @@ def crop(img, top, left, height, width):
         return F_cv2.crop(img, top, left, height, width)
 
 
-def center_crop(img, output_size):
+def center_crop(img: _ImageDataT, output_size: Size2) -> _ImageDataT:
     """Crops the given Image and resize it to desired size.
 
     Args:
@@ -297,7 +336,7 @@ def center_crop(img, output_size):
         return F_cv2.center_crop(img, output_size)
 
 
-def hflip(img):
+def hflip(img: _ImageDataT) -> _ImageDataT:
     """Horizontally flips the given Image or np.array or paddle.Tensor.
 
     Args:
@@ -334,7 +373,7 @@ def hflip(img):
         return F_cv2.hflip(img)
 
 
-def vflip(img):
+def vflip(img: _ImageDataT) -> _ImageDataT:
     """Vertically flips the given Image or np.array or paddle.Tensor.
 
     Args:
@@ -371,7 +410,9 @@ def vflip(img):
         return F_cv2.vflip(img)
 
 
-def adjust_brightness(img, brightness_factor):
+def adjust_brightness(
+    img: _ImageDataT, brightness_factor: float
+) -> _ImageDataT:
     """Adjusts brightness of an Image.
 
     Args:
@@ -423,7 +464,7 @@ def adjust_brightness(img, brightness_factor):
         return F_t.adjust_brightness(img, brightness_factor)
 
 
-def adjust_contrast(img, contrast_factor):
+def adjust_contrast(img: _ImageDataT, contrast_factor: float) -> _ImageDataT:
     """Adjusts contrast of an Image.
 
     Args:
@@ -462,7 +503,9 @@ def adjust_contrast(img, contrast_factor):
         return F_t.adjust_contrast(img, contrast_factor)
 
 
-def adjust_saturation(img, saturation_factor):
+def adjust_saturation(
+    img: _ImageDataT, saturation_factor: float
+) -> _ImageDataT:
     """Adjusts color saturation of an image.
 
     Args:
@@ -502,7 +545,7 @@ def adjust_saturation(img, saturation_factor):
         return F_t.adjust_saturation(img, saturation_factor)
 
 
-def adjust_hue(img, hue_factor):
+def adjust_hue(img: _ImageDataT, hue_factor: float) -> _ImageDataT:
     """Adjusts hue of an image.
 
     The image hue is adjusted by converting the image to HSV and
@@ -583,15 +626,15 @@ def _get_affine_matrix(center, angle, translate, scale, shear):
 
 
 def affine(
-    img,
-    angle,
-    translate,
-    scale,
-    shear,
-    interpolation="nearest",
-    fill=0,
-    center=None,
-):
+    img: _ImageDataT,
+    angle: float,
+    translate: list[float] | tuple[float, float],
+    scale: float,
+    shear: list[float] | tuple[float, float],
+    interpolation: _InterpolationPil | _InterpolationCv2 = "nearest",
+    fill: Size3 = 0,
+    center: list[float] | tuple[float, float] | None = None,
+) -> _ImageDataT:
     """Apply affine transformation on the image.
 
     Args:
@@ -613,7 +656,7 @@ def affine(
             - "bicubic": cv2.INTER_CUBIC
         fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
             image. If given a number, the value is used for all bands respectively.
-        center (2-tuple, optional): Optional center of rotation, (x, y).
+        center (tuple|None, optional): Optional center of rotation, (x, y).
             Origin is the upper left corner.
             Default is the center of the image.
 
@@ -715,10 +758,17 @@ def affine(
         matrix = _get_affine_matrix(center_f, angle, translate_f, scale, shear)
         return F_t.affine(img, matrix, interpolation, fill)
 
+    unreached()
+
 
 def rotate(
-    img, angle, interpolation="nearest", expand=False, center=None, fill=0
-):
+    img: _ImageDataT,
+    angle: float,
+    interpolation: _InterpolationPil | _InterpolationCv2 = "nearest",
+    expand: bool = False,
+    center: list[float] | tuple[float, float] | None = None,
+    fill: Size3 = 0,
+) -> _ImageDataT:
     """Rotates the image by angle.
 
 
@@ -739,10 +789,10 @@ def rotate(
             If true, expands the output image to make it large enough to hold the entire rotated image.
             If false or omitted, make the output image the same size as the input image.
             Note that the expand flag assumes rotation around the center and no translation.
-        center (2-list|2-tuple, optional): Optional center of rotation.
+        center (list|tuple|None, optional): Optional center of rotation.
             Origin is the upper left corner.
             Default is the center of the image.
-        fill (3-list|3-tuple or int, optional): RGB pixel fill value for area outside the rotated image.
+        fill (int|list|tuple, optional): RGB pixel fill value for area outside the rotated image.
             If int, it is used for all channels respectively. Default value is 0.
 
 
@@ -827,7 +877,13 @@ def _get_perspective_coeffs(startpoints, endpoints):
     return output
 
 
-def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0):
+def perspective(
+    img: _ImageDataT,
+    startpoints: list[list[int]],
+    endpoints: list[list[int]],
+    interpolation: _InterpolationPil | _InterpolationCv2 = 'nearest',
+    fill: Size3 = 0,
+) -> _ImageDataT:
     """Perform perspective transform of the given image.
 
     Args:
@@ -885,7 +941,7 @@ def perspective(img, startpoints, endpoints, interpolation='nearest', fill=0):
         )
 
 
-def to_grayscale(img, num_output_channels=1):
+def to_grayscale(img: _ImageDataT, num_output_channels: int = 1) -> _ImageDataT:
     """Converts image to grayscale version of image.
 
     Args:
@@ -926,14 +982,20 @@ def to_grayscale(img, num_output_channels=1):
         return F_cv2.to_grayscale(img, num_output_channels)
 
 
-def normalize(img, mean, std, data_format='CHW', to_rgb=False):
+def normalize(
+    img: _ImageDataT,
+    mean: list[float] | tuple[float, float, float],
+    std: list[float] | tuple[float, float, float],
+    data_format: DataLayoutImage = 'CHW',
+    to_rgb: bool = False,
+) -> _ImageDataT:
     """Normalizes a tensor or image with mean and standard deviation.
 
     Args:
         img (PIL.Image|np.array|paddle.Tensor): input data to be normalized.
         mean (list|tuple): Sequence of means for each channel.
         std (list|tuple): Sequence of standard deviations for each channel.
-        data_format (str, optional): Data format of input img, should be 'HWC' or
+        data_format (str|None, optional): Data format of input img, should be 'HWC' or
             'CHW'. Default: 'CHW'.
         to_rgb (bool, optional): Whether to convert to rgb. If input is tensor,
             this option will be ignored. Default: False.
@@ -966,7 +1028,15 @@ def normalize(img, mean, std, data_format='CHW', to_rgb=False):
         return F_cv2.normalize(img, mean, std, data_format, to_rgb)
 
 
-def erase(img, i, j, h, w, v, inplace=False):
+def erase(
+    img: _ImageDataT,
+    i: int,
+    j: int,
+    h: int,
+    w: int,
+    v: npt.NDArray[Any] | Tensor,
+    inplace: bool = False,
+) -> _ImageDataT:
     """Erase the pixels of selected area in input image with given value.
 
     Args:

diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py
@@ -45,15 +45,12 @@
     _TransformInputKeys: TypeAlias = Sequence[
         Literal["image", "coords", "boxes", "mask"]
     ]
-    _InterpolationPil: TypeAlias = Literal[
-        "nearest", "bilinear", "bicubic", "lanczos", "hamming"
-    ]
-    _InterpolationCv2: TypeAlias = Literal[
-        "nearest", "bilinear", "area", "bicubic", "lanczos"
-    ]
-    _PaddingMode: TypeAlias = Literal[
-        "constant", "edge", "reflect", "symmetric"
-    ]
+    from .functional import (
+        _InterpolationCv2,
+        _InterpolationPil,
+        _PaddingMode,
+    )
+
 
 _InputT = TypeVar(
     "_InputT", "Tensor", "PILImage", "npt.NDArray[Any]", contravariant=True
@@ -1578,10 +1575,10 @@ class RandomAffine(BaseTransform[_InputT, _RetT]):
             - "bicubic": cv2.INTER_CUBIC
         fill (int|list|tuple, optional): Pixel fill value for the area outside the transformed
             image. If given a number, the value is used for all bands respectively.
-        center (2-tuple, optional): Optional center of rotation, (x, y).
+        center (tuple|None, optional): Optional center of rotation, (x, y).
             Origin is the upper left corner.
             Default is the center of the image.
-        keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None.
+        keys (list[str]|tuple[str]|None, optional): Same as ``BaseTransform``. Default: None.
 
     Shape:
         - img(PIL.Image|np.ndarray|Paddle.Tensor): The input image with shape (H x W x C).
@@ -1616,7 +1613,7 @@ class RandomAffine(BaseTransform[_InputT, _RetT]):
     )
     interpolation: _InterpolationPil | _InterpolationCv2
     fill: Size3
-    center: list[float] | tuple[float, float]
+    center: list[float] | tuple[float, float] | None
 
     def __init__(
         self,
@@ -1632,7 +1629,7 @@ def __init__(
         ) = None,
         interpolation: _InterpolationPil | _InterpolationCv2 = 'nearest',
         fill: Size3 = 0,
-        center: list[float] | tuple[float, float] = None,
+        center: list[float] | tuple[float, float] | None = None,
         keys: _TransformInputKeys | None = None,
     ) -> None:
         self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,))