diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py
index 99e8c6b4354c5..d6d08c05208ee 100644
--- a/python/paddle/nn/functional/common.py
+++ b/python/paddle/nn/functional/common.py
@@ -63,7 +63,7 @@
     ]
     _DropoutMode: TypeAlias = Literal['upscale_in_train', 'downscale_in_infer']
     _PaddingTensorMode: TypeAlias = Literal[
-        "constant", "reflect", "replicate", "circular"
+        "zeros", "constant", "reflect", "replicate", "circular"
     ]
     _PaddingSizeMode: TypeAlias = Literal[  # noqa: PYI047
         'valid', 'same', 'VALID', 'SAME'
diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py
index e48cc13ba330b..3a83dbd3965ad 100644
--- a/python/paddle/nn/functional/conv.py
+++ b/python/paddle/nn/functional/conv.py
@@ -26,6 +26,7 @@
     DataLayout1D,
     DataLayout2D,
     DataLayout3D,
+    DataLayoutND,
     Size1,
     Size2,
     Size3,
@@ -127,20 +128,20 @@ def _update_padding_nd(padding, channel_last, num_dims):
 
 
 def _conv_nd(
-    x,
-    weight,
-    bias=None,
-    stride=1,
-    padding=0,
+    x: Tensor,
+    weight: Tensor,
+    bias: Tensor | None = None,
+    stride: int | Sequence[int] = 1,
+    padding: _PaddingSizeMode | int | Sequence[int] | Sequence[Size2] = 0,
     padding_algorithm=None,
-    dilation=1,
-    groups=1,
-    data_format="NCHW",
-    channel_dim=1,
-    op_type="conv2d",
-    use_cudnn=True,
-    name=None,
-):
+    dilation: int | Sequence[int] = 1,
+    groups: int = 1,
+    data_format: DataLayoutND = "NCHW",
+    channel_dim: int = 1,
+    op_type: str = "conv2d",
+    use_cudnn: bool = True,
+    name: str | None = None,
+) -> Tensor:
     # Due to the poor performance of NHWC, we transpose the input to NCHW.
     if in_dynamic_or_pir_mode() and op_type == "conv2d":
         pre_bias = _C_ops.conv2d(
@@ -777,7 +778,7 @@ def conv1d_transpose(
     weight: Tensor,
     bias: Tensor | None = None,
     stride: Size1 = 1,
-    padding: _PaddingSizeMode | Size1 | Size2 = 0,
+    padding: _PaddingSizeMode | Size1 | Size2 | Sequence[Size2] = 0,
     output_padding: Size1 = 0,
     groups: int = 1,
     dilation: Size1 = 1,
@@ -1356,7 +1357,7 @@ def conv3d(
     weight: Tensor,
     bias: Tensor | None = None,
     stride: Size3 = 1,
-    padding=0,
+    padding: _PaddingSizeMode | Size3 | Size6 | Sequence[Size2] = 0,
     dilation: Size3 = 1,
     groups: int = 1,
     data_format: DataLayout3D = "NCDHW",
diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py
index 2990969ef0503..77d5194daadd4 100644
--- a/python/paddle/nn/layer/conv.py
+++ b/python/paddle/nn/layer/conv.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# TODO: define classes of convolutional neural network
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence
 
 import numpy as np
 
@@ -29,6 +31,24 @@
 from ..initializer import Normal
 from .layers import Layer
 
+if TYPE_CHECKING:
+    from paddle import Tensor
+    from paddle._typing import (
+        DataLayout1D,
+        DataLayout2D,
+        DataLayout3D,
+        DataLayoutND,
+        ParamAttrLike,
+        Size1,
+        Size2,
+        Size3,
+        Size4,
+        Size6,
+    )
+
+    from ..functional.common import _PaddingSizeMode, _PaddingTensorMode
+
+
 __all__ = []
 
 
@@ -47,23 +67,28 @@ def _reverse_repeat_list(t, n):
 
 
 class _ConvNd(Layer):
+    weight: Tensor
+    bias: Tensor
+
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        transposed,
-        dims,
-        stride=1,
-        padding=0,
-        padding_mode='zeros',
-        output_padding=0,
-        dilation=1,
-        groups=1,
-        weight_attr=None,
-        bias_attr=None,
-        data_format="NCHW",
-    ):
+        in_channels: int,
+        out_channels: int,
+        kernel_size: int | Sequence[int],
+        transposed: bool,
+        dims: int,
+        stride: int | Sequence[int] = 1,
+        padding: _PaddingSizeMode | int | Sequence[int] | Sequence[Size2] = 0,
+        padding_mode: _PaddingTensorMode = 'zeros',
+        output_padding: (
+            _PaddingSizeMode | int | Sequence[int] | Sequence[Size2]
+        ) = 0,
+        dilation: int | Sequence[int] = 1,
+        groups: int = 1,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayoutND = "NCHW",
+    ) -> None:
         super().__init__()
         assert (
             weight_attr is not False
@@ -328,18 +353,18 @@ class Conv1D(_ConvNd):
 
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        stride=1,
-        padding=0,
-        dilation=1,
-        groups=1,
-        padding_mode='zeros',
-        weight_attr=None,
-        bias_attr=None,
-        data_format="NCL",
-    ):
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Size1,
+        stride: Size1 = 1,
+        padding: _PaddingSizeMode | Size1 | Size2 | Sequence[Size2] = 0,
+        dilation: Size1 = 1,
+        groups: int = 1,
+        padding_mode: _PaddingTensorMode = 'zeros',
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayout1D = "NCL",
+    ) -> None:
         super().__init__(
             in_channels,
             out_channels,
@@ -356,7 +381,7 @@ def __init__(
             data_format=data_format,
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         padding = 0
         if self._padding_mode != "zeros":
             x = F.pad(
@@ -430,12 +455,12 @@ class Conv1DTranspose(_ConvNd):
            L_{out} &\in [ L^\prime_{out}, L^\prime_{out} + stride ]
 
     Note:
-          The conv1d_transpose can be seen as the backward of the conv1d. For conv1d,
-          when stride > 1, conv1d maps multiple input shape to the same output shape,
-          so for conv1d_transpose, when stride > 1, input shape maps multiple output shape.
-          If output_size is None, :math:`L_{out} = L^\prime_{out}`;
-          else, the :math:`L_{out}` of the output size must between :math:`L^\prime_{out}`
-          and :math:`L^\prime_{out} + stride`.
+        The conv1d_transpose can be seen as the backward of the conv1d. For conv1d,
+        when stride > 1, conv1d maps multiple input shapes to the same output shape,
+        so for conv1d_transpose, when stride > 1, one input shape maps to multiple output shapes.
+        If output_size is None, :math:`L_{out} = L^\prime_{out}`;
+        else, the :math:`L_{out}` of the output size must be between :math:`L^\prime_{out}`
+        and :math:`L^\prime_{out} + stride`.
 
     Args:
         in_channels(int): The number of channels in the input image.
@@ -449,12 +474,12 @@ class Conv1DTranspose(_ConvNd):
             If stride is a tuple/list, it must contain one integer, (stride_size).
             Default: stride = 1.
         padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds
-             `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a
-             string, either 'VALID' or 'SAME' supported, which is the padding algorithm.
-             If `padding` is a tuple or list, it could be in two forms:
-             `[pad]` or `[pad_left, pad_right]`. Default: padding = 0.
+            `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a
+            string, either 'VALID' or 'SAME' is supported, which is the padding algorithm.
+            If `padding` is a tuple or list, it could be in two forms:
+            `[pad]` or `[pad_left, pad_right]`. Default: padding = 0.
         output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension.
-             If it is a tuple/list, it must contain one integer. Default: 0.
+            If it is a tuple/list, it must contain one integer. Default: 0.
         groups(int, optional): The groups number of the Conv2D transpose layer. Inspired by
             grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
             when group=2, the first half of the filters is only connected to the
@@ -488,7 +513,7 @@ class Conv1DTranspose(_ConvNd):
         - output(Tensor): 3-D tensor with same shape as input x.
 
     Examples:
-       .. code-block:: python
+        .. code-block:: python
 
             >>> import paddle
             >>> from paddle.nn import Conv1DTranspose
@@ -515,18 +540,18 @@ class Conv1DTranspose(_ConvNd):
 
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        stride=1,
-        padding=0,
-        output_padding=0,
-        groups=1,
-        dilation=1,
-        weight_attr=None,
-        bias_attr=None,
-        data_format="NCL",
-    ):
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Size1,
+        stride: Size1 = 1,
+        padding: _PaddingSizeMode | Size1 | Size2 | Sequence[Size2] = 0,
+        output_padding: _PaddingSizeMode | Size1 | Size2 | Sequence[Size2] = 0,
+        groups: int = 1,
+        dilation: Size1 = 1,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayout1D = "NCL",
+    ) -> None:
         super().__init__(
             in_channels,
             out_channels,
@@ -543,7 +568,7 @@ def __init__(
             data_format=data_format,
         )
 
-    def forward(self, x, output_size=None):
+    def forward(self, x: Tensor, output_size: Size1 | None = None) -> Tensor:
         out = F.conv1d_transpose(
             x,
             self.weight,
@@ -671,18 +696,18 @@ class Conv2D(_ConvNd):
 
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        stride=1,
-        padding=0,
-        dilation=1,
-        groups=1,
-        padding_mode='zeros',
-        weight_attr=None,
-        bias_attr=None,
-        data_format="NCHW",
-    ):
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Size2,
+        stride: Size2 = 1,
+        padding: _PaddingSizeMode | Size2 | Size4 | Sequence[Size2] = 0,
+        dilation: Size2 = 1,
+        groups: int = 1,
+        padding_mode: _PaddingTensorMode = 'zeros',
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayout2D = "NCHW",
+    ) -> None:
         super().__init__(
             in_channels,
             out_channels,
@@ -699,7 +724,7 @@ def __init__(
             data_format=data_format,
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         if self._padding_mode != 'zeros':
             x = F.pad(
                 x,
@@ -758,7 +783,7 @@ class Conv2DTranspose(_ConvNd):
     * :math:`Out`: Output value, a 4-D ``Tensor`` with NCHW or NHWC format, the shape of :math:`Out` and :math:`X` may be different.
 
     Note:
-     If output_size is None, :math:`H_{out}` = :math:`H^\prime_{out}` , :math:`W_{out}` = :math:`W^\prime_{out}`. Otherwise, the specified output_size_height (the height of the output feature layer) :math:`H_{out}` should be between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[0]` (excluding :math:`H^\prime_{out} + strides[0]` ).
+        If output_size is None, :math:`H_{out}` = :math:`H^\prime_{out}` , :math:`W_{out}` = :math:`W^\prime_{out}`. Otherwise, the specified output_size_height (the height of the output feature layer) :math:`H_{out}` should be between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[0]` (excluding :math:`H^\prime_{out} + strides[0]` ).
 
     Parameters:
         in_channels(int): The number of channels in the input image.
@@ -829,7 +854,7 @@ class Conv2DTranspose(_ConvNd):
 
     Examples:
 
-       .. code-block:: python
+        .. code-block:: python
 
             >>> import paddle
             >>> import paddle.nn as nn
@@ -846,18 +871,18 @@ class Conv2DTranspose(_ConvNd):
 
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        stride=1,
-        padding=0,
-        output_padding=0,
-        dilation=1,
-        groups=1,
-        weight_attr=None,
-        bias_attr=None,
-        data_format="NCHW",
-    ):
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Size2,
+        stride: Size2 = 1,
+        padding: _PaddingSizeMode | Size2 | Size4 | Sequence[Size2] = 0,
+        output_padding: _PaddingSizeMode | Size2 | Size4 | Sequence[Size2] = 0,
+        dilation: Size2 = 1,
+        groups: int = 1,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayout2D = "NCHW",
+    ) -> None:
         super().__init__(
             in_channels,
             out_channels,
@@ -874,7 +899,7 @@ def __init__(
             data_format=data_format,
         )
 
-    def forward(self, x, output_size=None):
+    def forward(self, x: Tensor, output_size: Size2 | None = None) -> Tensor:
         if output_size is None:
             output_padding = self.output_padding
         else:
@@ -1003,18 +1028,18 @@ class Conv3D(_ConvNd):
 
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        stride=1,
-        padding=0,
-        dilation=1,
-        groups=1,
-        padding_mode='zeros',
-        weight_attr=None,
-        bias_attr=None,
-        data_format="NCDHW",
-    ):
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Size3,
+        stride: Size3 = 1,
+        padding: _PaddingSizeMode | Size3 | Size6 | Sequence[Size2] = 0,
+        dilation: Size3 = 1,
+        groups: int = 1,
+        padding_mode: _PaddingTensorMode = 'zeros',
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayout3D = "NCDHW",
+    ) -> None:
         super().__init__(
             in_channels,
             out_channels,
@@ -1031,7 +1056,7 @@ def __init__(
             data_format=data_format,
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         if self._padding_mode != 'zeros':
             x = F.pad(
                 x,
@@ -1165,7 +1190,7 @@ class Conv3DTranspose(_ConvNd):
 
     Examples:
 
-       .. code-block:: python
+        .. code-block:: python
 
             >>> import paddle
             >>> import paddle.nn as nn
@@ -1182,18 +1207,18 @@ class Conv3DTranspose(_ConvNd):
 
     def __init__(
         self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        stride=1,
-        padding=0,
-        output_padding=0,
-        dilation=1,
-        groups=1,
-        weight_attr=None,
-        bias_attr=None,
-        data_format="NCDHW",
-    ):
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Size3,
+        stride: Size3 = 1,
+        padding: _PaddingSizeMode | Size3 | Size6 | Sequence[Size2] = 0,
+        output_padding: _PaddingSizeMode | Size3 | Size6 | Sequence[Size2] = 0,
+        dilation: Size3 = 1,
+        groups: int = 1,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayout3D = "NCDHW",
+    ) -> None:
         super().__init__(
             in_channels,
             out_channels,
@@ -1210,7 +1235,7 @@ def __init__(
             data_format=data_format,
         )
 
-    def forward(self, x, output_size=None):
+    def forward(self, x: Tensor, output_size: Size3 | None = None) -> Tensor:
         if output_size is None:
             output_padding = self.output_padding
         else: