diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py
index c3c60c7088995..89b3369b59817 100644
--- a/python/paddle/nn/initializer/assign.py
+++ b/python/paddle/nn/initializer/assign.py
@@ -11,6 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Sequence
+
 import paddle
 from paddle import _C_ops
 
@@ -23,6 +28,9 @@
 )
 from .initializer import Initializer
 
+if TYPE_CHECKING:
+    import numpy.typing as npt
+
 __all__ = []
 
 
@@ -38,19 +46,21 @@ class NumpyArrayInitializer(Initializer):
 
     """
 
-    def __init__(self, value):
+    def __init__(self, value: npt.NDArray[Any]) -> None:
         import numpy
 
         assert isinstance(value, numpy.ndarray)
         super().__init__()
         self._value = value
 
-    def forward(self, var, block=None):
+    def forward(
+        self, var: paddle.Tensor, block: paddle.pir.Block | None = None
+    ) -> paddle.Tensor | None:
         """Initialize the input tensor with Numpy array.
 
         Args:
             var(Tensor): Tensor that needs to be initialized.
-            block(Block, optional): The block in which initialization ops
+            block(Block|None, optional): The block in which initialization ops
                 should be added. Used in static graph only, default None.
 
         Returns:
@@ -172,7 +182,7 @@ class Assign(NumpyArrayInitializer):
     Args:
         value (Tensor|numpy.ndarray|list|tuple): numpy array, list, tuple, or
             tensor to initialize the parameter.
-        name(str, optional): Normally there is no need for user to set this
+        name(str|None, optional): Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`. Default is None.
 
     Returns:
@@ -239,7 +249,11 @@ class Assign(NumpyArrayInitializer):
             [6.]
     """
 
-    def __init__(self, value, name=None):
+    def __init__(
+        self,
+        value: npt.NDArray[Any] | Sequence[int] | paddle.Tensor,
+        name: str | None = None,
+    ) -> None:
         import numpy
 
         check_type(
diff --git a/python/paddle/nn/initializer/bilinear.py b/python/paddle/nn/initializer/bilinear.py
index 05ac3641caff2..9659ea42b2a15 100644
--- a/python/paddle/nn/initializer/bilinear.py
+++ b/python/paddle/nn/initializer/bilinear.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import numpy as np
 
+import paddle
 from paddle import _C_ops, pir
 
 from ...base import core, framework, unique_name
@@ -74,16 +77,18 @@ class Bilinear(Initializer):
 
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         """Constructor for BilinearInitializer."""
         super().__init__()
 
-    def forward(self, var, block=None):
+    def forward(
+        self, var: paddle.Tensor, block: pir.Block | None = None
+    ) -> paddle.Tensor | None:
         """Initialize the input tensor with Bilinear initialization.
 
         Args:
             var(Tensor): Tensor that needs to be initialized.
-            block(Block, optional): The block in which initialization ops
+            block(Block|None, optional): The block in which initialization ops
                 should be added. Used in static graph only, default None.
 
         Returns:
diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py
index e02d167770749..7c85d29356945 100644
--- a/python/paddle/nn/initializer/constant.py
+++ b/python/paddle/nn/initializer/constant.py
@@ -47,7 +47,7 @@ def forward(
         self,
         var: paddle.Tensor,
         block: paddle.pir.Block | None = None,
-    ):
+    ) -> paddle.Tensor | None:
         """Initialize the input tensor with constant.
 
         Args:
diff --git a/python/paddle/nn/initializer/initializer.py b/python/paddle/nn/initializer/initializer.py
index 6eb339cff33a1..e09619a69b440 100644
--- a/python/paddle/nn/initializer/initializer.py
+++ b/python/paddle/nn/initializer/initializer.py
@@ -16,8 +16,10 @@
 
 import functools
 import math
+from typing import TYPE_CHECKING, Literal
 
 import numpy as np
+from typing_extensions import TypeAlias
 
 import paddle
 
@@ -28,6 +30,22 @@
 )
 from .lazy_init import lazy_init_helper
 
+if TYPE_CHECKING:
+    _NonLinearity: TypeAlias = Literal[  # noqa: PYI047
+        "sigmoid",
+        "linear",
+        "conv1d",
+        "conv2d",
+        "conv3d",
+        "conv1d_transpose",
+        "conv2d_transpose",
+        "conv3d_transpose",
+        "tanh",
+        "relu",
+        "leaky_relu",
+        "selu",
+    ]
+
 __all__ = []
 
 
@@ -40,7 +58,7 @@ class Initializer:
     directly, but need to use one of its implementations.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         pass
 
     def __call__(
@@ -53,7 +71,7 @@ def __call__(
 
     def forward(
         self, param: paddle.Tensor, block: paddle.pir.Block | None = None
-    ):
+    ) -> paddle.Tensor | None:
         """Add corresponding initialization operations to the network."""
         raise NotImplementedError()
diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py
index efb1fc486d059..dbb943d209f1a 100644
--- a/python/paddle/nn/initializer/kaiming.py
+++ b/python/paddle/nn/initializer/kaiming.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 # TODO: define the initializers of Kaiming functions in neural network
 import math
+from typing import TYPE_CHECKING
 
 import paddle
 from paddle import _C_ops
@@ -26,6 +29,9 @@
 )
 from .initializer import Initializer, calculate_gain
 
+if TYPE_CHECKING:
+    from .initializer import _NonLinearity
+
 __all__ = []
 
 
@@ -64,12 +70,12 @@ class MSRAInitializer(Initializer):
 
     def __init__(
         self,
-        uniform=True,
-        fan_in=None,
-        seed=0,
-        negative_slope=0,
-        nonlinearity='relu',
-    ):
+        uniform: bool = True,
+        fan_in: float | None = None,
+        seed: int = 0,
+        negative_slope: float = 0,
+        nonlinearity: _NonLinearity = 'relu',
+    ) -> None:
         """Constructor for MSRAInitializer"""
         assert uniform is not None
         assert seed is not None
@@ -80,12 +86,14 @@ def __init__(
         self._negative_slope = negative_slope
         self._nonlinearity = nonlinearity
 
-    def forward(self, var, block=None):
+    def forward(
+        self, var: paddle.Tensor, block: paddle.pir.Block | None = None
+    ) -> paddle.Tensor | None:
         """Initialize the input tensor with MSRA initialization.
 
         Args:
             var(Tensor): Tensor that needs to be initialized.
-            block(Block, optional): The block in which initialization ops
+            block(Block|None, optional): The block in which initialization ops
                 should be added. Used in static graph only, default None.
 
         Returns:
@@ -271,7 +279,12 @@ class KaimingNormal(MSRAInitializer):
 
     """
 
-    def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
+    def __init__(
+        self,
+        fan_in: float | None = None,
+        negative_slope: float = 0.0,
+        nonlinearity: str = 'relu',
+    ) -> None:
         super().__init__(
             uniform=False,
             fan_in=fan_in,
@@ -317,7 +330,12 @@ class KaimingUniform(MSRAInitializer):
 
     """
 
-    def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
+    def __init__(
+        self,
+        fan_in: float | None = None,
+        negative_slope: float = 0.0,
+        nonlinearity: str = 'relu',
+    ) -> None:
         super().__init__(
             uniform=True,
             fan_in=fan_in,
diff --git a/python/paddle/nn/initializer/normal.py b/python/paddle/nn/initializer/normal.py
index 04139af51cf5b..21450b6505fa3 100644
--- a/python/paddle/nn/initializer/normal.py
+++ b/python/paddle/nn/initializer/normal.py
@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
+import paddle
 from paddle import _C_ops, pir
 
 from ...base import core, framework, unique_name
@@ -36,7 +39,9 @@ class NormalInitializer(Initializer):
 
     """
 
-    def __init__(self, loc=0.0, scale=1.0, seed=0):
+    def __init__(
+        self, loc: float = 0.0, scale: float = 1.0, seed: int = 0
+    ) -> None:
         assert loc is not None
         assert scale is not None
         assert seed is not None
@@ -45,12 +50,14 @@ def __init__(self, loc=0.0, scale=1.0, seed=0):
         self._std_dev = scale
         self._seed = seed
 
-    def forward(self, var, block=None):
+    def forward(
+        self, var: paddle.Tensor, block: pir.Block | None = None
+    ) -> paddle.Tensor | None:
         """Initialize the input tensor with Normal distribution.
 
         Args:
             var(Tensor): Tensor that needs to be initialized.
-            block(Block, optional): The block in which initialization ops
+            block(Block|None, optional): The block in which initialization ops
                 should be added. Used in static graph only, default None.
 
         Returns:
@@ -119,7 +126,7 @@ class Normal(NormalInitializer):
     Args:
         mean (float, optional): mean of the normal distribution. Default is 0.0.
         std (float, optional): standard deviation of the normal distribution. Default is 1.0.
-        name(str, optional): The default value is None. Normally there is no need for user to set this
+        name(str|None, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`. Default: None.
 
     Returns:
@@ -156,7 +163,9 @@ class Normal(NormalInitializer):
             [[ 1.0754838 -4.071067 ]]])
     """
 
-    def __init__(self, mean=0.0, std=1.0, name=None):
+    def __init__(
+        self, mean: float = 0.0, std: float = 1.0, name: str | None = None
+    ) -> None:
         assert mean is not None, 'mean should not be None'
         assert std is not None, 'std should not be None'
         super().__init__(loc=mean, scale=std, seed=0)
@@ -178,7 +187,14 @@ class TruncatedNormalInitializer(Initializer):
 
     """
 
-    def __init__(self, loc=0.0, scale=1.0, seed=0, a=-2.0, b=2.0):
+    def __init__(
+        self,
+        loc: float = 0.0,
+        scale: float = 1.0,
+        seed: int = 0,
+        a: float = -2.0,
+        b: float = 2.0,
+    ) -> None:
         assert loc is not None
         assert scale is not None
         assert seed is not None
@@ -191,12 +207,14 @@ def __init__(self, loc=0.0, scale=1.0, seed=0, a=-2.0, b=2.0):
         self._a = a
         self._b = b
 
-    def forward(self, var, block=None):
+    def forward(
+        self, var: paddle.Tensor, block: pir.Block | None = None
+    ) -> paddle.Tensor | None:
         """Initialize the input tensor with TruncatedNormal distribution.
 
         Args:
             var(Tensor): Tensor that needs to be initialized.
-            block(Block, optional): The block in which initialization ops
+            block(Block|None, optional): The block in which initialization ops
                 should be added. Used in static graph only, default None.
 
         Returns:
@@ -289,7 +307,7 @@ class TruncatedNormal(TruncatedNormalInitializer):
         std (float, optional): Standard deviation of the normal distribution. Default is :math:`1.0`.
         a (float, optional): The minimum cutoff value. Default is -2.0.
         b (float, optional): The maximum cutoff value. Default is 2.0.
-        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
+        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:
         A parameter initialized by truncated normal distribution (Gaussian distribution).
@@ -325,7 +343,14 @@ class TruncatedNormal(TruncatedNormalInitializer):
             [[-0.11380529 -3.0696259 ]]])
     """
 
-    def __init__(self, mean=0.0, std=1.0, a=-2.0, b=2.0, name=None):
+    def __init__(
+        self,
+        mean: float = 0.0,
+        std: float = 1.0,
+        a: float = -2.0,
+        b: float = 2.0,
+        name: str | None = None,
+    ) -> None:
         assert mean is not None, 'mean should not be None'
         assert std is not None, 'std should not be None'
         assert a is not None, 'a should not be None'
diff --git a/python/paddle/nn/initializer/uniform.py b/python/paddle/nn/initializer/uniform.py
index f30ef1b38402d..0ce639e54df5c 100644
--- a/python/paddle/nn/initializer/uniform.py
+++ b/python/paddle/nn/initializer/uniform.py
@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
+import paddle
 from paddle import _C_ops, pir
 
 from ...base import core, framework, unique_name
@@ -43,8 +46,14 @@ class UniformInitializer(Initializer):
     """
 
     def __init__(
-        self, low=-1.0, high=1.0, seed=0, diag_num=0, diag_step=0, diag_val=1.0
-    ):
+        self,
+        low: float = -1.0,
+        high: float = 1.0,
+        seed: int = 0,
+        diag_num: int = 0,
+        diag_step: int = 0,
+        diag_val: float = 1.0,
+    ) -> None:
         assert low is not None
         assert high is not None
         assert high >= low
@@ -62,12 +71,14 @@ def __init__(
         self._diag_step = diag_step
         self._diag_val = diag_val
 
-    def forward(self, var, block=None):
+    def forward(
+        self, var: paddle.Tensor, block: pir.Block | None = None
+    ) -> paddle.Tensor | None:
         """Initialize the input tensor with Uniform distribution.
 
         Args:
             var(Tensor): Tensor that needs to be initialized.
-            block(Block, optional): The block in which initialization ops
+            block(Block|None, optional): The block in which initialization ops
                 should be added. Used in static graph only, default None.
 
         Returns:
@@ -176,7 +187,7 @@ class Uniform(UniformInitializer):
     Args:
         low (float, optional): Lower boundary of the uniform distribution. Default is :math:`-1.0`.
         high (float, optional): Upper boundary of the uniform distribution. Default is :math:`1.0`.
-        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
+        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:
         A parameter initialized by uniform distribution.
@@ -213,7 +224,9 @@ class Uniform(UniformInitializer):
             [[-0.41843393,  0.27575102]]])
     """
 
-    def __init__(self, low=-1.0, high=1.0, name=None):
+    def __init__(
+        self, low: float = -1.0, high: float = 1.0, name: str | None = None
+    ) -> None:
         assert low is not None, 'low should not be None'
         assert high is not None, 'high should not be None'
         assert high >= low, 'high should greater or equal than low'
diff --git a/python/paddle/nn/initializer/xavier.py b/python/paddle/nn/initializer/xavier.py
index 0a4c414aa274c..0b7675b38bee2 100644
--- a/python/paddle/nn/initializer/xavier.py
+++ b/python/paddle/nn/initializer/xavier.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import math
 
+import paddle
 from paddle import _C_ops
 
 from ...base import core, framework, unique_name
@@ -53,12 +56,12 @@ class XavierInitializer(Initializer):
 
     Args:
         uniform (bool, optional): whether to use uniform ,if False use normal distribution. Default is True.
-        fan_in (float, optional): fan_in for Xavier initialization. If None, it is
+        fan_in (float|None, optional): fan_in for Xavier initialization. If None, it is
            inferred from the variable. Default is None.
-        fan_out (float, optional): fan_out for Xavier initialization. If None, it is
+        fan_out (float|None, optional): fan_out for Xavier initialization. If None, it is
            inferred from the variable. Default is None.
-        gain (float, optional): Scaling Tensor. Default is 1.0.
         seed (int, optional): Random seed. Default is 0.
+        gain (float, optional): Scaling Tensor. Default is 1.0.
 
     Note:
         It is recommended to set fan_in and fan_out to None for most cases.
@@ -66,8 +69,13 @@ class XavierInitializer(Initializer):
     """
 
     def __init__(
-        self, uniform=True, fan_in=None, fan_out=None, seed=0, gain=1.0
-    ):
+        self,
+        uniform: bool = True,
+        fan_in: float | None = None,
+        fan_out: float | None = None,
+        seed: int = 0,
+        gain: float = 1.0,
+    ) -> None:
         assert uniform is not None
         assert seed is not None
         super().__init__()
@@ -77,18 +85,19 @@ def __init__(
         self._seed = seed
         self._gain = gain
 
-    def forward(self, var, block=None):
+    def forward(
+        self, var: paddle.Tensor, block: paddle.pir.Block | None = None
+    ) -> paddle.Tensor | None:
         """Initialize the input tensor with Xavier initialization.
 
         Args:
             var(Tensor): Tensor that needs to be initialized.
-            block(Block, optional): The block in which initialization ops
+            block(Block|None, optional): The block in which initialization ops
                 should be added. Used in static graph only, default None.
 
         Returns:
             The initialization op
         """
-        import paddle
 
         block = self._check_block(block)
         assert isinstance(block, (framework.Block, paddle.pir.Block))
@@ -262,12 +271,12 @@ class XavierNormal(XavierInitializer):
 
 
     Args:
-        fan_in (float, optional): fan_in for Xavier initialization, which is
+        fan_in (float|None, optional): fan_in for Xavier initialization, which is
            inferred from the Tensor. Default is None.
-        fan_out (float, optional): fan_out for Xavier initialization, which is
+        fan_out (float|None, optional): fan_out for Xavier initialization, which is
            inferred from the Tensor. Default is None.
         gain (float, optional): Scaling Tensor. Default is 1.0.
-        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
+        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:
         A parameter initialized by Xavier weight, using a normal distribution.
@@ -304,7 +313,13 @@ class XavierNormal(XavierInitializer):
             [[1.13615966, 0.89018601]]])
     """
 
-    def __init__(self, fan_in=None, fan_out=None, gain=1.0, name=None):
+    def __init__(
+        self,
+        fan_in: float | None = None,
+        fan_out: float | None = None,
+        gain: float = 1.0,
+        name: str | None = None,
+    ) -> None:
         super().__init__(
             uniform=False, fan_in=fan_in, fan_out=fan_out, seed=0, gain=gain
         )
@@ -326,12 +341,12 @@ class XavierUniform(XavierInitializer):
 
         x = gain \times \sqrt{\frac{6.0}{fan\_in + fan\_out}}.
 
     Args:
-        fan_in (float, optional): fan_in for Xavier initialization, which is
+        fan_in (float|None, optional): fan_in for Xavier initialization, which is
            inferred from the Tensor. Default is None.
-        fan_out (float, optional): fan_out for Xavier initialization, which is
+        fan_out (float|None, optional): fan_out for Xavier initialization, which is
            inferred from the Tensor. Default is None.
         gain (float, optional): Scaling Tensor. Default is 1.0.
-        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
+        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:
         A parameter initialized by Xavier weight, using a uniform distribution.
@@ -367,7 +382,13 @@ class XavierUniform(XavierInitializer):
             [[-1.02494967,  0.67544925]]])
     """
 
-    def __init__(self, fan_in=None, fan_out=None, gain=1.0, name=None):
+    def __init__(
+        self,
+        fan_in: float | None = None,
+        fan_out: float | None = None,
+        gain: float = 1.0,
+        name: str | None = None,
+    ) -> None:
         super().__init__(
             uniform=True, fan_in=fan_in, fan_out=fan_out, seed=0, gain=gain
        )
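
A note on the typing pattern used throughout this patch: every touched module now begins with `from __future__ import annotations` (PEP 563), so annotations are stored as strings and never evaluated at runtime. That is what makes the `if TYPE_CHECKING:` imports (`numpy.typing as npt`, `_NonLinearity`) and the `X | Y` union syntax safe on Python versions older than 3.10. A minimal, self-contained sketch of the mechanism (the `double` function below is hypothetical, not Paddle code):

```python
from __future__ import annotations  # PEP 563: annotations are kept as strings

from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Seen only by static checkers (mypy/pyright); never imported at runtime.
    import numpy.typing as npt


def double(value: npt.NDArray[Any] | None = None) -> npt.NDArray[Any] | None:
    # `npt` does not exist at runtime, yet defining and calling this function
    # works: the annotations above stay unevaluated strings unless something
    # calls typing.get_type_hints() on them.
    return None if value is None else value * 2


print(double.__annotations__["value"])  # "npt.NDArray[Any] | None" (a str)
```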
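The `_NonLinearity` alias added to `initializer.py` is a `Literal` type: it narrows `MSRAInitializer.nonlinearity` from an arbitrary `str` to a fixed vocabulary at type-check time while leaving runtime behavior unchanged (note that the public `KaimingNormal`/`KaimingUniform` constructors still annotate the parameter as plain `str`). A sketch of the idea with a shortened, made-up vocabulary (`_Activation` and `recommended_gain` are illustrations, not Paddle APIs):

```python
from __future__ import annotations

from typing import TYPE_CHECKING, Literal

from typing_extensions import TypeAlias

if TYPE_CHECKING:
    # Shortened stand-in for the patch's _NonLinearity alias.
    _Activation: TypeAlias = Literal["relu", "tanh", "leaky_relu"]


def recommended_gain(nonlinearity: _Activation = "relu") -> float:
    # Runtime behavior is a plain dict lookup; only checkers enforce the Literal.
    return {"relu": 2.0**0.5, "tanh": 5.0 / 3.0, "leaky_relu": 1.0}[nonlinearity]


recommended_gain("tanh")      # accepted
# recommended_gain("relu6")   # rejected by mypy/pyright: not in the Literal
```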
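For context, none of these signatures change how the initializers are used; a typical construction against the public `paddle.nn` API (shown here as an illustrative usage sketch) still looks like:

```python
import paddle
from paddle.nn.initializer import KaimingNormal, XavierUniform

# ParamAttr carries the initializer to the layer's parameters; the annotated
# __init__ signatures now document the accepted argument types.
linear = paddle.nn.Linear(
    4,
    8,
    weight_attr=paddle.ParamAttr(initializer=KaimingNormal()),
    bias_attr=paddle.ParamAttr(initializer=XavierUniform()),
)
```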