[Typing][A-39,A-47,A-51] Add type annotations for paddle/optimizer/optimizer.py #65076

Merged
merged 25 commits on Jun 17, 2024
Changes from 24 commits
1 change: 0 additions & 1 deletion python/paddle/_typing/shape.py
@@ -33,7 +33,6 @@

ShapeLike: TypeAlias = Union[_DynamicShapeLike, _StaticShapeLike]


# for size parameters, eg, kernel_size, stride ...
Size1: TypeAlias = Union[int, Tuple[int], List[int]]
Size2: TypeAlias = Union[int, Tuple[int, int], List[int]]
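For context, a minimal sketch of how these `Size*` aliases are consumed in signatures; `make_pool2d` below is a hypothetical helper used only for illustration, not a Paddle API:

```python
from paddle._typing.shape import Size2

# Size2 accepts a bare int, a 2-tuple of ints, or a list of ints.
def make_pool2d(kernel_size: Size2, stride: Size2 = 1) -> None:
    ...

make_pool2d(3)                      # int form
make_pool2d((3, 3), stride=[2, 2])  # tuple / list forms
```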
21 changes: 19 additions & 2 deletions python/paddle/base/dygraph/base.py
@@ -11,10 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import inspect
import sys
import warnings
from typing import Callable, TypeVar
from typing import Callable, ContextManager, TypeVar, overload

import decorator
from typing_extensions import ParamSpec
@@ -271,6 +274,16 @@ def _switch_tracer_mode_guard_(is_train=True):
yield


@overload
def no_grad(func: None = ...) -> ContextManager:
...


@overload
def no_grad(func: Callable[_InputT, _RetT]) -> Callable[_InputT, _RetT]:
...


def no_grad(func=None):
"""
:api_attr: imperative
Expand Down Expand Up @@ -327,7 +340,11 @@ def no_grad(func=None):
else:

@decorator.decorator
def __impl__(func, *args, **kwargs):
def __impl__(
func: Callable[_InputT, _RetT],
*args: _InputT.args,
**kwargs: _InputT.kwargs,
) -> _RetT:
with _switch_tracer_mode_guard_(is_train=False):
return func(*args, **kwargs)

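The two overloads above mirror the two ways this `no_grad` is used; a rough usage sketch follows (the toy function `evaluate` is an assumption, and the `imperative_base` alias is borrowed from how the optimizer modules import this module):

```python
from paddle.base.dygraph import base as imperative_base


# Second overload: used as a decorator, the ParamSpec-based annotation
# lets type checkers keep seeing `evaluate(x: float) -> float`.
@imperative_base.no_grad
def evaluate(x: float) -> float:
    return x * 2.0


# First overload: called with no argument, it returns a context manager
# that disables gradient tracking for the enclosed block.
with imperative_base.no_grad():
    result = evaluate(3.0)
```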
2 changes: 1 addition & 1 deletion python/paddle/nn/layer/layers.py
@@ -940,7 +940,7 @@ def parameters(self, include_sublayers: bool = True) -> list[Tensor]:
Default: True.

Returns:
list of Tensor, a list of Parameters.
list, list of Tensor, a list of Parameters.

Examples:
.. code-block:: python
73 changes: 47 additions & 26 deletions python/paddle/optimizer/adam.py
@@ -12,8 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import warnings
from collections import defaultdict
from typing import TYPE_CHECKING, Sequence

from typing_extensions import NotRequired

import paddle
from paddle import _C_ops, pir
@@ -28,7 +33,21 @@
in_dynamic_or_pir_mode,
in_pir_mode,
)
from .optimizer import Optimizer
from .optimizer import Optimizer, _ParameterConfig


class _AdamParameterConfig(_ParameterConfig):
beta1: NotRequired[float | Tensor]
beta2: NotRequired[float | Tensor]


if TYPE_CHECKING:
from paddle import Tensor
from paddle.nn.clip import GradientClipBase
from paddle.regularizer import WeightDecayRegularizer

from .lr import LRScheduler


__all__ = []
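The new `_AdamParameterConfig` extends the `_ParameterConfig` TypedDict from optimizer.py so that Adam-specific per-group keys (`beta1`, `beta2`) type-check. A hedged sketch of what this enables; the keys shown follow the docstring example further down, and `_AdamParameterConfig` is private, imported here only for illustration:

```python
import paddle
from paddle.optimizer.adam import Adam, _AdamParameterConfig

linear = paddle.nn.Linear(4, 4)

# A parameter group whose optional, Adam-specific entries are now checked.
group: _AdamParameterConfig = {
    'params': linear.parameters(),
    'learning_rate': 0.1,
    'beta1': 0.8,   # NotRequired[float | Tensor]
}

opt = Adam(learning_rate=0.001, parameters=[group])
```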

@@ -69,20 +88,20 @@ class Adam(Optimizer):
epsilon (float|Tensor, optional): A small float value for numerical stability.
It should be a float number or a 0-D Tensor with shape [] and data type as float32.
The default value is 1e-08.
parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``.
parameters (list|tuple|None, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``.
This parameter is required in dygraph mode. And you can specify different options for
different parameter groups such as the learning rate, weight decay, etc,
then the parameters are list of dict. Note that the learning_rate in parameter groups
represents the scale of base learning_rate.
The default value is None in static graph mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
weight_decay (float|WeightDecayRegularizer|None, optional): The strategy of regularization.
It can be a float value as coeff of L2 regularization or
:ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_paddle_ParamAttr` already,
the regularization setting here in optimizer will be ignored for this parameter.
Otherwise, the regularization setting here in optimizer will take effect.
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
grad_clip (GradientClipBase|None, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase`` . There are three clipping strategies
( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
:ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
@@ -95,7 +114,7 @@ class Adam(Optimizer):
The default value is False.
multi_precision (bool, optional): Whether to use multi-precision during weight updating. Default is false.
use_multi_tensor (bool, optional): Whether to use multi-tensor strategy to update all parameters at once . Default is false.
name (str, optional): Normally there is no need for user to set this property.
name (str|None, optional): Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name`.
The default value is None.

@@ -145,14 +164,14 @@ class Adam(Optimizer):
>>> loss = paddle.mean(out)
>>> adam = paddle.optimizer.Adam(
... learning_rate=0.1,
... parameters=[{
... 'params': linear_1.parameters()
... }, {
... 'params': linear_2.parameters(),
... 'weight_decay': 0.001,
... 'learning_rate': 0.1,
... 'beta1': 0.8
... }],
... parameters=[{ # type: ignore
... 'params': linear_1.parameters() # type: ignore
... }, { # type: ignore
... 'params': linear_2.parameters(), # type: ignore
... 'weight_decay': 0.001, # type: ignore
... 'learning_rate': 0.1, # type: ignore
... 'beta1': 0.8 # type: ignore
... }], # type: ignore
... weight_decay=0.01,
... beta1=0.9)
>>> loss.backward()
@@ -167,18 +186,20 @@

def __init__(
self,
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8,
parameters=None,
weight_decay=None,
grad_clip=None,
lazy_mode=False,
multi_precision=False,
use_multi_tensor=False,
name=None,
):
learning_rate: float | LRScheduler = 0.001,
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-8,
parameters: Sequence[Tensor]
| Sequence[_AdamParameterConfig]
| None = None,
weight_decay: float | WeightDecayRegularizer | None = None,
grad_clip: GradientClipBase | None = None,
lazy_mode: bool = False,
multi_precision: bool = False,
use_multi_tensor: bool = False,
name: str | None = None,
) -> None:
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
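With the annotated signature, the accepted argument forms are explicit in the types; a short sketch of the scheduler form, assuming a standard dygraph setup:

```python
import paddle

linear = paddle.nn.Linear(10, 10)
scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=10)

adam = paddle.optimizer.Adam(
    learning_rate=scheduler,         # float | LRScheduler
    epsilon=1e-8,                    # float | Tensor
    weight_decay=0.01,               # float | WeightDecayRegularizer | None
    parameters=linear.parameters(),  # plain Sequence[Tensor] form
)
```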
@@ -408,7 +429,7 @@ def _append_optimize_op(self, block, param_and_grad):

@imperative_base.no_grad
Review comment (Member):
This decorator doesn't have type hints, so it will likely lose the type information; you can refer to #64954 and add them.

This implementation is a bit different from the others, so it needs a closer look on its own.
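Roughly the pattern the comment is asking for, presumably similar to the ParamSpec-based annotation added in base.py above; `typed_no_grad` is purely illustrative, not the actual change in this PR or in #64954:

```python
from typing import Callable, TypeVar

from typing_extensions import ParamSpec

_InputT = ParamSpec("_InputT")
_RetT = TypeVar("_RetT")


def typed_no_grad(func: Callable[_InputT, _RetT]) -> Callable[_InputT, _RetT]:
    """Illustrative decorator whose annotations preserve the wrapped signature."""

    def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT:
        # ... disable gradient tracking around the call here ...
        return func(*args, **kwargs)

    return wrapper
```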

@framework.non_static_only
def step(self):
def step(self) -> None:
"""
Execute the optimizer and update parameters once.
