[Typing][A-44] Add type annotations for paddle/optimizer/lbfgs.py #65308

Merged: 10 commits, Jun 25, 2024
87 changes: 58 additions & 29 deletions python/paddle/optimizer/lbfgs.py
@megemini (Contributor) commented on Jun 22, 2024:
There are a few more APIs, don't miss them: `state_dict`, `step`, `minimize` ~

This round we are mainly focusing on public APIs, in particular the functions and methods whose names do not start with `_` ~

Also, the per-file API counts in the issue may be inaccurate. For example, a class may be counted only once even though all of the methods inside it need updating ~ You can cross-check against the interfaces listed on the official website; better to cover too many than to miss any ~ 🤟🤟🤟
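(An illustrative sketch of the convention described above; hypothetical code, not part of this diff. Public methods get annotations first, while leading-underscore helpers are secondary.)

class Example:
    def state_dict(self) -> dict[str, int]:  # public API: annotate
        return {"func_evals": 0}

    def _numel(self) -> int:  # private helper: lower priority, but harmless to annotate
        return 0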

The PR author (Contributor) replied:

Got it.

@@ -12,17 +12,47 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from collections import defaultdict
from functools import reduce
from typing import TYPE_CHECKING, NoReturn, Sequence, TypedDict

from typing_extensions import NotRequired

import paddle

from ..base import framework
from .optimizer import Optimizer

if TYPE_CHECKING:
from paddle import Tensor
from paddle.nn.clip import GradientClipBase
from paddle.regularizer import WeightDecayRegularizer

from .optimizer import _ParameterConfig

__all__ = []


class _LbfgsState(TypedDict):
func_evals: int
n_iter: int
d: Tensor
alpha: Tensor
old_yk: list[Tensor]
old_sk: list[Tensor]
ro: list[Tensor]
H_diag: Tensor
prev_flat_grad: Tensor
prev_loss: float
al: NotRequired[list[Tensor]]


class _LbfgsStateDict(TypedDict):
state: _LbfgsState
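
A consumer of these TypedDicts would look roughly as follows (a hypothetical helper, not part of the diff; note that `al` is `NotRequired`, so it has to be read with `.get`):

def _describe_state(state_dict: _LbfgsStateDict) -> str:
    # Hypothetical sketch: type-safe access to the TypedDict keys.
    state = state_dict["state"]
    evals = state["func_evals"]  # required key: always present in the type
    al = state.get("al")         # NotRequired key: may be absent
    return f"func_evals={evals}, al={'unset' if al is None else len(al)}"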


def dot(x, y):
r"""
NOTE: This is a temporary workaround for unstable result computed by `paddle.dot`,
@@ -333,28 +363,28 @@ class LBFGS(Optimizer):
learning_rate (float, optional): learning rate. The default value is 1.0.
max_iter (int, optional): maximal number of iterations per optimization step.
The default value is 20.
max_eval (int, optional): maximal number of function evaluations per optimization
max_eval (int|None, optional): maximal number of function evaluations per optimization
step. The default value is max_iter * 1.25.
tolerance_grad (float, optional): termination tolerance on first order optimality.
The default value is 1e-7.
tolerance_change (float, optional): termination tolerance on function
value/parameter changes. The default value is 1e-9.
history_size (int, optional): update history size. The default value is 100.
line_search_fn (string, optional): either 'strong_wolfe' or None. The default value is strong_wolfe.
parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \
line_search_fn (string|None, optional): either 'strong_wolfe' or None. The default value is None.
parameters (list|tuple|None, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \
This parameter is required in dygraph mode. The default value is None.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
weight_decay (float|WeightDecayRegularizer|None, optional): The strategy of regularization. \
It can be a float value as the coefficient of L2 regularization or \
:ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_paddle_ParamAttr` already, \
the regularization setting here in optimizer will be ignored for this parameter. \
Otherwise, the regularization setting here in optimizer will take effect. \
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of \
grad_clip (GradientClipBase|None, optional): Gradient clipping strategy, it's an instance of \
some derived class of ``GradientClipBase`` . There are three clipping strategies \
( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` , \
:ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
name (str, optional): Normally there is no need for user to set this property.
name (str|None, optional): Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name`.
The default value is None.

@@ -369,10 +399,10 @@ class LBFGS(Optimizer):

>>> paddle.disable_static()
>>> np.random.seed(0)
>>> np_w = np.random.rand(1).astype(np.float32)
>>> np_x = np.random.rand(1).astype(np.float32)
>>> np_w = np.random.rand(1).astype(np.float32) # type: ignore
>>> np_x = np.random.rand(1).astype(np.float32) # type: ignore

>>> inputs = [np.random.rand(1).astype(np.float32) for i in range(10)]
>>> inputs = [np.random.rand(1).astype(np.float32) for i in range(10)] # type: ignore
>>> # y = 2x
>>> targets = [2 * x for x in inputs]

@@ -397,26 +427,26 @@ class LBFGS(Optimizer):
... return loss
... opt.step(closure)
...
>>> for input, target in zip(inputs, targets):
... input = paddle.to_tensor(input)
... target = paddle.to_tensor(target)
>>> for input_np, target_np in zip(inputs, targets):
... input = paddle.to_tensor(input_np)
... target = paddle.to_tensor(target_np)
... train_step(input, target)
"""

def __init__(
self,
learning_rate=1.0,
max_iter=20,
max_eval=None,
tolerance_grad=1e-7,
tolerance_change=1e-9,
history_size=100,
line_search_fn=None,
parameters=None,
weight_decay=None,
grad_clip=None,
name=None,
):
learning_rate: float = 1.0,
max_iter: int = 20,
max_eval: int | None = None,
tolerance_grad: float = 1e-7,
tolerance_change: float = 1e-9,
history_size: int = 100,
line_search_fn: str | None = None,
parameters: Sequence[Tensor] | Sequence[_ParameterConfig] | None = None,
weight_decay: float | WeightDecayRegularizer | None = None,
grad_clip: GradientClipBase | None = None,
name: str | None = None,
) -> None:
if max_eval is None:
max_eval = max_iter * 5 // 4
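
The annotated signature can be exercised like this (a minimal usage sketch assuming a simple `paddle.nn.Linear` model; not part of the diff):

import paddle

linear = paddle.nn.Linear(10, 10)
opt = paddle.optimizer.LBFGS(
    learning_rate=0.1,
    max_eval=None,                   # int | None: None falls back to max_iter * 5 // 4
    line_search_fn="strong_wolfe",   # str | None
    parameters=linear.parameters(),  # Sequence[Tensor]
)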

@@ -452,7 +482,7 @@ def __init__(

self._numel_cache = None

def state_dict(self):
def state_dict(self) -> _LbfgsStateDict:
r"""Returns the state of the optimizer as a :class:`dict`.

Return:
@@ -496,7 +526,6 @@ def state_dict(self):
... loss = train_step(inputs, targets)
... n_iter = opt.state_dict()["state"]["func_evals"]
... print("n_iter:", n_iter)
...
"""

packed_state = {}
@@ -505,7 +534,7 @@ def state_dict(self):

return {'state': packed_state}
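
With the new return annotation, type checkers can see through the result (a small sketch, reusing the hypothetical `opt` from the sketch above):

sd = opt.state_dict()                     # statically typed as _LbfgsStateDict
n_evals: int = sd["state"]["func_evals"]  # key and value types are checked;
                                          # the entry is populated once step() has run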

def _numel(self):
def _numel(self) -> int:
# compute the number of all parameters
if self._numel_cache is None:
self._numel_cache = reduce(
@@ -553,7 +582,7 @@ def _directional_evaluate(self, closure, x, alpha, d):
return loss, flat_grad

@framework.non_static_only
def step(self, closure):
def step(self, closure) -> Tensor:
"""Performs a single optimization step.

Args:
@@ -778,7 +807,7 @@ def obj_func(x, alpha, d):

def minimize(
self, loss, startup_program=None, parameters=None, no_grad_set=None
):
) -> NoReturn:
"""Empty method. LBFGS optimizer does not use this way to minimize ``loss``. Please refer 'Examples' of LBFGS() above for usage."""
raise NotImplementedError(
"LBFGS optimizer does not use this way to minimize loss. Please refer 'Examples' of LBFGS() for usage."
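The `NoReturn` annotation tells type checkers that `minimize` never returns normally, so any code after a call to it is unreachable (an illustrative sketch; `loss` and `closure` as in the docstring example above):

try:
    opt.minimize(loss)   # typed NoReturn: unconditionally raises NotImplementedError
except NotImplementedError:
    opt.step(closure)    # the supported way to drive LBFGS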
5 changes: 4 additions & 1 deletion python/paddle/tensor/tensor.prototype.pyi
@@ -24,7 +24,7 @@ from paddle._typing import * # noqa: F403

# isort: on

from typing import Any, Literal, overload
from typing import Any, Iterator, Literal, overload

import numpy.typing as npt

@@ -263,5 +263,8 @@ class Tensor:
@property
def type(self) -> Any: ...

# virtual methods
def __iter__(self) -> Iterator[Tensor]: ... # For iterating over the tensor

# annotation: ${tensor_alias}
__qualname__: Literal["Tensor"]
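
With the `__iter__` stub added, iterating over a `Tensor` now type-checks, and each element is itself typed as `Tensor` (a minimal sketch):

import paddle

t = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])
for row in t:            # OK: __iter__ returns Iterator[Tensor]
    print(row.shape)     # row is a Tensor, so .shape type-checks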