replace NaturalExpDecay, ExponentialDecay, InverseTimeDecay with 2.0 version #54424

Merged: 7 commits, Jun 27, 2023
7 changes: 3 additions & 4 deletions python/paddle/distributed/passes/ps_server_pass.py
@@ -18,7 +18,6 @@
 from paddle.fluid.layers.learning_rate_scheduler import (
     exponential_decay,
     inverse_time_decay,
-    natural_exp_decay,
     noam_decay,
 )
 from paddle.optimizer.lr import (
@@ -112,9 +111,9 @@ def _get_lr_scheduler_program(self, lr_scheduler, lr_decay_steps):
             with paddle.static.program_guard(
                 decay_main_program, decay_startup_program
             ):
-                lr = natural_exp_decay(
-                    1.0, lr_decay_steps, lr_scheduler.gamma, True
-                )
+                lr = paddle.optimizer.lr.NaturalExpDecay(
+                    1.0, lr_scheduler.gamma
+                ).get_lr()
                 lr_name = lr.name
                 logging.warn(
                     "NaturalExpDecay is set, staircase = True, global learning rate decay step is [ %d ], Change decay steps as follow: \n"

Review comment (Contributor): The return value here changes from a Tensor to a class instance; we need to check whether the code can be changed this way.
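For reference, a minimal sketch of the behaviour difference the reviewer is asking about, assuming the Paddle 2.x paddle.optimizer.lr API (the snippet is illustrative and not part of the PR): the old natural_exp_decay() built ops and returned a Variable with a .name attribute, while NaturalExpDecay(...).get_lr() returns a plain Python float, so the following lr_name = lr.name line is the part that needs checking.

    import paddle

    # 2.x scheduler object; gamma plays the role of the old decay_rate.
    sched = paddle.optimizer.lr.NaturalExpDecay(learning_rate=1.0, gamma=0.5)

    lr = sched.get_lr()   # plain Python float, not a framework Variable/Tensor
    print(type(lr))       # <class 'float'>
    # lr.name             # would raise AttributeError; the old static-graph
    #                     # natural_exp_decay() returned a Variable, so
    #                     # `lr_name = lr.name` only worked with that API.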
252 changes: 0 additions & 252 deletions python/paddle/fluid/dygraph/learning_rate_scheduler.py
@@ -24,9 +24,6 @@
 __all__ = [
     'NoamDecay',
     'PiecewiseDecay',
-    'NaturalExpDecay',
-    'ExponentialDecay',
-    'InverseTimeDecay',
     'PolynomialDecay',
     'CosineDecay',
     'LinearLrWarmup',
@@ -197,255 +194,6 @@ def step(self):
return self.create_lr_var(self.vars[len(self.values) - 1])


class NaturalExpDecay(LearningRateDecay):
r"""
:api_attr: imperative

Applies natural exponential decay to the initial learning rate.

The algorithm can be described as following.

.. math::

decayed\_learning\_rate = learning\_rate * e^{y}

If staircase is set to False, then:

.. math::

y = - decay\_rate * \\frac{global\_step}{decay\_steps}

If staircase is set to True, then:

.. math::

y = - decay\_rate * math.floor(\\frac{global\_step}{decay\_steps})

Parameters:
learning_rate(Variable|float): The initial learning rate. If the type
is Variable, it's a tensor with shape [1], the data type can be
float32 or float64. It also can be set to python int number.
decay_steps(int): The decay step size. It determines the decay cycle.
decay_rate(float): The decay rate.
staircase(bool, optional): If set to True, decay the learning rate at discrete intervals. The
default value is False.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.

Returns:
None.

Examples:
.. code-block:: python

import paddle.fluid as fluid
import paddle
base_lr = 0.1
with fluid.dygraph.guard():
emb = paddle.nn.Embedding(10, 10)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.dygraph.NaturalExpDecay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True),
parameter_list=emb.parameters())

"""

def __init__(
self,
learning_rate,
decay_steps,
decay_rate,
staircase=False,
begin=0,
step=1,
dtype='float32',
):
super().__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.decay_steps = decay_steps
self.decay_rate = decay_rate
self.staircase = staircase

def step(self):
div_res = self.create_lr_var(self.step_num / self.decay_steps)
if self.staircase:
div_res = paddle.floor(div_res)
decayed_lr = self.learning_rate * paddle.exp(
-1 * self.decay_rate * div_res
)

return decayed_lr

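With this dygraph class removed, the intended replacement is paddle.optimizer.lr.NaturalExpDecay, which is epoch-based and has no decay_steps or staircase arguments. A minimal sketch of the 2.x usage, assuming decay is advanced once per epoch via step():

    import paddle

    base_lr = 0.1
    # 2.x: lr = base_lr * e^(-gamma * epoch); epoch advances on each step() call.
    scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=base_lr, gamma=0.5)

    for epoch in range(3):
        print(epoch, scheduler.get_lr())  # decays roughly as e^(-0.5 * epoch)
        scheduler.step()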

class ExponentialDecay(LearningRateDecay):
r"""
:api_attr: imperative

Applies exponential decay to the learning rate.

The algorithm can be described as following.

.. math::

decayed\_learning\_rate = learning\_rate * decay\_rate ^ y

If staircase is set to False, then:

.. math::

y = \\frac{global\_step}{decay\_steps}

If staircase is set to True, then:

.. math::

y = math.floor(\\frac{global\_step}{decay\_steps})


Parameters:
learning_rate(Variable|float): The initial learning rate. If the type
is Variable, it's a tensor with shape [1], the data type can be
float32 or float64. It also can be set to python int number.
decay_steps(int): The decay step size. It determines the decay cycle.
decay_rate(float): The decay rate.
staircase(bool, optional): If set to True, decay the learning rate at discrete intervals. The
default value is False.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.

Returns:
None.

Examples:
.. code-block:: python

import paddle.fluid as fluid
base_lr = 0.1
with fluid.dygraph.guard():
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.dygraph.ExponentialDecay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True))

"""

def __init__(
self,
learning_rate,
decay_steps,
decay_rate,
staircase=False,
begin=0,
step=1,
dtype='float32',
):
super().__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.decay_steps = decay_steps
self.decay_rate = decay_rate
self.staircase = staircase

def step(self):
div_res = self.create_lr_var(self.step_num / self.decay_steps)
if self.staircase:
div_res = paddle.floor(div_res)

decayed_lr = self.learning_rate * (self.decay_rate**div_res)

return decayed_lr

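The 2.x counterpart is paddle.optimizer.lr.ExponentialDecay, which decays the rate as base_lr * gamma^epoch and is passed to the optimizer directly. A short usage sketch under that assumption:

    import paddle

    linear = paddle.nn.Linear(10, 10)
    # gamma corresponds to the old decay_rate; decay_steps/staircase are gone.
    scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.1, gamma=0.5)
    sgd = paddle.optimizer.SGD(learning_rate=scheduler,
                               parameters=linear.parameters())

    for epoch in range(3):
        # ... run one epoch of training with sgd ...
        scheduler.step()  # typically called once per epoch in 2.x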

class InverseTimeDecay(LearningRateDecay):
r"""
:api_attr: imperative

Applies inverse time decay to the initial learning rate.

The algorithm can be described as following.
If staircase is set to False, then:

.. math::

decayed\_learning\_rate = \\frac{learning\_rate}{1 + decay\_rate * \\frac{global\_step}{decay\_step}}

If staircase is set to True, then:

.. math::

decayed\_learning\_rate = \\frac{learning\_rate}{1 + decay\_rate * math.floor(\\frac{global\_step}{decay\_step})}

Parameters:
learning_rate(Variable|float): The initial learning rate. If the type
is Variable, it's a tensor with shape [1], the data type can be
float32 or float64. It also can be set to python int number.
decay_steps(int): The decay step size. It determines the decay cycle.
decay_rate(float): The decay rate.
staircase(bool, optional): If set to True, decay the learning rate at discrete intervals. The
default value is False.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be
'float32', 'float64'. The default value is 'float32'.

Returns:
None.

Examples:
.. code-block:: python

import paddle.fluid as fluid
import paddle
base_lr = 0.1
with fluid.dygraph.guard():
emb = paddle.nn.Embedding(10, 10)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.dygraph.InverseTimeDecay(
learning_rate=base_lr,
decay_steps=10000,
decay_rate=0.5,
staircase=True),
parameter_list = emb.parameters())

"""

def __init__(
self,
learning_rate,
decay_steps,
decay_rate,
staircase=False,
begin=0,
step=1,
dtype='float32',
):
super().__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.decay_steps = decay_steps
self.decay_rate = decay_rate
self.staircase = staircase

def step(self):
div_res = self.create_lr_var(self.step_num / self.decay_steps)
if self.staircase:
div_res = paddle.floor(div_res)

decayed_lr = self.learning_rate / (1 + self.decay_rate * div_res)

return decayed_lr

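Likewise, the 2.x replacement is paddle.optimizer.lr.InverseTimeDecay, which computes base_lr / (1 + gamma * epoch). A minimal sketch, assuming the scheduler starts counting from epoch 0:

    import paddle

    scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.1, gamma=0.5)

    for _ in range(3):
        # expected roughly 0.1, 0.0667, 0.05 as the epoch counter grows
        print(scheduler.get_lr())
        scheduler.step()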

class PolynomialDecay(LearningRateDecay):
r"""
:api_attr: imperative
Expand Down
12 changes: 6 additions & 6 deletions python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -166,8 +166,8 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.ExponentialDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.ExponentialDecay(
learning_rate, decay_rate
)
return decay
else:
@@ -228,8 +228,8 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.NaturalExpDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.NaturalExpDecay(
learning_rate, decay_rate
)
return decay
else:
@@ -288,8 +288,8 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.InverseTimeDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.InverseTimeDecay(
learning_rate, decay_rate
)
return decay
else:
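Net effect of the three hunks above: in dygraph mode the legacy fluid helpers now hand back 2.0 LRScheduler objects and silently drop decay_steps and staircase. A hedged sketch of the resulting mapping (argument values are illustrative; the old call is shown only in the comment):

    import paddle

    # Before this PR (dygraph mode):
    #   decay = imperate_lr.ExponentialDecay(learning_rate, decay_steps,
    #                                        decay_rate, staircase)
    # After this PR the same helper effectively builds:
    decay = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.1, gamma=0.5)
    # decay_steps and staircase are no longer used; the rate now changes once
    # per decay.step() call instead of per global_step / decay_steps.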