Skip to content

Commit

Permalink
replace NaturalExpDecay, ExponentialDecay, InverseTimeDecay with their 2.0 versions (#54424)
Browse files Browse the repository at this point in the history

* remove the NaturalExpDecay in fluid

* fix bug

* remove the ExponentialDecay in fluid

* remove the InverseTimeDecay in fluid

* remove the InverseTimeDecay class

* fix bug
  • Loading branch information
longranger2 authored Jun 27, 2023
1 parent 5bbbf5d commit de60c1d
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 314 deletions.
7 changes: 3 additions & 4 deletions python/paddle/distributed/passes/ps_server_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from paddle.fluid.layers.learning_rate_scheduler import (
exponential_decay,
inverse_time_decay,
natural_exp_decay,
noam_decay,
)
from paddle.optimizer.lr import (
Expand Down Expand Up @@ -112,9 +111,9 @@ def _get_lr_scheduler_program(self, lr_scheduler, lr_decay_steps):
with paddle.static.program_guard(
decay_main_program, decay_startup_program
):
lr = natural_exp_decay(
1.0, lr_decay_steps, lr_scheduler.gamma, True
)
lr = paddle.optimizer.lr.NaturalExpDecay(
1.0, lr_scheduler.gamma
).get_lr()
lr_name = lr.name
logging.warn(
"NaturalExpDecay is set, staircase = True, global learning rate decay step is [ %d ], Change decay steps as follow: \n"
Expand Down
252 changes: 0 additions & 252 deletions python/paddle/fluid/dygraph/learning_rate_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
__all__ = [
'NoamDecay',
'PiecewiseDecay',
'NaturalExpDecay',
'ExponentialDecay',
'InverseTimeDecay',
'PolynomialDecay',
'CosineDecay',
'LinearLrWarmup',
Expand Down Expand Up @@ -197,255 +194,6 @@ def step(self):
return self.create_lr_var(self.vars[len(self.values) - 1])


class NaturalExpDecay(LearningRateDecay):
    r"""
    :api_attr: imperative

    Natural-exponential decay of the initial learning rate for dygraph
    training.

    The decayed rate is

    .. math::

        decayed\_learning\_rate = learning\_rate * e^{y}

    where :math:`y = -decay\_rate \cdot \frac{global\_step}{decay\_steps}`
    when ``staircase`` is False, and
    :math:`y = -decay\_rate \cdot \lfloor\frac{global\_step}{decay\_steps}\rfloor`
    when ``staircase`` is True (the rate then changes only at discrete
    intervals).

    Parameters:
        learning_rate(Variable|float): Initial learning rate. A Variable
            must be a tensor of shape [1] with float32 or float64 data; a
            plain python number is also accepted.
        decay_steps(int): Step count of one decay cycle.
        decay_rate(int): The decay rate.
        staircase(bool, optional): If True, decay at discrete intervals.
            Default False.
        begin(int, optional): Initial value of global_step. Default 0.
        step(int, optional): Increment applied to global_step per update.
            Default 1.
        dtype(str, optional): Data type of the created learning-rate
            variable, 'float32' or 'float64'. Default 'float32'.

    Returns:
        None.

    Examples:
        .. code-block:: python
            import paddle.fluid as fluid
            import paddle
            base_lr = 0.1
            with fluid.dygraph.guard():
                emb = paddle.nn.Embedding(10, 10)
                sgd_optimizer = fluid.optimizer.SGD(
                    learning_rate=fluid.dygraph.NaturalExpDecay(
                        learning_rate=base_lr,
                        decay_steps=10000,
                        decay_rate=0.5,
                        staircase=True),
                    parameter_list=emb.parameters())
    """

    def __init__(
        self,
        learning_rate,
        decay_steps,
        decay_rate,
        staircase=False,
        begin=0,
        step=1,
        dtype='float32',
    ):
        super().__init__(begin, step, dtype)
        # Store the configuration verbatim; the decayed value is computed
        # lazily on every step() call.
        self.learning_rate = learning_rate
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.staircase = staircase

    def step(self):
        # Fraction of a decay cycle completed at the current global step.
        cycle_ratio = self.create_lr_var(self.step_num / self.decay_steps)
        if self.staircase:
            # Quantize so the rate drops only once per full cycle.
            cycle_ratio = paddle.floor(cycle_ratio)
        return self.learning_rate * paddle.exp(
            -1 * self.decay_rate * cycle_ratio
        )


class ExponentialDecay(LearningRateDecay):
    r"""
    :api_attr: imperative

    Exponential decay of the learning rate for dygraph training.

    The decayed rate is

    .. math::

        decayed\_learning\_rate = learning\_rate * decay\_rate ^ y

    where :math:`y = \frac{global\_step}{decay\_steps}` when ``staircase``
    is False, and :math:`y = \lfloor\frac{global\_step}{decay\_steps}\rfloor`
    when ``staircase`` is True (the rate then changes only at discrete
    intervals).

    Parameters:
        learning_rate(Variable|float): Initial learning rate. A Variable
            must be a tensor of shape [1] with float32 or float64 data; a
            plain python number is also accepted.
        decay_steps(int): Step count of one decay cycle.
        decay_rate(float): The decay rate.
        staircase(bool, optional): If True, decay at discrete intervals.
            Default False.
        begin(int, optional): Initial value of global_step. Default 0.
        step(int, optional): Increment applied to global_step per update.
            Default 1.
        dtype(str, optional): Data type of the created learning-rate
            variable, 'float32' or 'float64'. Default 'float32'.

    Returns:
        None.

    Examples:
        .. code-block:: python
            import paddle.fluid as fluid
            base_lr = 0.1
            with fluid.dygraph.guard():
                sgd_optimizer = fluid.optimizer.SGD(
                    learning_rate=fluid.dygraph.ExponentialDecay(
                        learning_rate=base_lr,
                        decay_steps=10000,
                        decay_rate=0.5,
                        staircase=True))
    """

    def __init__(
        self,
        learning_rate,
        decay_steps,
        decay_rate,
        staircase=False,
        begin=0,
        step=1,
        dtype='float32',
    ):
        super().__init__(begin, step, dtype)
        # Store the configuration verbatim; the decayed value is computed
        # lazily on every step() call.
        self.learning_rate = learning_rate
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.staircase = staircase

    def step(self):
        # Fraction of a decay cycle completed at the current global step.
        cycle_ratio = self.create_lr_var(self.step_num / self.decay_steps)
        if self.staircase:
            # Quantize so the rate drops only once per full cycle.
            cycle_ratio = paddle.floor(cycle_ratio)
        return self.learning_rate * (self.decay_rate**cycle_ratio)


class InverseTimeDecay(LearningRateDecay):
    r"""
    :api_attr: imperative

    Inverse-time decay of the initial learning rate for dygraph training.

    When ``staircase`` is False:

    .. math::

        decayed\_learning\_rate = \frac{learning\_rate}{1 + decay\_rate * \frac{global\_step}{decay\_steps}}

    When ``staircase`` is True the quotient is floored first, so the rate
    changes only at discrete intervals:

    .. math::

        decayed\_learning\_rate = \frac{learning\_rate}{1 + decay\_rate * \lfloor\frac{global\_step}{decay\_steps}\rfloor}

    Parameters:
        learning_rate(Variable|float): Initial learning rate. A Variable
            must be a tensor of shape [1] with float32 or float64 data; a
            plain python number is also accepted.
        decay_steps(int): Step count of one decay cycle.
        decay_rate(float): The decay rate.
        staircase(bool, optional): If True, decay at discrete intervals.
            Default False.
        begin(int, optional): Initial value of global_step. Default 0.
        step(int, optional): Increment applied to global_step per update.
            Default 1.
        dtype(str, optional): Data type of the created learning-rate
            variable, 'float32' or 'float64'. Default 'float32'.

    Returns:
        None.

    Examples:
        .. code-block:: python
            import paddle.fluid as fluid
            import paddle
            base_lr = 0.1
            with fluid.dygraph.guard():
                emb = paddle.nn.Embedding(10, 10)
                sgd_optimizer = fluid.optimizer.SGD(
                    learning_rate=fluid.dygraph.InverseTimeDecay(
                        learning_rate=base_lr,
                        decay_steps=10000,
                        decay_rate=0.5,
                        staircase=True),
                    parameter_list = emb.parameters())
    """

    def __init__(
        self,
        learning_rate,
        decay_steps,
        decay_rate,
        staircase=False,
        begin=0,
        step=1,
        dtype='float32',
    ):
        super().__init__(begin, step, dtype)
        # Store the configuration verbatim; the decayed value is computed
        # lazily on every step() call.
        self.learning_rate = learning_rate
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.staircase = staircase

    def step(self):
        # Fraction of a decay cycle completed at the current global step.
        cycle_ratio = self.create_lr_var(self.step_num / self.decay_steps)
        if self.staircase:
            # Quantize so the rate drops only once per full cycle.
            cycle_ratio = paddle.floor(cycle_ratio)
        return self.learning_rate / (1 + self.decay_rate * cycle_ratio)


class PolynomialDecay(LearningRateDecay):
r"""
:api_attr: imperative
Expand Down
12 changes: 6 additions & 6 deletions python/paddle/fluid/layers/learning_rate_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.ExponentialDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.ExponentialDecay(
learning_rate, decay_rate
)
return decay
else:
Expand Down Expand Up @@ -228,8 +228,8 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.NaturalExpDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.NaturalExpDecay(
learning_rate, decay_rate
)
return decay
else:
Expand Down Expand Up @@ -288,8 +288,8 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with default_main_program()._lr_schedule_guard():
if in_dygraph_mode():
decay = imperate_lr.InverseTimeDecay(
learning_rate, decay_steps, decay_rate, staircase
decay = paddle.optimizer.lr.InverseTimeDecay(
learning_rate, decay_rate
)
return decay
else:
Expand Down
Loading

0 comments on commit de60c1d

Please sign in to comment.