From 7f43f828df8eb4d7a2a7b0731799d38e6d8bf0fa Mon Sep 17 00:00:00 2001 From: PommesPeter <434596665@qq.com> Date: Wed, 9 Aug 2023 20:13:37 +0800 Subject: [PATCH 1/5] fix: updated code examples. --- python/paddle/optimizer/adadelta.py | 65 +++++++-------- python/paddle/optimizer/adagrad.py | 63 +++++++------- python/paddle/optimizer/adam.py | 125 ++++++++++++++-------------- 3 files changed, 124 insertions(+), 129 deletions(-) diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index 0edebb176255d..d914e0cc7ff78 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -70,39 +70,38 @@ class Adadelta(Optimizer): Examples: .. code-block:: python - import paddle - - inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1) - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - adadelta = paddle.optimizer.Adadelta(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01) - back = out.backward() - adadelta.step() - adadelta.clear_grad() - - #Note that the learning_rate of linear_2 is 0.01. - linear_1 = paddle.nn.Linear(10, 10) - linear_2 = paddle.nn.Linear(10, 10) - inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear_1(inp) - out = linear_2(out) - loss = paddle.mean(out) - adadelta = paddle.optimizer.Adadelta( - learning_rate=0.1, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1, - }], - weight_decay=0.01) - out.backward() - adadelta.step() - adadelta.clear_grad() + >>> import paddle + >>> paddle.seed(2023) + >>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1) + >>> linear = paddle.nn.Linear(10, 10) + >>> out = linear(inp) + >>> loss = paddle.mean(out) + >>> beta1 = paddle.to_tensor([0.9], dtype="float32") + >>> beta2 = paddle.to_tensor([0.99], dtype="float32") + >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01) + >>> back = out.backward() + >>> adadelta.step() + >>> adadelta.clear_grad() + >>> # Note that the learning_rate of linear_2 is 0.01. + >>> linear_1 = paddle.nn.Linear(10, 10) + >>> linear_2 = paddle.nn.Linear(10, 10) + >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear_1(inp) + >>> out = linear_2(out) + >>> loss = paddle.mean(out) + >>> adadelta = paddle.optimizer.Adadelta( + ... learning_rate=0.1, + ... parameters=[{ + ... 'params': linear_1.parameters() + ... }, { + ... 'params': linear_2.parameters(), + ... 'weight_decay': 0.001, + ... 'learning_rate': 0.1, + ... }], + ... weight_decay=0.01) + >>> out.backward() + >>> adadelta.step() + >>> adadelta.clear_grad() """ diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index c19b3116de3fc..4d2f3f787cbdd 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -70,38 +70,37 @@ class Adagrad(Optimizer): Examples: .. code-block:: python - import paddle - - inp = paddle.rand(shape=[10, 10]) - linear = paddle.nn.Linear(10, 10) - out = linear(inp) - loss = paddle.mean(out) - adagrad = paddle.optimizer.Adagrad(learning_rate=0.1, - parameters=linear.parameters()) - out.backward() - adagrad.step() - adagrad.clear_grad() - - #Note that the learning_rate of linear_2 is 0.01. 
- linear_1 = paddle.nn.Linear(10, 10) - linear_2 = paddle.nn.Linear(10, 10) - inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear_1(inp) - out = linear_2(out) - loss = paddle.mean(out) - adagrad = paddle.optimizer.Adagrad( - learning_rate=0.1, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1, - }], - weight_decay=0.01) - out.backward() - adagrad.step() - adagrad.clear_grad() + >>> import paddle + >>> paddle.seed(2023) + >>> inp = paddle.rand(shape=[10, 10]) + >>> linear = paddle.nn.Linear(10, 10) + >>> out = linear(inp) + >>> loss = paddle.mean(out) + >>> adagrad = paddle.optimizer.Adagrad(learning_rate=0.1, + ... parameters=linear.parameters()) + >>> out.backward() + >>> adagrad.step() + >>> adagrad.clear_grad() + >>> # Note that the learning_rate of linear_2 is 0.01. + >>> linear_1 = paddle.nn.Linear(10, 10) + >>> linear_2 = paddle.nn.Linear(10, 10) + >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear_1(inp) + >>> out = linear_2(out) + >>> loss = paddle.mean(out) + >>> adagrad = paddle.optimizer.Adagrad( + ... learning_rate=0.1, + ... parameters=[{ + ... 'params': linear_1.parameters() + ... }, { + ... 'params': linear_2.parameters(), + ... 'weight_decay': 0.001, + ... 'learning_rate': 0.1, + ... }], + ... weight_decay=0.01) + >>> out.backward() + >>> adagrad.step() + >>> adagrad.clear_grad() """ _moment_acc_str = "moment" diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index c1a4019b76e3b..0ecd7a760963a 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -98,63 +98,60 @@ class Adam(Optimizer): .. code-block:: python :name: code-example1 - import paddle - - linear = paddle.nn.Linear(10, 10) - inp = paddle.rand([10,10], dtype="float32") - out = linear(inp) - loss = paddle.mean(out) - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters()) - loss.backward() - adam.step() - adam.clear_grad() + >>> import paddle + >>> paddle.seed(2023) + >>> linear = paddle.nn.Linear(10, 10) + >>> inp = paddle.rand([10,10], dtype="float32") + >>> out = linear(inp) + >>> loss = paddle.mean(out) + >>> adam = paddle.optimizer.Adam(learning_rate=0.1, + ... parameters=linear.parameters()) + >>> loss.backward() + >>> adam.step() + >>> adam.clear_grad() .. code-block:: python :name: code-example2 - # Adam with beta1/beta2 as Tensor and weight_decay as float - import paddle - - linear = paddle.nn.Linear(10, 10) - inp = paddle.rand([10,10], dtype="float32") - out = linear(inp) - loss = paddle.mean(out) - - beta1 = paddle.to_tensor([0.9], dtype="float32") - beta2 = paddle.to_tensor([0.99], dtype="float32") - - adam = paddle.optimizer.Adam(learning_rate=0.1, - parameters=linear.parameters(), - beta1=beta1, - beta2=beta2, - weight_decay=0.01) - loss.backward() - adam.step() - adam.clear_grad() - - #Note that the learning_rate of linear_2 is 0.01. 
- linear_1 = paddle.nn.Linear(10, 10) - linear_2 = paddle.nn.Linear(10, 10) - inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) - out = linear_1(inp) - out = linear_2(out) - loss = paddle.mean(out) - adam = paddle.optimizer.Adam( - learning_rate=0.1, - parameters=[{ - 'params': linear_1.parameters() - }, { - 'params': linear_2.parameters(), - 'weight_decay': 0.001, - 'learning_rate': 0.1, - 'beta1': 0.8 - }], - weight_decay=0.01, - beta1=0.9) - loss.backward() - adam.step() - adam.clear_grad() + >>> # Adam with beta1/beta2 as Tensor and weight_decay as float + >>> import paddle + >>> paddle.seed(2023) + >>> linear = paddle.nn.Linear(10, 10) + >>> inp = paddle.rand([10,10], dtype="float32") + >>> out = linear(inp) + >>> loss = paddle.mean(out) + >>> beta1 = paddle.to_tensor([0.9], dtype="float32") + >>> beta2 = paddle.to_tensor([0.99], dtype="float32") + >>> adam = paddle.optimizer.Adam(learning_rate=0.1, + ... parameters=linear.parameters(), + ... beta1=beta1, + ... beta2=beta2, + ... weight_decay=0.01) + >>> loss.backward() + >>> adam.step() + >>> adam.clear_grad() + >>> # Note that the learning_rate of linear_2 is 0.01. + >>> linear_1 = paddle.nn.Linear(10, 10) + >>> linear_2 = paddle.nn.Linear(10, 10) + >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1) + >>> out = linear_1(inp) + >>> out = linear_2(out) + >>> loss = paddle.mean(out) + >>> adam = paddle.optimizer.Adam( + ... learning_rate=0.1, + ... parameters=[{ + ... 'params': linear_1.parameters() + ... }, { + ... 'params': linear_2.parameters(), + ... 'weight_decay': 0.001, + ... 'learning_rate': 0.1, + ... 'beta1': 0.8 + ... }], + ... weight_decay=0.01, + ... beta1=0.9) + >>> loss.backward() + >>> adam.step() + >>> adam.clear_grad() """ _moment1_acc_str = "moment1" @@ -409,17 +406,17 @@ def step(self): Examples: .. code-block:: python - import paddle - - a = paddle.rand([2,13], dtype="float32") - linear = paddle.nn.Linear(13, 5) - # This can be any optimizer supported by dygraph. - adam = paddle.optimizer.Adam(learning_rate = 0.01, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adam.step() - adam.clear_grad() + >>> import paddle + + >>> a = paddle.rand([2,13], dtype="float32") + >>> linear = paddle.nn.Linear(13, 5) + >>> # This can be any optimizer supported by dygraph. + >>> adam = paddle.optimizer.Adam(learning_rate = 0.01, + ... parameters = linear.parameters()) + >>> out = linear(a) + >>> out.backward() + >>> adam.step() + >>> adam.clear_grad() """ if paddle.fluid.dygraph.base.in_declarative_mode(): self._declarative_step() From 5547c945dd564902dd9600233681ceedc0a23acb Mon Sep 17 00:00:00 2001 From: PommesPeter <434596665@qq.com> Date: Thu, 10 Aug 2023 20:52:07 +0800 Subject: [PATCH 2/5] fix: updated blank lines. 
--- python/paddle/optimizer/adadelta.py | 2 + python/paddle/optimizer/adagrad.py | 2 + python/paddle/optimizer/adam.py | 2 + python/paddle/optimizer/lr.py | 1447 ++++++++++++++------------- 4 files changed, 732 insertions(+), 721 deletions(-) diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index d914e0cc7ff78..de2d3e3e1d9eb 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -72,6 +72,7 @@ class Adadelta(Optimizer): >>> import paddle >>> paddle.seed(2023) + >>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1) >>> linear = paddle.nn.Linear(10, 10) >>> out = linear(inp) @@ -82,6 +83,7 @@ class Adadelta(Optimizer): >>> back = out.backward() >>> adadelta.step() >>> adadelta.clear_grad() + >>> # Note that the learning_rate of linear_2 is 0.01. >>> linear_1 = paddle.nn.Linear(10, 10) >>> linear_2 = paddle.nn.Linear(10, 10) diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index 4d2f3f787cbdd..89e4567d5f6c0 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -72,6 +72,7 @@ class Adagrad(Optimizer): >>> import paddle >>> paddle.seed(2023) + >>> inp = paddle.rand(shape=[10, 10]) >>> linear = paddle.nn.Linear(10, 10) >>> out = linear(inp) @@ -81,6 +82,7 @@ class Adagrad(Optimizer): >>> out.backward() >>> adagrad.step() >>> adagrad.clear_grad() + >>> # Note that the learning_rate of linear_2 is 0.01. >>> linear_1 = paddle.nn.Linear(10, 10) >>> linear_2 = paddle.nn.Linear(10, 10) diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 0ecd7a760963a..9a71e917e737d 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -100,6 +100,7 @@ class Adam(Optimizer): >>> import paddle >>> paddle.seed(2023) + >>> linear = paddle.nn.Linear(10, 10) >>> inp = paddle.rand([10,10], dtype="float32") >>> out = linear(inp) @@ -116,6 +117,7 @@ class Adam(Optimizer): >>> # Adam with beta1/beta2 as Tensor and weight_decay as float >>> import paddle >>> paddle.seed(2023) + >>> linear = paddle.nn.Linear(10, 10) >>> inp = paddle.rand([10,10], dtype="float32") >>> out = linear(inp) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index e628509e52afc..360167ee8cb46 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -64,31 +64,31 @@ class LRScheduler: .. code-block:: python - import paddle - from paddle.optimizer.lr import LRScheduler - - class StepDecay(LRScheduler): - def __init__(self, - learning_rate, - step_size, - gamma=0.1, - last_epoch=-1, - verbose=False): - if not isinstance(step_size, int): - raise TypeError( - "The type of 'step_size' must be 'int', but received %s." % - type(step_size)) - if gamma >= 1.0: - raise ValueError('gamma should be < 1.0.') - - self.step_size = step_size - self.gamma = gamma - super().__init__(learning_rate, last_epoch, verbose) - - def get_lr(self): - i = self.last_epoch // self.step_size - return self.base_lr * (self.gamma**i) - + >>> import paddle + >>> from paddle.optimizer.lr import LRScheduler + + >>> class StepDecay(LRScheduler): + ... def __init__(self, + ... learning_rate, + ... step_size, + ... gamma=0.1, + ... last_epoch=-1, + ... verbose=False): + ... if not isinstance(step_size, int): + ... raise TypeError( + ... "The type of 'step_size' must be 'int', but received %s." % + ... type(step_size)) + ... if gamma >= 1.0: + ... raise ValueError('gamma should be < 1.0.') + ... + ... 
self.step_size = step_size + ... self.gamma = gamma + ... super().__init__(learning_rate, last_epoch, verbose) + ... + ... def get_lr(self): + ... i = self.last_epoch // self.step_size + ... return self.base_lr * (self.gamma**i) + ... """ def __init__(self, learning_rate=0.1, last_epoch=-1, verbose=False): @@ -128,29 +128,29 @@ def step(self, epoch=None): Examples: .. code-block:: python - import paddle - value = paddle.arange(26, dtype='float32') - a = paddle.reshape(value, [2, 13]) - linear = paddle.nn.Linear(13, 5) - adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adadelta.step() - adadelta.clear_grad() + >>> import paddle + >>> value = paddle.arange(26, dtype='float32') + >>> a = paddle.reshape(value, [2, 13]) + >>> linear = paddle.nn.Linear(13, 5) + >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95, + ... parameters = linear.parameters()) + >>> out = linear(a) + >>> out.backward() + >>> adadelta.step() + >>> adadelta.clear_grad() Examples: .. code-block:: python - import paddle - value = paddle.arange(26, dtype='float32') - a = paddle.reshape(value, [2, 13]) - linear = paddle.nn.Linear(13, 5) - adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95, - parameters = linear.parameters()) - out = linear(a) - out.backward() - adadelta.step() - adadelta.clear_grad() + >>> import paddle + >>> value = paddle.arange(26, dtype='float32') + >>> a = paddle.reshape(value, [2, 13]) + >>> linear = paddle.nn.Linear(13, 5) + >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95, + ... parameters = linear.parameters()) + >>> out = linear(a) + >>> out.backward() + >>> adadelta.step() + >>> adadelta.clear_grad() """ if epoch is None: self.last_epoch += 1 @@ -267,57 +267,59 @@ class NoamDecay(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> paddle.seed(2023) + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. 
code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> np.random.seed(2023) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... """ def __init__( @@ -377,56 +379,58 @@ class PiecewiseDecay(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> paddle.seed(2023) + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... 
sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch """ def __init__(self, boundaries, values, last_epoch=-1, verbose=False): @@ -474,54 +478,55 @@ class NaturalExpDecay(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... 
loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> np.random.seed(2023) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch """ def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -561,57 +566,57 @@ class InverseTimeDecay(LRScheduler): .. 
code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... 
""" def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -664,56 +669,56 @@ class PolynomialDecay(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... 
fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch """ def __init__( @@ -795,58 +800,58 @@ class LinearWarmup(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.LinearWarmup( - learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.LinearWarmup( + ... learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.LinearWarmup( - learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.LinearWarmup( + ... learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... 
for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch """ def __init__( @@ -935,56 +940,56 @@ class ExponentialDecay(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... 
+ >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch """ def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -1033,56 +1038,56 @@ class MultiStepDecay(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... 
scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch """ def __init__( @@ -1147,56 +1152,56 @@ class StepDecay(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... 
y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch """ def __init__( @@ -1251,57 +1256,57 @@ class LambdaDecay(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. 
code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... """ def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False): @@ -1359,57 +1364,57 @@ class ReduceOnPlateau(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step(loss) # If you update learning rate each step - # scheduler.step(loss) # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... 
loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step(loss) # If you update learning rate each step + ... # scheduler.step(loss) # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step(out[0]) # If you update learning rate each step - # scheduler.step(out[0]) # If you update learning rate each epoch - + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step(out[0]) # If you update learning rate each step + ... # scheduler.step(out[0]) # If you update learning rate each epoch + ... """ def __init__( @@ -1590,56 +1595,56 @@ class CosineAnnealingDecay(LRScheduler): .. 
code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... .. code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(20): - for batch_id in range(5): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # If you update learning rate each step + ... 
# scheduler.step() # If you update learning rate each epoch """ def __init__( @@ -1714,23 +1719,23 @@ class MultiplicativeDecay(LRScheduler): .. code-block:: python - import paddle - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(20): - for batch_id in range(5): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # If you update learning rate each step - # scheduler.step() # If you update learning rate each epoch - + >>> import paddle + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(20): + ... for batch_id in range(5): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # If you update learning rate each step + ... # scheduler.step() # If you update learning rate each epoch + ... """ def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False): @@ -1795,55 +1800,55 @@ class OneCycleLR(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(5): - for batch_id in range(20): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # You should update learning rate each step - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(5): + ... for batch_id in range(20): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # You should update learning rate each step + ... .. 
code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(5): - for batch_id in range(20): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # You should update learning rate each step - + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(5): + ... for batch_id in range(20): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # You should update learning rate each step + ... """ def __init__( @@ -2045,55 +2050,55 @@ class CyclicLR(LRScheduler): .. code-block:: python :name: code-example1 - # Example1: train on default dynamic graph mode - import paddle - import numpy as np - - # train on default dynamic graph mode - linear = paddle.nn.Linear(10, 10) - scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - for epoch in range(5): - for batch_id in range(20): - x = paddle.uniform([10, 10]) - out = linear(x) - loss = paddle.mean(out) - loss.backward() - sgd.step() - sgd.clear_gradients() - scheduler.step() # You should update learning rate each step - + >>> # Example1: train on default dynamic graph mode + >>> import paddle + >>> import numpy as np + + >>> # train on default dynamic graph mode + >>> linear = paddle.nn.Linear(10, 10) + >>> scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) + >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + >>> for epoch in range(5): + ... for batch_id in range(20): + ... x = paddle.uniform([10, 10]) + ... out = linear(x) + ... loss = paddle.mean(out) + ... loss.backward() + ... sgd.step() + ... sgd.clear_gradients() + ... scheduler.step() # You should update learning rate each step + ... .. 
code-block:: python :name: code-example2 - # Example2: train on static graph mode - import paddle - import numpy as np - paddle.enable_static() - main_prog = paddle.static.Program() - start_prog = paddle.static.Program() - with paddle.static.program_guard(main_prog, start_prog): - x = paddle.static.data(name='x', shape=[None, 4, 5]) - y = paddle.static.data(name='y', shape=[None, 4, 5]) - z = paddle.static.nn.fc(x, 100) - loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, - max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler) - sgd.minimize(loss) - - exe = paddle.static.Executor() - exe.run(start_prog) - for epoch in range(5): - for batch_id in range(20): - out = exe.run( - main_prog, - feed={ - 'x': np.random.randn(3, 4, 5).astype('float32'), - 'y': np.random.randn(3, 4, 5).astype('float32') - }, - fetch_list=loss.name) - scheduler.step() # You should update learning rate each step + >>> # Example2: train on static graph mode + >>> import paddle + >>> import numpy as np + >>> paddle.enable_static() + >>> main_prog = paddle.static.Program() + >>> start_prog = paddle.static.Program() + >>> with paddle.static.program_guard(main_prog, start_prog): + ... x = paddle.static.data(name='x', shape=[None, 4, 5]) + ... y = paddle.static.data(name='y', shape=[None, 4, 5]) + ... z = paddle.static.nn.fc(x, 100) + ... loss = paddle.mean(z) + ... scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, + ... max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) + ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) + ... sgd.minimize(loss) + ... + >>> exe = paddle.static.Executor() + >>> exe.run(start_prog) + >>> for epoch in range(5): + ... for batch_id in range(20): + ... out = exe.run( + ... main_prog, + ... feed={ + ... 'x': np.random.randn(3, 4, 5).astype('float32'), + ... 'y': np.random.randn(3, 4, 5).astype('float32') + ... }, + ... fetch_list=loss.name) + ... scheduler.step() # You should update learning rate each step """ def __init__( From d23b226e7b70bc92d0b5b601fa2bc476b221b1ca Mon Sep 17 00:00:00 2001 From: PommesPeter <434596665@qq.com> Date: Fri, 11 Aug 2023 10:07:39 +0800 Subject: [PATCH 3/5] fix: updated code style --- python/paddle/optimizer/lr.py | 72 +++++++++++++++++------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 360167ee8cb46..ef70e8ff6aff1 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -80,15 +80,15 @@ class LRScheduler: ... type(step_size)) ... if gamma >= 1.0: ... raise ValueError('gamma should be < 1.0.') - ... + ... ... self.step_size = step_size ... self.gamma = gamma ... super().__init__(learning_rate, last_epoch, verbose) - ... + ... ... def get_lr(self): ... i = self.last_epoch // self.step_size ... return self.base_lr * (self.gamma**i) - ... + ... """ def __init__(self, learning_rate=0.1, last_epoch=-1, verbose=False): @@ -286,7 +286,7 @@ class NoamDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -304,7 +304,7 @@ class NoamDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... 
>>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> np.random.seed(2023) @@ -319,7 +319,7 @@ class NoamDecay(LRScheduler): ... fetch_list=loss.name) ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... """ def __init__( @@ -417,7 +417,7 @@ class PiecewiseDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -494,7 +494,7 @@ class NaturalExpDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -512,7 +512,7 @@ class NaturalExpDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> np.random.seed(2023) @@ -584,7 +584,7 @@ class InverseTimeDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -602,7 +602,7 @@ class InverseTimeDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -616,7 +616,7 @@ class InverseTimeDecay(LRScheduler): ... fetch_list=loss.name) ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... """ def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -687,7 +687,7 @@ class PolynomialDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -705,7 +705,7 @@ class PolynomialDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -819,7 +819,7 @@ class LinearWarmup(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -838,7 +838,7 @@ class LinearWarmup(LRScheduler): ... learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -958,7 +958,7 @@ class ExponentialDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. 
code-block:: python :name: code-example2 @@ -976,7 +976,7 @@ class ExponentialDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -1056,7 +1056,7 @@ class MultiStepDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -1074,7 +1074,7 @@ class MultiStepDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -1170,7 +1170,7 @@ class StepDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -1188,7 +1188,7 @@ class StepDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -1274,7 +1274,7 @@ class LambdaDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -1292,7 +1292,7 @@ class LambdaDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -1306,7 +1306,7 @@ class LambdaDecay(LRScheduler): ... fetch_list=loss.name) ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... """ def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False): @@ -1382,7 +1382,7 @@ class ReduceOnPlateau(LRScheduler): ... sgd.clear_gradients() ... scheduler.step(loss) # If you update learning rate each step ... # scheduler.step(loss) # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -1400,7 +1400,7 @@ class ReduceOnPlateau(LRScheduler): ... scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -1414,7 +1414,7 @@ class ReduceOnPlateau(LRScheduler): ... fetch_list=loss.name) ... scheduler.step(out[0]) # If you update learning rate each step ... # scheduler.step(out[0]) # If you update learning rate each epoch - ... + ... """ def __init__( @@ -1613,7 +1613,7 @@ class CosineAnnealingDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... 
# scheduler.step() # If you update learning rate each epoch - ... + ... .. code-block:: python :name: code-example2 @@ -1631,7 +1631,7 @@ class CosineAnnealingDecay(LRScheduler): ... scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(20): @@ -1735,7 +1735,7 @@ class MultiplicativeDecay(LRScheduler): ... sgd.clear_gradients() ... scheduler.step() # If you update learning rate each step ... # scheduler.step() # If you update learning rate each epoch - ... + ... """ def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False): @@ -1817,7 +1817,7 @@ class OneCycleLR(LRScheduler): ... sgd.step() ... sgd.clear_gradients() ... scheduler.step() # You should update learning rate each step - ... + ... .. code-block:: python :name: code-example2 @@ -1835,7 +1835,7 @@ class OneCycleLR(LRScheduler): ... scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(5): @@ -1848,7 +1848,7 @@ class OneCycleLR(LRScheduler): ... }, ... fetch_list=loss.name) ... scheduler.step() # You should update learning rate each step - ... + ... """ def __init__( @@ -2067,7 +2067,7 @@ class CyclicLR(LRScheduler): ... sgd.step() ... sgd.clear_gradients() ... scheduler.step() # You should update learning rate each step - ... + ... .. code-block:: python :name: code-example2 @@ -2086,7 +2086,7 @@ class CyclicLR(LRScheduler): ... max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) ... sgd.minimize(loss) - ... + ... >>> exe = paddle.static.Executor() >>> exe.run(start_prog) >>> for epoch in range(5): From 70be8c87d3cdf1e8b9b90472afa8a0baeda5d04c Mon Sep 17 00:00:00 2001 From: PommesPeter <434596665@qq.com> Date: Mon, 14 Aug 2023 19:58:01 +0800 Subject: [PATCH 4/5] fix: removed extra changes --- python/paddle/optimizer/lr.py | 1447 ++++++++++++++++----------------- 1 file changed, 721 insertions(+), 726 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index ef70e8ff6aff1..e628509e52afc 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -64,31 +64,31 @@ class LRScheduler: .. code-block:: python - >>> import paddle - >>> from paddle.optimizer.lr import LRScheduler - - >>> class StepDecay(LRScheduler): - ... def __init__(self, - ... learning_rate, - ... step_size, - ... gamma=0.1, - ... last_epoch=-1, - ... verbose=False): - ... if not isinstance(step_size, int): - ... raise TypeError( - ... "The type of 'step_size' must be 'int', but received %s." % - ... type(step_size)) - ... if gamma >= 1.0: - ... raise ValueError('gamma should be < 1.0.') - ... - ... self.step_size = step_size - ... self.gamma = gamma - ... super().__init__(learning_rate, last_epoch, verbose) - ... - ... def get_lr(self): - ... i = self.last_epoch // self.step_size - ... return self.base_lr * (self.gamma**i) - ... 
+ import paddle + from paddle.optimizer.lr import LRScheduler + + class StepDecay(LRScheduler): + def __init__(self, + learning_rate, + step_size, + gamma=0.1, + last_epoch=-1, + verbose=False): + if not isinstance(step_size, int): + raise TypeError( + "The type of 'step_size' must be 'int', but received %s." % + type(step_size)) + if gamma >= 1.0: + raise ValueError('gamma should be < 1.0.') + + self.step_size = step_size + self.gamma = gamma + super().__init__(learning_rate, last_epoch, verbose) + + def get_lr(self): + i = self.last_epoch // self.step_size + return self.base_lr * (self.gamma**i) + """ def __init__(self, learning_rate=0.1, last_epoch=-1, verbose=False): @@ -128,29 +128,29 @@ def step(self, epoch=None): Examples: .. code-block:: python - >>> import paddle - >>> value = paddle.arange(26, dtype='float32') - >>> a = paddle.reshape(value, [2, 13]) - >>> linear = paddle.nn.Linear(13, 5) - >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95, - ... parameters = linear.parameters()) - >>> out = linear(a) - >>> out.backward() - >>> adadelta.step() - >>> adadelta.clear_grad() + import paddle + value = paddle.arange(26, dtype='float32') + a = paddle.reshape(value, [2, 13]) + linear = paddle.nn.Linear(13, 5) + adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95, + parameters = linear.parameters()) + out = linear(a) + out.backward() + adadelta.step() + adadelta.clear_grad() Examples: .. code-block:: python - >>> import paddle - >>> value = paddle.arange(26, dtype='float32') - >>> a = paddle.reshape(value, [2, 13]) - >>> linear = paddle.nn.Linear(13, 5) - >>> adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95, - ... parameters = linear.parameters()) - >>> out = linear(a) - >>> out.backward() - >>> adadelta.step() - >>> adadelta.clear_grad() + import paddle + value = paddle.arange(26, dtype='float32') + a = paddle.reshape(value, [2, 13]) + linear = paddle.nn.Linear(13, 5) + adadelta = paddle.optimizer.Adadelta(learning_rate=0.0003, epsilon=1e-06, rho=0.95, + parameters = linear.parameters()) + out = linear(a) + out.backward() + adadelta.step() + adadelta.clear_grad() """ if epoch is None: self.last_epoch += 1 @@ -267,59 +267,57 @@ class NoamDecay(LRScheduler): .. code-block:: python :name: code-example1 - >>> # Example1: train on default dynamic graph mode - >>> import paddle - >>> import numpy as np - - >>> # train on default dynamic graph mode - >>> paddle.seed(2023) - >>> linear = paddle.nn.Linear(10, 10) - >>> scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) - >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... x = paddle.uniform([10, 10]) - ... out = linear(x) - ... loss = paddle.mean(out) - ... loss.backward() - ... sgd.step() - ... sgd.clear_gradients() - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... 
+ # Example1: train on default dynamic graph mode + import paddle + import numpy as np + + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + .. code-block:: python :name: code-example2 - >>> # Example2: train on static graph mode - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - >>> main_prog = paddle.static.Program() - >>> start_prog = paddle.static.Program() - >>> with paddle.static.program_guard(main_prog, start_prog): - ... x = paddle.static.data(name='x', shape=[None, 4, 5]) - ... y = paddle.static.data(name='y', shape=[None, 4, 5]) - ... z = paddle.static.nn.fc(x, 100) - ... loss = paddle.mean(z) - ... scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) - ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) - ... sgd.minimize(loss) - ... - >>> exe = paddle.static.Executor() - >>> exe.run(start_prog) - >>> np.random.seed(2023) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... out = exe.run( - ... main_prog, - ... feed={ - ... 'x': np.random.randn(3, 4, 5).astype('float32'), - ... 'y': np.random.randn(3, 4, 5).astype('float32') - ... }, - ... fetch_list=loss.name) - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... + # Example2: train on static graph mode + import paddle + import numpy as np + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(5): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + """ def __init__( @@ -379,58 +377,56 @@ class PiecewiseDecay(LRScheduler): .. code-block:: python :name: code-example1 - >>> # Example1: train on default dynamic graph mode - >>> import paddle - >>> import numpy as np - - >>> # train on default dynamic graph mode - >>> paddle.seed(2023) - >>> linear = paddle.nn.Linear(10, 10) - >>> scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) - >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... x = paddle.uniform([10, 10]) - ... out = linear(x) - ... loss = paddle.mean(out) - ... loss.backward() - ... sgd.step() - ... sgd.clear_gradients() - ... 
scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... + # Example1: train on default dynamic graph mode + import paddle + import numpy as np + + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch .. code-block:: python :name: code-example2 - >>> # Example2: train on static graph mode - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - >>> main_prog = paddle.static.Program() - >>> start_prog = paddle.static.Program() - >>> with paddle.static.program_guard(main_prog, start_prog): - ... x = paddle.static.data(name='x', shape=[None, 4, 5]) - ... y = paddle.static.data(name='y', shape=[None, 4, 5]) - ... z = paddle.static.nn.fc(x, 100) - ... loss = paddle.mean(z) - ... scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) - ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) - ... sgd.minimize(loss) - ... - >>> exe = paddle.static.Executor() - >>> exe.run(start_prog) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... out = exe.run( - ... main_prog, - ... feed={ - ... 'x': np.random.randn(3, 4, 5).astype('float32'), - ... 'y': np.random.randn(3, 4, 5).astype('float32') - ... }, - ... fetch_list=loss.name) - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch + # Example2: train on static graph mode + import paddle + import numpy as np + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(5): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, boundaries, values, last_epoch=-1, verbose=False): @@ -478,55 +474,54 @@ class NaturalExpDecay(LRScheduler): .. code-block:: python :name: code-example1 - >>> # Example1: train on default dynamic graph mode - >>> import paddle - >>> import numpy as np - >>> linear = paddle.nn.Linear(10, 10) - >>> scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) - >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... x = paddle.uniform([10, 10]) - ... 
out = linear(x) - ... loss = paddle.mean(out) - ... loss.backward() - ... sgd.step() - ... sgd.clear_gradients() - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... + # Example1: train on default dynamic graph mode + import paddle + import numpy as np + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + .. code-block:: python :name: code-example2 - >>> # Example2: train on static graph mode - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - >>> main_prog = paddle.static.Program() - >>> start_prog = paddle.static.Program() - >>> with paddle.static.program_guard(main_prog, start_prog): - ... x = paddle.static.data(name='x', shape=[None, 4, 5]) - ... y = paddle.static.data(name='y', shape=[None, 4, 5]) - ... z = paddle.static.nn.fc(x, 100) - ... loss = paddle.mean(z) - ... scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) - ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) - ... sgd.minimize(loss) - ... - >>> exe = paddle.static.Executor() - >>> exe.run(start_prog) - >>> np.random.seed(2023) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... out = exe.run( - ... main_prog, - ... feed={ - ... 'x': np.random.randn(3, 4, 5).astype('float32'), - ... 'y': np.random.randn(3, 4, 5).astype('float32') - ... }, - ... fetch_list=loss.name) - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch + # Example2: train on static graph mode + import paddle + import numpy as np + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(5): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -566,57 +561,57 @@ class InverseTimeDecay(LRScheduler): .. 
code-block:: python :name: code-example1 - >>> # Example1: train on default dynamic graph mode - >>> import paddle - >>> import numpy as np - - >>> # train on default dynamic graph mode - >>> linear = paddle.nn.Linear(10, 10) - >>> scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) - >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... x = paddle.uniform([10, 10]) - ... out = linear(x) - ... loss = paddle.mean(out) - ... loss.backward() - ... sgd.step() - ... sgd.clear_gradients() - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... + # Example1: train on default dynamic graph mode + import paddle + import numpy as np + + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + .. code-block:: python :name: code-example2 - >>> # Example2: train on static graph mode - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - >>> main_prog = paddle.static.Program() - >>> start_prog = paddle.static.Program() - >>> with paddle.static.program_guard(main_prog, start_prog): - ... x = paddle.static.data(name='x', shape=[None, 4, 5]) - ... y = paddle.static.data(name='y', shape=[None, 4, 5]) - ... z = paddle.static.nn.fc(x, 100) - ... loss = paddle.mean(z) - ... scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) - ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) - ... sgd.minimize(loss) - ... - >>> exe = paddle.static.Executor() - >>> exe.run(start_prog) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... out = exe.run( - ... main_prog, - ... feed={ - ... 'x': np.random.randn(3, 4, 5).astype('float32'), - ... 'y': np.random.randn(3, 4, 5).astype('float32') - ... }, - ... fetch_list=loss.name) - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... 
+ # Example2: train on static graph mode + import paddle + import numpy as np + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(5): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + """ def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -669,56 +664,56 @@ class PolynomialDecay(LRScheduler): .. code-block:: python :name: code-example1 - >>> # Example1: train on default dynamic graph mode - >>> import paddle - >>> import numpy as np - - >>> # train on default dynamic graph mode - >>> linear = paddle.nn.Linear(10, 10) - >>> scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) - >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... x = paddle.uniform([10, 10]) - ... out = linear(x) - ... loss = paddle.mean(out) - ... loss.backward() - ... sgd.step() - ... sgd.clear_gradients() - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... + # Example1: train on default dynamic graph mode + import paddle + import numpy as np + + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + .. code-block:: python :name: code-example2 - >>> # Example2: train on static graph mode - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - >>> main_prog = paddle.static.Program() - >>> start_prog = paddle.static.Program() - >>> with paddle.static.program_guard(main_prog, start_prog): - ... x = paddle.static.data(name='x', shape=[None, 4, 5]) - ... y = paddle.static.data(name='y', shape=[None, 4, 5]) - ... z = paddle.static.nn.fc(x, 100) - ... loss = paddle.mean(z) - ... scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) - ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) - ... sgd.minimize(loss) - ... - >>> exe = paddle.static.Executor() - >>> exe.run(start_prog) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... out = exe.run( - ... main_prog, - ... feed={ - ... 'x': np.random.randn(3, 4, 5).astype('float32'), - ... 'y': np.random.randn(3, 4, 5).astype('float32') - ... }, - ... 
fetch_list=loss.name) - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch + # Example2: train on static graph mode + import paddle + import numpy as np + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(5): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__( @@ -800,58 +795,58 @@ class LinearWarmup(LRScheduler): .. code-block:: python :name: code-example1 - >>> # Example1: train on default dynamic graph mode - >>> import paddle - >>> import numpy as np - - >>> # train on default dynamic graph mode - >>> linear = paddle.nn.Linear(10, 10) - >>> scheduler = paddle.optimizer.lr.LinearWarmup( - ... learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) - >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... x = paddle.uniform([10, 10]) - ... out = linear(x) - ... loss = paddle.mean(out) - ... loss.backward() - ... sgd.step() - ... sgd.clear_gradients() - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... + # Example1: train on default dynamic graph mode + import paddle + import numpy as np + + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.LinearWarmup( + learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + .. code-block:: python :name: code-example2 - >>> # Example2: train on static graph mode - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - >>> main_prog = paddle.static.Program() - >>> start_prog = paddle.static.Program() - >>> with paddle.static.program_guard(main_prog, start_prog): - ... x = paddle.static.data(name='x', shape=[None, 4, 5]) - ... y = paddle.static.data(name='y', shape=[None, 4, 5]) - ... z = paddle.static.nn.fc(x, 100) - ... loss = paddle.mean(z) - ... scheduler = paddle.optimizer.lr.LinearWarmup( - ... learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) - ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) - ... sgd.minimize(loss) - ... - >>> exe = paddle.static.Executor() - >>> exe.run(start_prog) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... 
out = exe.run( - ... main_prog, - ... feed={ - ... 'x': np.random.randn(3, 4, 5).astype('float32'), - ... 'y': np.random.randn(3, 4, 5).astype('float32') - ... }, - ... fetch_list=loss.name) - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch + # Example2: train on static graph mode + import paddle + import numpy as np + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.LinearWarmup( + learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(5): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__( @@ -940,56 +935,56 @@ class ExponentialDecay(LRScheduler): .. code-block:: python :name: code-example1 - >>> # Example1: train on default dynamic graph mode - >>> import paddle - >>> import numpy as np - - >>> # train on default dynamic graph mode - >>> linear = paddle.nn.Linear(10, 10) - >>> scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) - >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... x = paddle.uniform([10, 10]) - ... out = linear(x) - ... loss = paddle.mean(out) - ... loss.backward() - ... sgd.step() - ... sgd.clear_gradients() - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... + # Example1: train on default dynamic graph mode + import paddle + import numpy as np + + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + .. code-block:: python :name: code-example2 - >>> # Example2: train on static graph mode - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - >>> main_prog = paddle.static.Program() - >>> start_prog = paddle.static.Program() - >>> with paddle.static.program_guard(main_prog, start_prog): - ... x = paddle.static.data(name='x', shape=[None, 4, 5]) - ... y = paddle.static.data(name='y', shape=[None, 4, 5]) - ... z = paddle.static.nn.fc(x, 100) - ... loss = paddle.mean(z) - ... scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) - ... sgd = paddle.optimizer.SGD(learning_rate=scheduler) - ... sgd.minimize(loss) - ... 
- >>> exe = paddle.static.Executor() - >>> exe.run(start_prog) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... out = exe.run( - ... main_prog, - ... feed={ - ... 'x': np.random.randn(3, 4, 5).astype('float32'), - ... 'y': np.random.randn(3, 4, 5).astype('float32') - ... }, - ... fetch_list=loss.name) - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch + # Example2: train on static graph mode + import paddle + import numpy as np + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(20): + for batch_id in range(5): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -1038,56 +1033,56 @@ class MultiStepDecay(LRScheduler): .. code-block:: python :name: code-example1 - >>> # Example1: train on default dynamic graph mode - >>> import paddle - >>> import numpy as np - - >>> # train on default dynamic graph mode - >>> linear = paddle.nn.Linear(10, 10) - >>> scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) - >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) - >>> for epoch in range(20): - ... for batch_id in range(5): - ... x = paddle.uniform([10, 10]) - ... out = linear(x) - ... loss = paddle.mean(out) - ... loss.backward() - ... sgd.step() - ... sgd.clear_gradients() - ... scheduler.step() # If you update learning rate each step - ... # scheduler.step() # If you update learning rate each epoch - ... + # Example1: train on default dynamic graph mode + import paddle + import numpy as np + + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) + for epoch in range(20): + for batch_id in range(5): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch + .. code-block:: python :name: code-example2 - >>> # Example2: train on static graph mode - >>> import paddle - >>> import numpy as np - >>> paddle.enable_static() - >>> main_prog = paddle.static.Program() - >>> start_prog = paddle.static.Program() - >>> with paddle.static.program_guard(main_prog, start_prog): - ... x = paddle.static.data(name='x', shape=[None, 4, 5]) - ... y = paddle.static.data(name='y', shape=[None, 4, 5]) - ... z = paddle.static.nn.fc(x, 100) - ... loss = paddle.mean(z) - ... 
scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
-            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
-            ...     sgd.minimize(loss)
-            ...
-            >>> exe = paddle.static.Executor()
-            >>> exe.run(start_prog)
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         out = exe.run(
-            ...             main_prog,
-            ...             feed={
-            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
-            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
-            ...             },
-            ...             fetch_list=loss.name)
-            ...         scheduler.step() # If you update learning rate each step
-            ...     # scheduler.step() # If you update learning rate each epoch
+            # Example2: train on static graph mode
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.static.program_guard(main_prog, start_prog):
+                x = paddle.static.data(name='x', shape=[None, 4, 5])
+                y = paddle.static.data(name='y', shape=[None, 4, 5])
+                z = paddle.static.nn.fc(x, 100)
+                loss = paddle.mean(z)
+                scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True)
+                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
+                sgd.minimize(loss)
+
+            exe = paddle.static.Executor()
+            exe.run(start_prog)
+            for epoch in range(20):
+                for batch_id in range(5):
+                    out = exe.run(
+                        main_prog,
+                        feed={
+                            'x': np.random.randn(3, 4, 5).astype('float32'),
+                            'y': np.random.randn(3, 4, 5).astype('float32')
+                        },
+                        fetch_list=loss.name)
+                    scheduler.step() # If you update learning rate each step
+                # scheduler.step() # If you update learning rate each epoch
     """
 
     def __init__(
@@ -1152,56 +1147,56 @@ class StepDecay(LRScheduler):
         .. code-block:: python
             :name: code-example1
 
-            >>> # Example1: train on default dynamic graph mode
-            >>> import paddle
-            >>> import numpy as np
-
-            >>> # train on default dynamic graph mode
-            >>> linear = paddle.nn.Linear(10, 10)
-            >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
-            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         x = paddle.uniform([10, 10])
-            ...         out = linear(x)
-            ...         loss = paddle.mean(out)
-            ...         loss.backward()
-            ...         sgd.step()
-            ...         sgd.clear_gradients()
-            ...         scheduler.step() # If you update learning rate each step
-            ...     # scheduler.step() # If you update learning rate each epoch
-            ...
+            # Example1: train on default dynamic graph mode
+            import paddle
+            import numpy as np
+
+            # train on default dynamic graph mode
+            linear = paddle.nn.Linear(10, 10)
+            scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
+            for epoch in range(20):
+                for batch_id in range(5):
+                    x = paddle.uniform([10, 10])
+                    out = linear(x)
+                    loss = paddle.mean(out)
+                    loss.backward()
+                    sgd.step()
+                    sgd.clear_gradients()
+                    scheduler.step() # If you update learning rate each step
+                # scheduler.step() # If you update learning rate each epoch
+
         .. code-block:: python
             :name: code-example2
 
-            >>> # Example2: train on static graph mode
-            >>> import paddle
-            >>> import numpy as np
-            >>> paddle.enable_static()
-            >>> main_prog = paddle.static.Program()
-            >>> start_prog = paddle.static.Program()
-            >>> with paddle.static.program_guard(main_prog, start_prog):
-            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
-            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
-            ...     z = paddle.static.nn.fc(x, 100)
-            ...     loss = paddle.mean(z)
-            ...     scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
-            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
-            ...     sgd.minimize(loss)
-            ...
-            >>> exe = paddle.static.Executor()
-            >>> exe.run(start_prog)
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         out = exe.run(
-            ...             main_prog,
-            ...             feed={
-            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
-            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
-            ...             },
-            ...             fetch_list=loss.name)
-            ...         scheduler.step() # If you update learning rate each step
-            ...     # scheduler.step() # If you update learning rate each epoch
+            # Example2: train on static graph mode
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.static.program_guard(main_prog, start_prog):
+                x = paddle.static.data(name='x', shape=[None, 4, 5])
+                y = paddle.static.data(name='y', shape=[None, 4, 5])
+                z = paddle.static.nn.fc(x, 100)
+                loss = paddle.mean(z)
+                scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True)
+                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
+                sgd.minimize(loss)
+
+            exe = paddle.static.Executor()
+            exe.run(start_prog)
+            for epoch in range(20):
+                for batch_id in range(5):
+                    out = exe.run(
+                        main_prog,
+                        feed={
+                            'x': np.random.randn(3, 4, 5).astype('float32'),
+                            'y': np.random.randn(3, 4, 5).astype('float32')
+                        },
+                        fetch_list=loss.name)
+                    scheduler.step() # If you update learning rate each step
+                # scheduler.step() # If you update learning rate each epoch
     """
 
     def __init__(
@@ -1256,57 +1251,57 @@ class LambdaDecay(LRScheduler):
         .. code-block:: python
             :name: code-example1
 
-            >>> # Example1: train on default dynamic graph mode
-            >>> import paddle
-            >>> import numpy as np
-
-            >>> # train on default dynamic graph mode
-            >>> linear = paddle.nn.Linear(10, 10)
-            >>> scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
-            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         x = paddle.uniform([10, 10])
-            ...         out = linear(x)
-            ...         loss = paddle.mean(out)
-            ...         loss.backward()
-            ...         sgd.step()
-            ...         sgd.clear_gradients()
-            ...         scheduler.step() # If you update learning rate each step
-            ...     # scheduler.step() # If you update learning rate each epoch
-            ...
+            # Example1: train on default dynamic graph mode
+            import paddle
+            import numpy as np
+
+            # train on default dynamic graph mode
+            linear = paddle.nn.Linear(10, 10)
+            scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
+            for epoch in range(20):
+                for batch_id in range(5):
+                    x = paddle.uniform([10, 10])
+                    out = linear(x)
+                    loss = paddle.mean(out)
+                    loss.backward()
+                    sgd.step()
+                    sgd.clear_gradients()
+                    scheduler.step() # If you update learning rate each step
+                # scheduler.step() # If you update learning rate each epoch
+
         .. code-block:: python
             :name: code-example2
 
-            >>> # Example2: train on static graph mode
-            >>> import paddle
-            >>> import numpy as np
-            >>> paddle.enable_static()
-            >>> main_prog = paddle.static.Program()
-            >>> start_prog = paddle.static.Program()
-            >>> with paddle.static.program_guard(main_prog, start_prog):
-            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
-            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
-            ...     z = paddle.static.nn.fc(x, 100)
-            ...     loss = paddle.mean(z)
-            ...     scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
-            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
-            ...     sgd.minimize(loss)
-            ...
-            >>> exe = paddle.static.Executor()
-            >>> exe.run(start_prog)
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         out = exe.run(
-            ...             main_prog,
-            ...             feed={
-            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
-            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
-            ...             },
-            ...             fetch_list=loss.name)
-            ...         scheduler.step() # If you update learning rate each step
-            ...     # scheduler.step() # If you update learning rate each epoch
-            ...
+            # Example2: train on static graph mode
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.static.program_guard(main_prog, start_prog):
+                x = paddle.static.data(name='x', shape=[None, 4, 5])
+                y = paddle.static.data(name='y', shape=[None, 4, 5])
+                z = paddle.static.nn.fc(x, 100)
+                loss = paddle.mean(z)
+                scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True)
+                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
+                sgd.minimize(loss)
+
+            exe = paddle.static.Executor()
+            exe.run(start_prog)
+            for epoch in range(20):
+                for batch_id in range(5):
+                    out = exe.run(
+                        main_prog,
+                        feed={
+                            'x': np.random.randn(3, 4, 5).astype('float32'),
+                            'y': np.random.randn(3, 4, 5).astype('float32')
+                        },
+                        fetch_list=loss.name)
+                    scheduler.step() # If you update learning rate each step
+                # scheduler.step() # If you update learning rate each epoch
+
     """
 
     def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False):
@@ -1364,57 +1359,57 @@ class ReduceOnPlateau(LRScheduler):
         .. code-block:: python
             :name: code-example1
 
-            >>> # Example1: train on default dynamic graph mode
-            >>> import paddle
-            >>> import numpy as np
-
-            >>> # train on default dynamic graph mode
-            >>> linear = paddle.nn.Linear(10, 10)
-            >>> scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
-            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         x = paddle.uniform([10, 10])
-            ...         out = linear(x)
-            ...         loss = paddle.mean(out)
-            ...         loss.backward()
-            ...         sgd.step()
-            ...         sgd.clear_gradients()
-            ...         scheduler.step(loss) # If you update learning rate each step
-            ...     # scheduler.step(loss) # If you update learning rate each epoch
-            ...
+            # Example1: train on default dynamic graph mode
+            import paddle
+            import numpy as np
+
+            # train on default dynamic graph mode
+            linear = paddle.nn.Linear(10, 10)
+            scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
+            for epoch in range(20):
+                for batch_id in range(5):
+                    x = paddle.uniform([10, 10])
+                    out = linear(x)
+                    loss = paddle.mean(out)
+                    loss.backward()
+                    sgd.step()
+                    sgd.clear_gradients()
+                    scheduler.step(loss) # If you update learning rate each step
+                # scheduler.step(loss) # If you update learning rate each epoch
+
         .. code-block:: python
             :name: code-example2
 
-            >>> # Example2: train on static graph mode
-            >>> import paddle
-            >>> import numpy as np
-            >>> paddle.enable_static()
-            >>> main_prog = paddle.static.Program()
-            >>> start_prog = paddle.static.Program()
-            >>> with paddle.static.program_guard(main_prog, start_prog):
-            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
-            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
-            ...     z = paddle.static.nn.fc(x, 100)
-            ...     loss = paddle.mean(z)
-            ...     scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
-            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
-            ...     sgd.minimize(loss)
-            ...
-            >>> exe = paddle.static.Executor()
-            >>> exe.run(start_prog)
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         out = exe.run(
-            ...             main_prog,
-            ...             feed={
-            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
-            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
-            ...             },
-            ...             fetch_list=loss.name)
-            ...         scheduler.step(out[0]) # If you update learning rate each step
-            ...     # scheduler.step(out[0]) # If you update learning rate each epoch
-            ...
+            # Example2: train on static graph mode
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.static.program_guard(main_prog, start_prog):
+                x = paddle.static.data(name='x', shape=[None, 4, 5])
+                y = paddle.static.data(name='y', shape=[None, 4, 5])
+                z = paddle.static.nn.fc(x, 100)
+                loss = paddle.mean(z)
+                scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True)
+                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
+                sgd.minimize(loss)
+
+            exe = paddle.static.Executor()
+            exe.run(start_prog)
+            for epoch in range(20):
+                for batch_id in range(5):
+                    out = exe.run(
+                        main_prog,
+                        feed={
+                            'x': np.random.randn(3, 4, 5).astype('float32'),
+                            'y': np.random.randn(3, 4, 5).astype('float32')
+                        },
+                        fetch_list=loss.name)
+                    scheduler.step(out[0]) # If you update learning rate each step
+                # scheduler.step(out[0]) # If you update learning rate each epoch
+
     """
 
     def __init__(
@@ -1595,56 +1590,56 @@ class CosineAnnealingDecay(LRScheduler):
         .. code-block:: python
             :name: code-example1
 
-            >>> # Example1: train on default dynamic graph mode
-            >>> import paddle
-            >>> import numpy as np
-
-            >>> # train on default dynamic graph mode
-            >>> linear = paddle.nn.Linear(10, 10)
-            >>> scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
-            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         x = paddle.uniform([10, 10])
-            ...         out = linear(x)
-            ...         loss = paddle.mean(out)
-            ...         loss.backward()
-            ...         sgd.step()
-            ...         sgd.clear_gradients()
-            ...         scheduler.step() # If you update learning rate each step
-            ...     # scheduler.step() # If you update learning rate each epoch
-            ...
+            # Example1: train on default dynamic graph mode
+            import paddle
+            import numpy as np
+
+            # train on default dynamic graph mode
+            linear = paddle.nn.Linear(10, 10)
+            scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
+            for epoch in range(20):
+                for batch_id in range(5):
+                    x = paddle.uniform([10, 10])
+                    out = linear(x)
+                    loss = paddle.mean(out)
+                    loss.backward()
+                    sgd.step()
+                    sgd.clear_gradients()
+                    scheduler.step() # If you update learning rate each step
+                # scheduler.step() # If you update learning rate each epoch
+
         .. code-block:: python
            :name: code-example2
 
-            >>> # Example2: train on static graph mode
-            >>> import paddle
-            >>> import numpy as np
-            >>> paddle.enable_static()
-            >>> main_prog = paddle.static.Program()
-            >>> start_prog = paddle.static.Program()
-            >>> with paddle.static.program_guard(main_prog, start_prog):
-            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
-            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
-            ...     z = paddle.static.nn.fc(x, 100)
-            ...     loss = paddle.mean(z)
-            ...     scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
-            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
-            ...     sgd.minimize(loss)
-            ...
-            >>> exe = paddle.static.Executor()
-            >>> exe.run(start_prog)
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         out = exe.run(
-            ...             main_prog,
-            ...             feed={
-            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
-            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
-            ...             },
-            ...             fetch_list=loss.name)
-            ...         scheduler.step() # If you update learning rate each step
-            ...     # scheduler.step() # If you update learning rate each epoch
+            # Example2: train on static graph mode
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.static.program_guard(main_prog, start_prog):
+                x = paddle.static.data(name='x', shape=[None, 4, 5])
+                y = paddle.static.data(name='y', shape=[None, 4, 5])
+                z = paddle.static.nn.fc(x, 100)
+                loss = paddle.mean(z)
+                scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True)
+                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
+                sgd.minimize(loss)
+
+            exe = paddle.static.Executor()
+            exe.run(start_prog)
+            for epoch in range(20):
+                for batch_id in range(5):
+                    out = exe.run(
+                        main_prog,
+                        feed={
+                            'x': np.random.randn(3, 4, 5).astype('float32'),
+                            'y': np.random.randn(3, 4, 5).astype('float32')
+                        },
+                        fetch_list=loss.name)
+                    scheduler.step() # If you update learning rate each step
+                # scheduler.step() # If you update learning rate each epoch
     """
 
     def __init__(
@@ -1719,23 +1714,23 @@ class MultiplicativeDecay(LRScheduler):
         .. code-block:: python
 
-            >>> import paddle
-
-            >>> # train on default dynamic graph mode
-            >>> linear = paddle.nn.Linear(10, 10)
-            >>> scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True)
-            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
-            >>> for epoch in range(20):
-            ...     for batch_id in range(5):
-            ...         x = paddle.uniform([10, 10])
-            ...         out = linear(x)
-            ...         loss = paddle.mean(out)
-            ...         loss.backward()
-            ...         sgd.step()
-            ...         sgd.clear_gradients()
-            ...         scheduler.step() # If you update learning rate each step
-            ...     # scheduler.step() # If you update learning rate each epoch
-            ...
+            import paddle
+
+            # train on default dynamic graph mode
+            linear = paddle.nn.Linear(10, 10)
+            scheduler = paddle.optimizer.lr.MultiplicativeDecay(learning_rate=0.5, lr_lambda=lambda x:0.95, verbose=True)
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
+            for epoch in range(20):
+                for batch_id in range(5):
+                    x = paddle.uniform([10, 10])
+                    out = linear(x)
+                    loss = paddle.mean(out)
+                    loss.backward()
+                    sgd.step()
+                    sgd.clear_gradients()
+                    scheduler.step() # If you update learning rate each step
+                # scheduler.step() # If you update learning rate each epoch
+
     """
 
     def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False):
@@ -1800,55 +1795,55 @@ class OneCycleLR(LRScheduler):
         .. code-block:: python
             :name: code-example1
 
-            >>> # Example1: train on default dynamic graph mode
-            >>> import paddle
-            >>> import numpy as np
-
-            >>> # train on default dynamic graph mode
-            >>> linear = paddle.nn.Linear(10, 10)
-            >>> scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
-            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
-            >>> for epoch in range(5):
-            ...     for batch_id in range(20):
-            ...         x = paddle.uniform([10, 10])
-            ...         out = linear(x)
-            ...         loss = paddle.mean(out)
-            ...         loss.backward()
-            ...         sgd.step()
-            ...         sgd.clear_gradients()
-            ...         scheduler.step() # You should update learning rate each step
-            ...
+            # Example1: train on default dynamic graph mode
+            import paddle
+            import numpy as np
+
+            # train on default dynamic graph mode
+            linear = paddle.nn.Linear(10, 10)
+            scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
+            for epoch in range(5):
+                for batch_id in range(20):
+                    x = paddle.uniform([10, 10])
+                    out = linear(x)
+                    loss = paddle.mean(out)
+                    loss.backward()
+                    sgd.step()
+                    sgd.clear_gradients()
+                    scheduler.step() # You should update learning rate each step
+
         .. code-block:: python
             :name: code-example2
 
-            >>> # Example2: train on static graph mode
-            >>> import paddle
-            >>> import numpy as np
-            >>> paddle.enable_static()
-            >>> main_prog = paddle.static.Program()
-            >>> start_prog = paddle.static.Program()
-            >>> with paddle.static.program_guard(main_prog, start_prog):
-            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
-            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
-            ...     z = paddle.static.nn.fc(x, 100)
-            ...     loss = paddle.mean(z)
-            ...     scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
-            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
-            ...     sgd.minimize(loss)
-            ...
-            >>> exe = paddle.static.Executor()
-            >>> exe.run(start_prog)
-            >>> for epoch in range(5):
-            ...     for batch_id in range(20):
-            ...         out = exe.run(
-            ...             main_prog,
-            ...             feed={
-            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
-            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
-            ...             },
-            ...             fetch_list=loss.name)
-            ...         scheduler.step() # You should update learning rate each step
-            ...
+            # Example2: train on static graph mode
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.static.program_guard(main_prog, start_prog):
+                x = paddle.static.data(name='x', shape=[None, 4, 5])
+                y = paddle.static.data(name='y', shape=[None, 4, 5])
+                z = paddle.static.nn.fc(x, 100)
+                loss = paddle.mean(z)
+                scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True)
+                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
+                sgd.minimize(loss)
+
+            exe = paddle.static.Executor()
+            exe.run(start_prog)
+            for epoch in range(5):
+                for batch_id in range(20):
+                    out = exe.run(
+                        main_prog,
+                        feed={
+                            'x': np.random.randn(3, 4, 5).astype('float32'),
+                            'y': np.random.randn(3, 4, 5).astype('float32')
+                        },
+                        fetch_list=loss.name)
+                    scheduler.step() # You should update learning rate each step
+
     """
 
     def __init__(
@@ -2050,55 +2045,55 @@ class CyclicLR(LRScheduler):
         .. code-block:: python
            :name: code-example1
 
-            >>> # Example1: train on default dynamic graph mode
-            >>> import paddle
-            >>> import numpy as np
-
-            >>> # train on default dynamic graph mode
-            >>> linear = paddle.nn.Linear(10, 10)
-            >>> scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
-            >>> sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
-            >>> for epoch in range(5):
-            ...     for batch_id in range(20):
-            ...         x = paddle.uniform([10, 10])
-            ...         out = linear(x)
-            ...         loss = paddle.mean(out)
-            ...         loss.backward()
-            ...         sgd.step()
-            ...         sgd.clear_gradients()
-            ...         scheduler.step() # You should update learning rate each step
-            ...
+            # Example1: train on default dynamic graph mode
+            import paddle
+            import numpy as np
+
+            # train on default dynamic graph mode
+            linear = paddle.nn.Linear(10, 10)
+            scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
+            sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters())
+            for epoch in range(5):
+                for batch_id in range(20):
+                    x = paddle.uniform([10, 10])
+                    out = linear(x)
+                    loss = paddle.mean(out)
+                    loss.backward()
+                    sgd.step()
+                    sgd.clear_gradients()
+                    scheduler.step() # You should update learning rate each step
+
         .. code-block:: python
             :name: code-example2
 
-            >>> # Example2: train on static graph mode
-            >>> import paddle
-            >>> import numpy as np
-            >>> paddle.enable_static()
-            >>> main_prog = paddle.static.Program()
-            >>> start_prog = paddle.static.Program()
-            >>> with paddle.static.program_guard(main_prog, start_prog):
-            ...     x = paddle.static.data(name='x', shape=[None, 4, 5])
-            ...     y = paddle.static.data(name='y', shape=[None, 4, 5])
-            ...     z = paddle.static.nn.fc(x, 100)
-            ...     loss = paddle.mean(z)
-            ...     scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5,
-            ...         max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
-            ...     sgd = paddle.optimizer.SGD(learning_rate=scheduler)
-            ...     sgd.minimize(loss)
-            ...
-            >>> exe = paddle.static.Executor()
-            >>> exe.run(start_prog)
-            >>> for epoch in range(5):
-            ...     for batch_id in range(20):
-            ...         out = exe.run(
-            ...             main_prog,
-            ...             feed={
-            ...                 'x': np.random.randn(3, 4, 5).astype('float32'),
-            ...                 'y': np.random.randn(3, 4, 5).astype('float32')
-            ...             },
-            ...             fetch_list=loss.name)
-            ...         scheduler.step() # You should update learning rate each step
+            # Example2: train on static graph mode
+            import paddle
+            import numpy as np
+            paddle.enable_static()
+            main_prog = paddle.static.Program()
+            start_prog = paddle.static.Program()
+            with paddle.static.program_guard(main_prog, start_prog):
+                x = paddle.static.data(name='x', shape=[None, 4, 5])
+                y = paddle.static.data(name='y', shape=[None, 4, 5])
+                z = paddle.static.nn.fc(x, 100)
+                loss = paddle.mean(z)
+                scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5,
+                    max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True)
+                sgd = paddle.optimizer.SGD(learning_rate=scheduler)
+                sgd.minimize(loss)
+
+            exe = paddle.static.Executor()
+            exe.run(start_prog)
+            for epoch in range(5):
+                for batch_id in range(20):
+                    out = exe.run(
+                        main_prog,
+                        feed={
+                            'x': np.random.randn(3, 4, 5).astype('float32'),
+                            'y': np.random.randn(3, 4, 5).astype('float32')
+                        },
+                        fetch_list=loss.name)
+                    scheduler.step() # You should update learning rate each step
     """
 
     def __init__(

From 69cf041859497428d825dba360caef4ba56f0ad8 Mon Sep 17 00:00:00 2001
From: PommesPeter <434596665@qq.com>
Date: Mon, 14 Aug 2023 23:53:12 +0800
Subject: [PATCH 5/5] fix: refine detail

---
 python/paddle/optimizer/adadelta.py | 1 -
 python/paddle/optimizer/adagrad.py  | 1 -
 python/paddle/optimizer/adam.py     | 3 +--
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py
index de2d3e3e1d9eb..e4826997761c1 100644
--- a/python/paddle/optimizer/adadelta.py
+++ b/python/paddle/optimizer/adadelta.py
@@ -71,7 +71,6 @@ class Adadelta(Optimizer):
         .. code-block:: python
 
             >>> import paddle
-            >>> paddle.seed(2023)
 
             >>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
             >>> linear = paddle.nn.Linear(10, 10)
diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py
index 89e4567d5f6c0..3373866ba048a 100644
--- a/python/paddle/optimizer/adagrad.py
+++ b/python/paddle/optimizer/adagrad.py
@@ -71,7 +71,6 @@ class Adagrad(Optimizer):
         .. code-block:: python
 
             >>> import paddle
-            >>> paddle.seed(2023)
 
             >>> inp = paddle.rand(shape=[10, 10])
             >>> linear = paddle.nn.Linear(10, 10)
diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py
index 9a71e917e737d..f58b82a13188c 100644
--- a/python/paddle/optimizer/adam.py
+++ b/python/paddle/optimizer/adam.py
@@ -99,7 +99,6 @@ class Adam(Optimizer):
             :name: code-example1
 
             >>> import paddle
-            >>> paddle.seed(2023)
 
             >>> linear = paddle.nn.Linear(10, 10)
             >>> inp = paddle.rand([10,10], dtype="float32")
@@ -116,7 +115,6 @@ class Adam(Optimizer):
 
             >>> # Adam with beta1/beta2 as Tensor and weight_decay as float
             >>> import paddle
-            >>> paddle.seed(2023)
 
             >>> linear = paddle.nn.Linear(10, 10)
             >>> inp = paddle.rand([10,10], dtype="float32")
@@ -132,6 +130,7 @@ class Adam(Optimizer):
             >>> loss.backward()
             >>> adam.step()
             >>> adam.clear_grad()
 
+            >>> # Note that the learning_rate of linear_2 is 0.01.
             >>> linear_1 = paddle.nn.Linear(10, 10)
             >>> linear_2 = paddle.nn.Linear(10, 10)