[2.0API] Support 2.0 lr_scheduler for the 2.0 optimizer #26737

Merged 6 commits on Aug 28, 2020
Changes from all commits
16 changes: 6 additions & 10 deletions python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -456,8 +456,9 @@ def test_adam_op_with_state_dict(self):
state_dict = adam.state_dict()
adam.set_state_dict(state_dict)

- #learning_rate is Decay
- learning_rate = fluid.dygraph.CosineDecay(0.1, 10000, 120)
+ #learning_rate is _LRScheduler
+ learning_rate = paddle.optimizer.CosineAnnealingLR(
+     learning_rate=0.1, T_max=10)
adam = paddle.optimizer.Adam(
learning_rate=learning_rate,
weight_decay=fluid.regularizer.L2Decay(0.001),
@@ -498,15 +499,10 @@ def test_adam_op_with_set_lr(self):
adam.set_lr(lr)
cur_lr = adam.get_lr()
assert (lr == cur_lr)

- lr_var = paddle.create_global_var(shape=[1], value=lr, dtype='float32')
- adam.set_lr(lr_var)
- cur_lr = adam.get_lr()
- assert (np.float32(lr) == cur_lr)

with self.assertRaises(TypeError):
-     lr = int(1)
-     adam.set_lr(lr)
+     lr_var = paddle.create_global_var(
+         shape=[1], value=lr, dtype='float32')
+     adam.set_lr(lr_var)


if __name__ == "__main__":
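The hunk above swaps the 1.x fluid.dygraph.CosineDecay object for a 2.0 _LRScheduler and narrows set_lr() to plain floats. A minimal sketch of the pattern these assertions exercise, assuming the 2.0-beta names used in this PR (paddle.optimizer.CosineAnnealingLR, paddle.optimizer.Adam); the fluid.dygraph.Linear layer is only an illustrative parameter source, not part of the test:

import paddle
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(10, 10)  # illustrative layer, just to supply parameters

    # 2.0 style: pass an _LRScheduler instance straight to the optimizer.
    scheduler = paddle.optimizer.CosineAnnealingLR(learning_rate=0.1, T_max=10)
    adam = paddle.optimizer.Adam(
        learning_rate=scheduler,
        weight_decay=fluid.regularizer.L2Decay(0.001),
        parameters=linear.parameters())
    adam.set_state_dict(adam.state_dict())  # state dict round-trips, as the test checks

    # set_lr() accepts a Python float on an optimizer built with a float
    # learning rate; passing a Variable now raises TypeError.
    adam_float = paddle.optimizer.Adam(learning_rate=0.1,
                                       parameters=linear.parameters())
    adam_float.set_lr(0.001)
    assert adam_float.get_lr() == 0.001
    lr_var = paddle.create_global_var(shape=[1], value=0.001, dtype='float32')
    try:
        adam_float.set_lr(lr_var)  # rejected by the 2.0 API
    except TypeError:
        pass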
42 changes: 17 additions & 25 deletions python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py
@@ -200,15 +200,15 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
def get_optimizer_dygraph(self, parameter_list):
bd = [3, 6, 9]
optimizer = SGDOptimizer(
-     learning_rate=fluid.layers.piecewise_decay(
+     learning_rate=paddle.optimizer.PiecewiseLR(
boundaries=bd,
values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
parameter_list=parameter_list)
return optimizer

def get_optimizer(self):
bd = [3, 6, 9]
- optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
+ optimizer = SGDOptimizer(learning_rate=paddle.optimizer.PiecewiseLR(
boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
return optimizer

@@ -381,9 +381,9 @@ def test_lr_decay(self):
bd = [2, 4, 6, 8]
value = [0.2, 0.4, 0.6, 0.8, 1.0]

+ scheduler = paddle.optimizer.PiecewiseLR(bd, value)
adam = paddle.optimizer.Adam(
-     fluid.dygraph.PiecewiseDecay(bd, value, 0),
-     parameters=linear.parameters())
+     scheduler, parameters=linear.parameters())

self.assertTrue(
np.allclose(
@@ -393,8 +393,8 @@ def test_lr_decay(self):
for i in range(12):
adam.minimize(loss)
lr = adam.get_lr()

self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
+ scheduler.step()

def test_lr_decay_natural_exp(self):
with fluid.dygraph.guard():
@@ -409,24 +409,21 @@ def test_lr_decay_natural_exp(self):
loss = fluid.layers.reduce_mean(b)
base_lr = 1.0

+ scheduler = paddle.optimizer.NaturalExpLR(1.0, gamma=0.5)
+ print("scheduler.last_lr", scheduler.last_lr)
adam = paddle.optimizer.Adam(
-     fluid.dygraph.NaturalExpDecay(
-         learning_rate=base_lr,
-         decay_steps=3,
-         decay_rate=0.5,
-         staircase=True),
-     parameters=linear.parameters())
+     scheduler, parameters=linear.parameters())

self.assertTrue(
np.allclose(
adam.get_lr(), 1.0, rtol=1e-06, atol=0.0))

- ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
- for i in range(5):
+ ret = [1.0, np.exp(-0.5), np.exp(-1)]
+ for i in range(3):
adam.minimize(loss)
lr = adam.get_lr()

self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
+ scheduler.step()

def test_set_lr(self):
with fluid.dygraph.guard():
@@ -451,20 +448,15 @@ def test_set_lr(self):
np.allclose(
lr, lr_list[i], rtol=1e-06, atol=0.0))

- lr_var = fluid.layers.create_global_var(
-     shape=[1], value=0.7, dtype='float32')
- adam.set_lr(lr_var)
- adam.minimize(loss)
- lr = adam.get_lr()
- self.assertTrue(np.allclose(lr, 0.7, rtol=1e-06, atol=0.0))
+ with self.assertRaises(TypeError):
+     lr_var = fluid.layers.create_global_var(
+         shape=[1], value=0.7, dtype='float32')
+     adam.set_lr(lr_var)

with self.assertRaises(RuntimeError):
adam = paddle.optimizer.Adam(
-     fluid.dygraph.NaturalExpDecay(
-         learning_rate=0.1,
-         decay_steps=3,
-         decay_rate=0.5,
-         staircase=True),
+     paddle.optimizer.NaturalExpLR(
+         learning_rate=0.1, gamma=0.5),
parameters=linear.parameters())
adam.set_lr(0.01)

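Every change in this file follows the same pattern: build the 2.0 scheduler separately, hand it to the optimizer, and call scheduler.step() explicitly once per iteration (the old fluid decay objects advanced on their own). A rough sketch of that loop, again assuming the 2.0-beta names from this diff (PiecewiseLR, Adam); the toy linear model and random input are illustrative only:

import numpy as np
import paddle
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(10, 10)  # illustrative model
    inp = fluid.dygraph.to_variable(
        np.random.uniform(-1, 1, [10, 10]).astype('float32'))

    bd = [2, 4, 6, 8]
    value = [0.2, 0.4, 0.6, 0.8, 1.0]
    scheduler = paddle.optimizer.PiecewiseLR(bd, value)
    adam = paddle.optimizer.Adam(scheduler, parameters=linear.parameters())

    for i in range(12):
        loss = fluid.layers.reduce_mean(linear(inp))
        loss.backward()
        adam.minimize(loss)
        lr = adam.get_lr()   # reflects the scheduler's current value
        scheduler.step()     # advance the schedule explicitly
        linear.clear_gradients()

set_lr() still works for float values, but the updated assertions expect a TypeError for a Variable argument and a RuntimeError when the optimizer was constructed with a scheduler.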
Changes to an additional test file (filename not captured in this diff view):
@@ -374,6 +374,7 @@ def testLoadAndSetVarBase(self):
adam._learning_rate.step_num = 0

para_state_dict, opti_state_dict = paddle.load("./test_dy")
+ print(opti_state_dict['LR_Scheduler'])
adam.set_dict(opti_state_dict)

opti_dict = adam.state_dict()
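The one-line addition above prints the 'LR_Scheduler' entry that now travels inside the optimizer state dict whenever the optimizer holds a 2.0 scheduler. A short sketch of inspecting that entry directly, under the same 2.0-beta naming assumptions; only the 'last_epoch' field is referenced elsewhere in this PR, so other keys are left unspecified:

import paddle
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(10, 10)  # illustrative parameter source
    scheduler = paddle.optimizer.NaturalExpLR(learning_rate=0.1, gamma=0.5)
    adam = paddle.optimizer.Adam(scheduler, parameters=linear.parameters())

    opti_state = adam.state_dict()
    print(opti_state['LR_Scheduler'])                 # scheduler state, e.g. last_epoch
    print(opti_state['LR_Scheduler']['last_epoch'])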
51 changes: 28 additions & 23 deletions python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py
@@ -239,10 +239,10 @@ def setUp(self):

place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
+ scheduler = paddle.optimizer.PiecewiseLR(
+     boundaries=bd, values=lr_arr)
adam = Adam(
-     learning_rate=fluid.layers.piecewise_decay(
-         boundaries=bd, values=lr_arr),
-     parameters=ptb_model.parameters())
+     learning_rate=scheduler, parameters=ptb_model.parameters())
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
@@ -268,7 +268,9 @@ def setUp(self):
dy_param_init[param.name] = param.numpy()
dy_loss.backward()
adam.minimize(dy_loss)
+ scheduler.step()
ptb_model.clear_gradients()

if i == batch_num - 1:
for param in ptb_model.parameters():
dy_param_updated[param.name] = param.numpy()
@@ -283,7 +285,7 @@ def setUp(self):
else:
self.base_opti[k] = v

- fluid.save_dygraph(self.opti_dict, "./test_dy")
+ fluid.save_dygraph(self.opti_dict, "./test_dy_v2")

self.state_dict = ptb_model.state_dict()

@@ -292,7 +294,7 @@ def setUp(self):
np_t = v.numpy()
self.model_base[k] = np_t

- paddle.save(self.state_dict, "./test_dy")
+ paddle.save(self.state_dict, "./test_dy_v2")

def testLoadAndSetVarBase(self):
seed = 90
@@ -325,10 +327,10 @@ def testLoadAndSetVarBase(self):

place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
+ scheduler = paddle.optimizer.PiecewiseLR(
+     boundaries=bd, values=lr_arr)
adam = Adam(
-     learning_rate=fluid.layers.piecewise_decay(
-         boundaries=bd, values=lr_arr),
-     parameters=ptb_model.parameters())
+     learning_rate=scheduler, parameters=ptb_model.parameters())
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
@@ -354,6 +356,7 @@ def testLoadAndSetVarBase(self):
dy_param_init[param.name] = param.numpy()
dy_loss.backward()
adam.minimize(dy_loss)
+ scheduler.step()
ptb_model.clear_gradients()
if i == batch_num - 1:
for param in ptb_model.parameters():
@@ -370,10 +373,7 @@ def testLoadAndSetVarBase(self):

self.assertTrue(np.sum(np.abs(v.numpy())) == 0)

- if isinstance(adam._learning_rate, LearningRateDecay):
-     adam._learning_rate.step_num = 0

para_state_dict, opti_state_dict = paddle.load("./test_dy")
para_state_dict, opti_state_dict = paddle.load("./test_dy_v2")
adam.set_state_dict(opti_state_dict)

opti_dict = adam.state_dict()
@@ -434,10 +434,10 @@ def testSetVariable(self):

place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
+ scheduler = paddle.optimizer.PiecewiseLR(
+     boundaries=bd, values=lr_arr)
adam = Adam(
-     learning_rate=fluid.layers.piecewise_decay(
-         boundaries=bd, values=lr_arr),
-     parameters=ptb_model.parameters())
+     learning_rate=scheduler, parameters=ptb_model.parameters())
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
@@ -463,6 +463,7 @@ def testSetVariable(self):
dy_param_init[param.name] = param.numpy()
dy_loss.backward()
adam.minimize(dy_loss)
+ scheduler.step()
ptb_model.clear_gradients()
if i == batch_num - 1:
for param in ptb_model.parameters():
@@ -541,10 +542,10 @@ def testSetNumpy(self):

place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
+ scheduler = paddle.optimizer.PiecewiseLR(
+     boundaries=bd, values=lr_arr)
adam = Adam(
-     learning_rate=fluid.layers.piecewise_decay(
-         boundaries=bd, values=lr_arr),
-     parameters=ptb_model.parameters())
+     learning_rate=scheduler, parameters=ptb_model.parameters())
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
@@ -570,6 +571,7 @@ def testSetNumpy(self):
dy_param_init[param.name] = param.numpy()
dy_loss.backward()
adam.minimize(dy_loss)
+ scheduler.step()
ptb_model.clear_gradients()
if i == batch_num - 1:
for param in ptb_model.parameters():
@@ -745,7 +747,7 @@ def testLoadAndSetVarBaseBeforeTrain(self):
last_hidden = None
last_cell = None

state_dict, opti_dict = fluid.load_dygraph("./test_dy")
state_dict, opti_dict = fluid.load_dygraph("./test_dy_v2")
adam.set_state_dict(opti_dict)
ptb_model.set_dict(state_dict)

@@ -825,9 +827,10 @@ def testSetNumpyBeforeTrain(self):

place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
+ scheduler = paddle.optimizer.PiecewiseLR(
+     boundaries=bd, values=lr_arr)
adam = Adam(
-     learning_rate=fluid.layers.piecewise_decay(
-         boundaries=bd, values=lr_arr),
+     learning_rate=scheduler,
beta1=0.8,
beta2=0.6,
parameters=ptb_model.parameters())
@@ -867,14 +870,16 @@ def testSetNumpyBeforeTrain(self):
init_cell)

dy_loss.backward()
+ scheduler.step()
adam.minimize(dy_loss)
ptb_model.clear_gradients()

opti_dict = adam.state_dict()
for k, v in opti_dict.items():
if k == "global_step":
if k == "LR_Scheduler":
self.assertTrue(
np.array_equal(v.numpy(), self.base_opti[v.name] + 1))
np.array_equal(v['last_epoch'], self.base_opti[k][
'last_epoch'] + 1))

if k.find("beta1_pow_acc_0") > 0:
self.assertTrue(
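Taken together, the changes in this file exercise one round trip: run a training step with the scheduler attached, save the model and optimizer under a common prefix, reload both, and check that the scheduler state (notably 'last_epoch' under the 'LR_Scheduler' key) is restored and keeps advancing. A condensed sketch under the same 2.0-beta assumptions as the earlier examples; the "./example_dy" prefix and the tiny model are illustrative, not taken from the tests:

import numpy as np
import paddle
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(10, 10)  # illustrative model
    inp = fluid.dygraph.to_variable(
        np.random.uniform(-1, 1, [10, 10]).astype('float32'))

    scheduler = paddle.optimizer.PiecewiseLR(
        boundaries=[100, 200], values=[1.0, 0.5, 0.25])
    adam = paddle.optimizer.Adam(
        learning_rate=scheduler, parameters=linear.parameters())

    # one training step, stepping the scheduler explicitly
    loss = fluid.layers.reduce_mean(linear(inp))
    loss.backward()
    adam.minimize(loss)
    scheduler.step()
    linear.clear_gradients()

    # save parameters and optimizer state under one prefix, as the tests do
    paddle.save(linear.state_dict(), "./example_dy")      # hypothetical prefix
    fluid.save_dygraph(adam.state_dict(), "./example_dy")

    # reload; the scheduler state rides along under 'LR_Scheduler'
    para_state_dict, opti_state_dict = paddle.load("./example_dy")
    linear.set_dict(para_state_dict)
    adam.set_state_dict(opti_state_dict)
    print(opti_state_dict['LR_Scheduler']['last_epoch'])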