[HotFix] Add support for optimizer with varbase input #32362

Merged
10 changes: 6 additions & 4 deletions python/paddle/fluid/regularizer.py
@@ -28,10 +28,12 @@ def _create_regularization_of_grad(param, grad, regularization=None):
     Function helper of append_regularization_ops.
     """
     # If no gradient or no regularization is specified, then we don't need to do anything
-    if grad is None or (param.regularizer is None and regularization is None):
+    if grad is None or ((not hasattr(param, 'regularizer') or (
+            hasattr(param, 'regularizer') and param.regularizer is None)) and
+                        regularization is None):
         return grad
     regularization_term = None
-    if param.regularizer is not None:
+    if hasattr(param, 'regularizer') and param.regularizer is not None:
         # Add variable for regularization term in grad block
         regularization_term = param.regularizer(param, grad, grad.block)
     elif regularization is not None:
@@ -213,7 +215,7 @@ def __call__(self, param, grad, block):
         Returns:
             new variable for weight decay
         """
-        assert isinstance(param, framework.Parameter)
+        assert isinstance(param, framework.Variable)
         assert isinstance(block, framework.Block)
 
         inputs = {"X": [param]}
@@ -320,7 +322,7 @@ def __call__(self, param, grad, block):
         Returns:
             new variable for weight decay
         """
-        assert isinstance(param, framework.Parameter)
+        assert isinstance(param, framework.Variable)
         assert isinstance(block, framework.Block)
 
         if framework.in_dygraph_mode():
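Note on the regularizer change above: the guard treats a parameter with no `regularizer` attribute (a bare VarBase/`paddle.Tensor` handed to the optimizer via `parameters=[...]`) the same as one whose regularizer is `None`. A minimal, framework-free sketch of that behaviour — `_ParamLike` and `_TensorLike` are hypothetical stand-ins, not Paddle types:

class _ParamLike:
    """Hypothetical stand-in for a framework Parameter: has a regularizer slot."""
    regularizer = None


class _TensorLike:
    """Hypothetical stand-in for a bare VarBase/Tensor: no regularizer attribute."""
    pass


def _skip_regularization(param, grad, regularization=None):
    # Mirrors the guarded condition added above: a missing 'regularizer'
    # attribute is treated exactly like an explicit regularizer of None.
    return grad is None or (
        (not hasattr(param, 'regularizer') or param.regularizer is None) and
        regularization is None)


# The old check accessed param.regularizer directly and would raise
# AttributeError for _TensorLike; the guarded version just skips regularization.
assert _skip_regularization(_ParamLike(), grad=object())
assert _skip_regularization(_TensorLike(), grad=object())
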
122 changes: 122 additions & 0 deletions python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py
@@ -0,0 +1,122 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import numpy as np
import unittest

import paddle
import paddle.optimizer as optimizer


class TestOptimizerForVarBase(unittest.TestCase):
    def setUp(self):
        self.lr = 0.01

    def run_optimizer_step_with_varbase_list_input(self, optimizer):
        x = paddle.zeros([2, 3])
        y = paddle.ones([2, 3])
        x.stop_gradient = False

        z = x + y

        opt = optimizer(
            learning_rate=self.lr, parameters=[x], weight_decay=0.01)

        z.backward()
        opt.step()

        self.assertTrue(np.allclose(x.numpy(), np.full([2, 3], -self.lr)))

    def run_optimizer_minimize_with_varbase_list_input(self, optimizer):
        x = paddle.zeros([2, 3])
        y = paddle.ones([2, 3])
        x.stop_gradient = False

        z = x + y

        opt = optimizer(learning_rate=self.lr, parameters=[x])

        z.backward()
        opt.minimize(z)

        self.assertTrue(np.allclose(x.numpy(), np.full([2, 3], -self.lr)))

    def test_adam_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adam)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adam)

    def test_sgd_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.SGD)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.SGD)

    def test_adagrad_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adagrad)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adagrad)

    def test_adamw_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.AdamW)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.AdamW)

    def test_adamax_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Adamax)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Adamax)

    def test_momentum_with_varbase_list_input(self):
        self.run_optimizer_step_with_varbase_list_input(optimizer.Momentum)
        self.run_optimizer_minimize_with_varbase_list_input(optimizer.Momentum)

    def test_optimizer_with_varbase_input(self):
        x = paddle.zeros([2, 3])
        with self.assertRaises(TypeError):
            optimizer.Adam(learning_rate=self.lr, parameters=x)

    def test_create_param_lr_with_1_for_coverage(self):
        x = paddle.fluid.framework.ParamBase(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="x",
            optimize_attr={'learning_rate': 1.0})
        x.value().get_tensor().set(
            np.random.random((5, 10)).astype('float32'),
            paddle.fluid.framework._current_expected_place())

        y = paddle.ones([5, 10])
        z = x + y
        opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])
        z.backward()
        opt.step()

    def test_create_param_lr_with_no_1_value_for_coverage(self):
        x = paddle.fluid.framework.ParamBase(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="x",
            optimize_attr={'learning_rate': 0.12})
        x.value().get_tensor().set(
            np.random.random((5, 10)).astype('float32'),
            paddle.fluid.framework._current_expected_place())

        y = paddle.ones([5, 10])
        z = x + y
        opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])
        z.backward()
        opt.step()


if __name__ == "__main__":
    unittest.main()
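
For readers checking the `-self.lr` assertions above: since `z = x + y`, the gradient of `z` with respect to `x` is all ones, so a single plain gradient step moves `x` from zeros to `-lr` elementwise; adaptive optimizers such as Adam normalize the gradient, and for a constant all-ones gradient the bias-corrected first step is also roughly `lr`. A small NumPy-only sanity check of that arithmetic (illustrative, not part of the PR):

import numpy as np

lr = 0.01
x = np.zeros([2, 3])
grad = np.ones([2, 3])  # d(z)/d(x) for z = x + y is a tensor of ones

# Plain SGD step: x <- x - lr * grad, so x goes from 0 to -lr elementwise.
# (The weight_decay=0.01 term in the step helper contributes coeff * x = 0
# here, since x starts at zero, so it does not change the first update.)
x_after_sgd = x - lr * grad
assert np.allclose(x_after_sgd, np.full([2, 3], -lr))

# Bias-corrected first Adam step: update ~= lr * g / (sqrt(g**2) + eps) ~= lr
# for a constant gradient of ones, so the result matches SGD up to epsilon.
eps = 1e-8
m_hat, v_hat = grad, grad**2
x_after_adam = x - lr * m_hat / (np.sqrt(v_hat) + eps)
assert np.allclose(x_after_adam, np.full([2, 3], -lr), atol=1e-6)
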
41 changes: 28 additions & 13 deletions python/paddle/optimizer/optimizer.py
@@ -100,8 +100,19 @@ def __init__(self,
                  weight_decay=None,
                  grad_clip=None,
                  name=None):
-        self._parameter_list = list(
-            parameters) if parameters is not None else None
+        if parameters is not None:
+            # paddle.Tensor is also iterable, so we don't check whether the
+            # input is iterable here; if the input is a paddle.Tensor,
+            # list(paddle.Tensor) would produce an erroneous value.
+            if isinstance(parameters, paddle.Tensor):
+                raise TypeError(
+                    "`parameters` argument given to the optimizer should be "
+                    "an iterable of paddle Tensors, but got argument type is `{}`.".
+                    format(type(parameters)))
+            self._parameter_list = list(parameters)
+        else:
+            self._parameter_list = None
 
         self._name = name
         if framework.in_dygraph_mode():
             if self._parameter_list is None:
@@ -110,7 +121,8 @@ def __init__(self,
                 )
             if weight_decay is not None:
                 for param in self._parameter_list:
-                    if param.regularizer is not None:
+                    if hasattr(param,
+                               'regularizer') and param.regularizer is not None:
                         logging.info(
                             "If regularizer of a Parameter has been set by 'paddle.ParamAttr' or 'static.WeightNormParamAttr' already. "
                             "The weight_decay[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
@@ -433,17 +445,20 @@ def _append_optimize_op(self, block, param_and_grad):
     def _create_param_lr(self, param_and_grad):
         # create learning rate tensor for every parameter
         param = param_and_grad[0]
-        param_lr = param.optimize_attr['learning_rate']
-        if type(param_lr) == Variable:
-            return param_lr
-        else:
-            if param_lr == 1.0:
-                return self._global_learning_rate()
+        if hasattr(param, 'optimize_attr'):
+            param_lr = param.optimize_attr['learning_rate']
+            if type(param_lr) == Variable:
+                return param_lr
             else:
-                with default_main_program()._lr_schedule_guard(
-                    is_with_opt=True), framework.name_scope(
-                        'scale_with_param_lr'):
-                    return self._global_learning_rate() * param_lr
+                if param_lr == 1.0:
+                    return self._global_learning_rate()
+                else:
+                    with default_main_program()._lr_schedule_guard(
+                            is_with_opt=True), framework.name_scope(
+                            'scale_with_param_lr'):
+                        return self._global_learning_rate() * param_lr
+        else:
+            return self._global_learning_rate()
 
     def _create_accumulators(self, block, parameters):
         """Create all accumulators needed by the parameters
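To summarize the `_create_param_lr` branch above: a `ParamBase` carries `optimize_attr['learning_rate']`, which scales the global learning rate unless it equals 1.0, while a bare VarBase has no `optimize_attr` and falls back to the global rate. A framework-free sketch under those assumptions — `GLOBAL_LR`, `_ParamLike`, and `_TensorLike` are illustrative names, not Paddle internals:

GLOBAL_LR = 0.01  # stands in for self._global_learning_rate()


class _ParamLike:
    """Hypothetical stand-in for a ParamBase with a per-parameter LR multiplier."""

    def __init__(self, lr_mult):
        self.optimize_attr = {'learning_rate': lr_mult}


class _TensorLike:
    """Hypothetical stand-in for a bare VarBase: no optimize_attr attribute."""
    pass


def resolve_param_lr(param, global_lr=GLOBAL_LR):
    # Mirrors the branching added to _create_param_lr above (minus the
    # Variable and lr-schedule cases, which need the real framework).
    if hasattr(param, 'optimize_attr'):
        param_lr = param.optimize_attr['learning_rate']
        if param_lr == 1.0:
            return global_lr
        return global_lr * param_lr
    return global_lr


assert resolve_param_lr(_ParamLike(1.0)) == GLOBAL_LR          # cf. test_create_param_lr_with_1_for_coverage
assert resolve_param_lr(_ParamLike(0.12)) == GLOBAL_LR * 0.12  # cf. test_create_param_lr_with_no_1_value_for_coverage
assert resolve_param_lr(_TensorLike()) == GLOBAL_LR            # bare VarBase falls back to the global rate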