From ebe3c3bcae677dbe8edd0b0f0c92e1b5259ab6d5 Mon Sep 17 00:00:00 2001 From: Abhinav Arora Date: Tue, 24 Oct 2017 14:24:09 -0700 Subject: [PATCH 1/2] Adding nesterov momentum to python momentum wrapper --- python/paddle/v2/framework/optimizer.py | 6 ++- .../v2/framework/tests/test_optimizer.py | 38 ++++++++++++++++++- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index f7d35ca06586a2..e908d9eb13488a 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -248,13 +248,14 @@ class MomentumOptimizer(Optimizer): """ _velocity_acc_str = "velocity" - def __init__(self, learning_rate, momentum): + def __init__(self, learning_rate, momentum, use_nesterov=False): assert learning_rate is not None assert momentum is not None super(MomentumOptimizer, self).__init__() self.type = "momentum" self._learning_rate = learning_rate self._momentum = momentum + self._use_nesterov = bool(use_nesterov) def _initialize_tensors(self, block): assert isinstance(block, framework.Block) @@ -296,7 +297,8 @@ def _append_optimize_op(self, block, param_and_grad): "ParamOut": param_and_grad[0], "VelocityOut": velocity_acc }, - attrs={"mu": self._momentum}) + attrs={"mu": self._momentum, + "useNesterov": self._use_nesterov}) return momentum_op diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py index 4b267598efb84a..36752e08998a34 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -35,7 +35,7 @@ def get_accumulators(self): def get_velocity_str(self): return self._velocity_acc_str - def test_momentum_optimizer(self): + def test_vanilla_momentum_optimizer(self): program = framework.Program() block = program.global_block() mul_x = block.create_parameter( @@ -59,6 +59,42 @@ def test_momentum_optimizer(self): self.assertEqual(len(opts), 1) sgd_op = opts[0] self.assertEqual(sgd_op.type, "momentum") + self.assertFalse(sgd_op.attr('useNesterov')) + + # Check accumulators + accumulators = momentum_optimizer.get_accumulators() + self.assertEqual(len(accumulators), 1) + self.assertTrue(momentum_optimizer.get_velocity_str() in accumulators) + velocity_acc = accumulators[momentum_optimizer.get_velocity_str()] + self.assertEqual(len(velocity_acc), 1) + self.assertTrue(mul_x.name in velocity_acc) + + def test_nesterov_momentum_optimizer(self): + program = framework.Program() + block = program.global_block() + mul_x = block.create_parameter( + dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + block.append_op( + type="mul", + inputs={"X": mul_x, + "Y": mul_y}, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + momentum_optimizer = self.MockMomentum( + learning_rate=0.01, momentum=0.2, use_nesterov=True) + params_grads = momentum_optimizer.create_backward_pass(mul_out) + self.assertEqual(len(params_grads), 1) + self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) + opts = momentum_optimizer.create_optimization_pass(params_grads, + mul_out) + self.assertEqual(len(opts), 1) + sgd_op = opts[0] + self.assertEqual(sgd_op.type, "momentum") + self.assertTrue(sgd_op.attr('useNesterov')) # Check accumulators accumulators = momentum_optimizer.get_accumulators() From 309ddad6c6a803c42c93a79024f09038418d7521 Mon Sep 17 00:00:00 2001 From: Abhinav Arora Date: Tue, 24 Oct 2017 15:55:54 -0700 Subject: [PATCH 2/2] Fixing optimizer test after merge --- python/paddle/v2/framework/tests/test_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py index eeb41a6ff2911f..d1527e70c0785e 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -87,7 +87,7 @@ def test_nesterov_momentum_optimizer(self): attrs={"x_num_col_dims": 1}) momentum_optimizer = self.MockMomentum( learning_rate=0.01, momentum=0.2, use_nesterov=True) - params_grads = momentum_optimizer.create_backward_pass(mul_out) + params_grads = append_backward_ops(mul_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) opts = momentum_optimizer.create_optimization_pass(params_grads,