Commit

update api && add ut
MRXLT committed Aug 21, 2020
1 parent b00b85f commit 6cc0fc2
Showing 8 changed files with 894 additions and 25 deletions.
19 changes: 14 additions & 5 deletions python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -458,15 +458,24 @@ def test_adam_op_with_state_dict(self):
         adam.set_state_dict(state_dict)
 
         #learning_rate is Decay
-        from paddle.fluid.regularizer import L2Decay
+        learning_rate = fluid.dygraph.CosineDecay(0.1, 10000, 120)
         adam = paddle.optimizer.Adam(
-            learning_rate=0.01,
-            weight_decay=L2Decay(0.001),
+            learning_rate=learning_rate,
+            weight_decay=fluid.regularizer.L2Decay(0.001),
             parameters=emb.parameters())
 
         state_dict = adam.state_dict()
         adam.set_state_dict(state_dict)
 
+        #learning_rate is Tensor
+        learning_rate = np.array([0.01]).astype("float32")
+        learning_rate = paddle.to_tensor(learning_rate)
+        adam = paddle.optimizer.Adam(
+            learning_rate=learning_rate, parameters=emb.parameters())
+
+        state_dict = adam.state_dict()
+        adam.set_state_dict(state_dict)
+
         params = adam.get_opti_var_name_list()
         assert (params is not None)
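The hunk above now exercises all three accepted forms of learning_rate on paddle.optimizer.Adam: a plain float, a decay schedule, and a one-element float32 Tensor, each followed by a state_dict round-trip. A minimal standalone sketch of the same behavior, assuming the 2.0-beta dygraph API used in this test (the Embedding construction is a stand-in for whatever layer the test builds earlier; any layer's parameters() works):

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.disable_static()
    emb = paddle.nn.Embedding(10, 10)  # stand-in parameterized layer

    # 1) float learning rate
    adam = paddle.optimizer.Adam(
        learning_rate=0.01, parameters=emb.parameters())

    # 2) decay-schedule learning rate
    adam = paddle.optimizer.Adam(
        learning_rate=fluid.dygraph.CosineDecay(0.1, 10000, 120),
        weight_decay=fluid.regularizer.L2Decay(0.001),
        parameters=emb.parameters())

    # 3) one-element float32 Tensor learning rate
    adam = paddle.optimizer.Adam(
        learning_rate=paddle.to_tensor(np.array([0.01]).astype("float32")),
        parameters=emb.parameters())

    # state_dict round-trips for every form
    adam.set_state_dict(adam.state_dict())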

@@ -478,12 +478,12 @@ def test_adam_op_with_set_lr(self):

lr = 0.01
adam.set_lr(lr)
cur_lr = adam.current_step_lr()
cur_lr = adam.get_lr()
assert (lr == cur_lr)

lr_var = paddle.create_global_var(shape=[1], value=lr, dtype='float32')
adam.set_lr(lr_var)
cur_lr = adam.current_step_lr()
cur_lr = adam.get_lr()
assert (np.float32(lr) == cur_lr)
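This hunk tracks the rename of current_step_lr() to get_lr() on the new paddle.optimizer classes; set_lr() itself still takes either a float or a one-element float32 Tensor. A minimal sketch of the pairing, assuming the same 2.0-beta API:

    import numpy as np
    import paddle

    paddle.disable_static()
    linear = paddle.nn.Linear(13, 5, dtype="float32")
    adam = paddle.optimizer.Adam(
        learning_rate=0.1, parameters=linear.parameters())

    adam.set_lr(0.01)             # float form
    assert adam.get_lr() == 0.01  # get_lr() replaces current_step_lr()

    lr_var = paddle.create_global_var(shape=[1], value=0.05, dtype='float32')
    adam.set_lr(lr_var)           # one-element Tensor form
    assert np.float32(0.05) == adam.get_lr()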


14 changes: 13 additions & 1 deletion python/paddle/fluid/tests/unittests/test_adamw_op.py
@@ -19,7 +19,7 @@


 class TestAdamWOp(unittest.TestCase):
-    def test_adamw_opi_dygraph(self):
+    def test_adamw_op_dygraph(self):
         paddle.disable_static()
         value = np.arange(26).reshape(2, 13).astype("float32")
         a = paddle.to_variable(value)
@@ -34,6 +34,18 @@ def test_adamw_opi_dygraph(self):
         adam.step()
         adam.clear_gradients()
 
+    def test_adamw_op_coverage(self):
+        paddle.disable_static()
+        value = np.arange(26).reshape(2, 13).astype("float32")
+        a = paddle.to_variable(value)
+        linear = paddle.nn.Linear(13, 5, dtype="float32")
+        adam = paddle.optimizer.AdamW(
+            learning_rate=0.0,
+            parameters=linear.parameters(),
+            apply_decay_param_fun=lambda name: True,
+            weight_decay=0.01)
+        assert (adam.__str__() is not None)
+
     def test_adamw_op(self):
         place = fluid.CPUPlace()
         shape = [2, 3, 8, 8]
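The new test_adamw_op_coverage only checks that AdamW constructs with apply_decay_param_fun; the callback receives a parameter's name and returns whether weight decay applies to it. A sketch of a more typical policy, assuming the same API (decaying weights but not biases is illustrative here, not something this commit prescribes):

    import numpy as np
    import paddle

    paddle.disable_static()
    linear = paddle.nn.Linear(13, 5, dtype="float32")

    # decay weights but skip bias parameters -- an illustrative policy
    adam = paddle.optimizer.AdamW(
        learning_rate=0.01,
        parameters=linear.parameters(),
        apply_decay_param_fun=lambda name: "bias" not in name,
        weight_decay=0.01)

    value = np.arange(26).reshape(2, 13).astype("float32")
    out = linear(paddle.to_variable(value))
    out.backward()
    adam.step()
    adam.clear_gradients()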
@@ -111,7 +111,126 @@ def node_func():
            strategy = paddle.distributed.fleet.DistributedStrategy()
            strategy.nccl_comm_num = 2
            strategy.sync_nccl_allreduce = True
            optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01)
            optimizer = fleet.distributed_optimizer(
                optimizer, strategy=strategy)
            optimizer.minimize(avg_cost)
            exe = paddle.fluid.Executor(place=paddle.fluid.CPUPlace())
            exe.run(paddle.fluid.default_startup_program())

            import numpy as np

            def gen_data():
                return {
                    "x": np.random.random(size=(128, 32)).astype('float32'),
                    "y": np.random.randint(
                        2, size=(128, 1)).astype('int64')
                }

            for i in range(10):
                cost_val = exe.run(feed=gen_data(), fetch_list=[avg_cost.name])
                print("cost of step[{}] = {}".format(i, cost_val))

        proc_a = launch_func(node_func, node_a)
        proc_a.start()
        proc_b = launch_func(node_func, node_b)
        proc_b.start()
        proc_a.join()
        proc_b.join()
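Stripped of the multi-process scaffolding, the strategy wiring this test exercises reduces to the sketch below; it assumes the PADDLE_* collective-training variables from node_a/node_b are already exported in the environment:

    import paddle
    import paddle.distributed.fleet as fleet
    import paddle.fluid.incubate.fleet.base.role_maker as role_maker

    role = role_maker.PaddleCloudRoleMaker(is_collective=True)
    fleet.init(role)

    strategy = paddle.distributed.fleet.DistributedStrategy()
    strategy.nccl_comm_num = 2           # use two NCCL communicators
    strategy.sync_nccl_allreduce = True  # block until each allreduce finishes

    optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01)
    optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
    # optimizer.minimize(avg_cost)  # avg_cost: a loss built as in node_func above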

    def test_graph_execution_optimizer_not_apply_v2(self):
        node_a = {
            "PADDLE_TRAINER_ID": "0",
            "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:36003",
            "PADDLE_TRAINERS_NUM": "2",
            "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36003,127.0.0.1:36004",
            "http_proxy": "",
            "https_proxy": ""
        }

        node_b = {
            "PADDLE_TRAINER_ID": "1",
            "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:36004",
            "PADDLE_TRAINERS_NUM": "2",
            "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36003,127.0.0.1:36004",
            "http_proxy": "",
            "https_proxy": ""
        }

        def node_func():
            import paddle.distributed.fleet as fleet
            import paddle.fluid.incubate.fleet.base.role_maker as role_maker
            role = role_maker.PaddleCloudRoleMaker(is_collective=True)
            fleet.init(role)
            input_x = paddle.fluid.layers.data(
                name="x", shape=[32], dtype='float32')
            input_y = paddle.fluid.layers.data(
                name="y", shape=[1], dtype='int64')

            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
            prediction = paddle.fluid.layers.fc(input=[fc_2],
                                                size=2,
                                                act='softmax')
            cost = paddle.fluid.layers.cross_entropy(
                input=prediction, label=input_y)
            avg_cost = paddle.fluid.layers.mean(x=cost)

            strategy = paddle.distributed.fleet.DistributedStrategy()
            optimizer = paddle.optimizer.SGD(learning_rate=0.01)
            optimizer = fleet.distributed_optimizer(
                optimizer, strategy=strategy)
            optimizer.minimize(avg_cost)

        proc_a = launch_func(node_func, node_a)
        proc_a.start()
        proc_b = launch_func(node_func, node_b)
        proc_b.start()
        proc_a.join()
        proc_b.join()

    def test_graph_execution_optimizer(self):
        node_a = {
            "PADDLE_TRAINER_ID": "0",
            "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:36001",
            "PADDLE_TRAINERS_NUM": "2",
            "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36001,127.0.0.1:36002",
            "http_proxy": "",
            "https_proxy": ""
        }

        node_b = {
            "PADDLE_TRAINER_ID": "1",
            "PADDLE_CURRENT_ENDPOINT": "127.0.0.1:36002",
            "PADDLE_TRAINERS_NUM": "2",
            "PADDLE_TRAINER_ENDPOINTS": "127.0.0.1:36001,127.0.0.1:36002",
            "http_proxy": "",
            "https_proxy": ""
        }

        def node_func():
            import paddle.distributed.fleet as fleet
            import paddle.fluid.incubate.fleet.base.role_maker as role_maker
            role = role_maker.PaddleCloudRoleMaker(is_collective=True)
            fleet.init(role)
            input_x = paddle.fluid.layers.data(
                name="x", shape=[32], dtype='float32')
            input_y = paddle.fluid.layers.data(
                name="y", shape=[1], dtype='int64')

            fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
            fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
            prediction = paddle.fluid.layers.fc(input=[fc_2],
                                                size=2,
                                                act='softmax')
            cost = paddle.fluid.layers.cross_entropy(
                input=prediction, label=input_y)
            avg_cost = paddle.fluid.layers.mean(x=cost)

            strategy = paddle.distributed.fleet.DistributedStrategy()
            strategy.nccl_comm_num = 2
            strategy.sync_nccl_allreduce = True
            optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01)
            optimizer = fleet.distributed_optimizer(
                optimizer, strategy=strategy)
            optimizer.minimize(avg_cost)
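launch_func itself is defined earlier in this test file and is not part of the hunk shown. A plausible reconstruction, assuming it only exports the given environment dict and runs the trainer body in a child process (hypothetical; the real helper may differ):

    import multiprocessing
    import os

    def launch_func(func, env_dict):
        # hypothetical sketch: export the PADDLE_* variables, then run the
        # trainer body in its own process so two trainers can rendezvous
        def wrapper():
            os.environ.update(env_dict)
            func()
        return multiprocessing.Process(target=wrapper)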
